From 1fb1092d75faa6d7698a7effe40382fbf8d1ae43 Mon Sep 17 00:00:00 2001 From: pjd Date: Wed, 4 Jul 2012 20:20:48 +0000 Subject: [PATCH] Make use of GEOM Gate direct reads feature. This allows HAST to serve reads with native speed of the underlying provider. There are three situations when direct reads are not used: 1. Data is being synchronized and synchronization source is the secondary node, which means secondary node has more recent data and we should read from it. 2. Local read failed and we have to try to read from the secondary node. 3. Local component is unavailable and all I/O requests are served from the secondary node. Sponsored by: Panzura, http://www.panzura.com MFC after: 1 month --- sbin/hastd/primary.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index d6d93b14afd0..88159cb6bec1 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -543,6 +543,27 @@ primary_connect(struct hast_resource *res, struct proto_conn **connp) return (0); } + +/* + * Function instructs GEOM_GATE to handle reads directly from within the kernel. + */ +static void +enable_direct_reads(struct hast_resource *res) +{ + struct g_gate_ctl_modify ggiomodify; + + bzero(&ggiomodify, sizeof(ggiomodify)); + ggiomodify.gctl_version = G_GATE_VERSION; + ggiomodify.gctl_unit = res->hr_ggateunit; + ggiomodify.gctl_modify = GG_MODIFY_READPROV | GG_MODIFY_READOFFSET; + strlcpy(ggiomodify.gctl_readprov, res->hr_localpath, + sizeof(ggiomodify.gctl_readprov)); + ggiomodify.gctl_readoffset = res->hr_localoff; + if (ioctl(res->hr_ggatefd, G_GATE_CMD_MODIFY, &ggiomodify) == 0) + pjdlog_debug(1, "Direct reads enabled."); + else + pjdlog_errno(LOG_WARNING, "Failed to enable direct reads"); +} static int init_remote(struct hast_resource *res, struct proto_conn **inp, @@ -692,6 +713,8 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, res->hr_secondary_localcnt = nv_get_uint64(nvin, "localcnt"); res->hr_secondary_remotecnt = nv_get_uint64(nvin, "remotecnt"); res->hr_syncsrc = nv_get_uint8(nvin, "syncsrc"); + if (res->hr_syncsrc == HAST_SYNCSRC_PRIMARY) + enable_direct_reads(res); if (nv_exists(nvin, "virgin")) { /* * Secondary was reinitialized, bump localcnt if it is 0 as @@ -1789,13 +1812,14 @@ sync_thread(void *arg __unused) struct timeval tstart, tend, tdiff; unsigned int ii, ncomp, ncomps; off_t offset, length, synced; - bool dorewind; + bool dorewind, directreads; int syncext; ncomps = HAST_NCOMPONENTS; dorewind = true; synced = 0; offset = -1; + directreads = false; for (;;) { mtx_lock(&sync_lock); @@ -1867,6 +1891,8 @@ sync_thread(void *arg __unused) event_send(res, EVENT_SYNCDONE); } mtx_lock(&metadata_lock); + if (res->hr_syncsrc == HAST_SYNCSRC_SECONDARY) + directreads = true; res->hr_syncsrc = HAST_SYNCSRC_UNDEF; res->hr_primary_localcnt = res->hr_secondary_remotecnt; @@ -1880,6 +1906,10 @@ sync_thread(void *arg __unused) mtx_unlock(&metadata_lock); } rw_unlock(&hio_remote_lock[ncomp]); + if (directreads) { + directreads = false; + enable_direct_reads(res); + } continue; } pjdlog_debug(2, "sync: Taking free request.");