Add I/O error counters to hastd(8) and make hastctl(8) display
them. This may be useful for detecting problems with HAST disks.

Discussed with and reviewed by: pjd
MFC after: 1 week
This commit is contained in:
parent
7323adac99
commit
2adbba660d
@ -351,6 +351,12 @@ control_status(struct nv *nv)
|
||||
(uint64_t)nv_get_uint64(nv, "stat_flush%u", ii));
|
||||
printf(" activemap updates: %ju\n",
|
||||
(uint64_t)nv_get_uint64(nv, "stat_activemap_update%u", ii));
|
||||
printf(" local errors: "
|
||||
"read: %ju, write: %ju, delete: %ju, flush: %ju\n",
|
||||
(uintmax_t)nv_get_uint64(nv, "stat_read_error%u", ii),
|
||||
(uintmax_t)nv_get_uint64(nv, "stat_write_error%u", ii),
|
||||
(uintmax_t)nv_get_uint64(nv, "stat_delete_error%u", ii),
|
||||
(uintmax_t)nv_get_uint64(nv, "stat_flush_error%u", ii));
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
@ -207,6 +207,14 @@ control_status_worker(struct hast_resource *res, struct nv *nvout,
|
||||
"stat_flush%u", no);
|
||||
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"),
|
||||
"stat_activemap_update%u", no);
|
||||
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"),
|
||||
"stat_read_error%u", no);
|
||||
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"),
|
||||
"stat_write_error%u", no);
|
||||
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"),
|
||||
"stat_delete_error%u", no);
|
||||
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
|
||||
"stat_flush_error%u", no);
|
||||
end:
|
||||
if (cnvin != NULL)
|
||||
nv_free(cnvin);
|
||||
@ -459,6 +467,16 @@ ctrl_thread(void *arg)
|
||||
nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush");
|
||||
nv_add_uint64(nvout, res->hr_stat_activemap_update,
|
||||
"stat_activemap_update");
|
||||
nv_add_uint64(nvout, res->hr_stat_read_error,
|
||||
"stat_read_error");
|
||||
nv_add_uint64(nvout, res->hr_stat_write_error +
|
||||
res->hr_stat_activemap_write_error,
|
||||
"stat_write_error");
|
||||
nv_add_uint64(nvout, res->hr_stat_delete_error,
|
||||
"stat_delete_error");
|
||||
nv_add_uint64(nvout, res->hr_stat_flush_error +
|
||||
res->hr_stat_activemap_flush_error,
|
||||
"stat_flush_error");
|
||||
nv_add_int16(nvout, 0, "error");
|
||||
break;
|
||||
case CONTROL_RELOAD:
|
||||
|
@ -239,6 +239,18 @@ struct hast_resource {
|
||||
uint64_t hr_stat_flush;
|
||||
/* Number of activemap updates. */
|
||||
uint64_t hr_stat_activemap_update;
|
||||
/* Number of local read errors. */
|
||||
uint64_t hr_stat_read_error;
|
||||
/* Number of local write errors. */
|
||||
uint64_t hr_stat_write_error;
|
||||
/* Number of local delete errors. */
|
||||
uint64_t hr_stat_delete_error;
|
||||
/* Number of local flush errors. */
|
||||
uint64_t hr_stat_flush_error;
|
||||
/* Number of activemap write errors. */
|
||||
uint64_t hr_stat_activemap_write_error;
|
||||
/* Number of activemap flush errors. */
|
||||
uint64_t hr_stat_activemap_flush_error;
|
||||
|
||||
/* Next resource. */
|
||||
TAILQ_ENTRY(hast_resource) hr_next;
|
||||
|
@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resource *res)
|
||||
if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
|
||||
(ssize_t)size) {
|
||||
pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
|
||||
res->hr_stat_activemap_write_error++;
|
||||
return (-1);
|
||||
}
|
||||
if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
|
||||
@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resource *res)
|
||||
} else {
|
||||
pjdlog_errno(LOG_ERR,
|
||||
"Unable to flush disk cache on activemap update");
|
||||
res->hr_stat_activemap_flush_error++;
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
@ -1936,6 +1938,22 @@ ggate_send_thread(void *arg)
|
||||
"G_GATE_CMD_DONE failed");
|
||||
}
|
||||
}
|
||||
if (hio->hio_errors[0]) {
|
||||
switch (ggio->gctl_cmd) {
|
||||
case BIO_READ:
|
||||
res->hr_stat_read_error++;
|
||||
break;
|
||||
case BIO_WRITE:
|
||||
res->hr_stat_write_error++;
|
||||
break;
|
||||
case BIO_DELETE:
|
||||
res->hr_stat_delete_error++;
|
||||
break;
|
||||
case BIO_FLUSH:
|
||||
res->hr_stat_flush_error++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
pjdlog_debug(2,
|
||||
"ggate_send: (%p) Moving request to the free queue.", hio);
|
||||
QUEUE_INSERT2(hio, free);
|
||||
|
@ -765,6 +765,7 @@ disk_thread(void *arg)
|
||||
pjdlog_errno(LOG_WARNING,
|
||||
"Unable to store cleared activemap");
|
||||
free(map);
|
||||
res->hr_stat_activemap_write_error++;
|
||||
break;
|
||||
}
|
||||
free(map);
|
||||
@ -883,8 +884,23 @@ send_thread(void *arg)
|
||||
PJDLOG_ABORT("Unexpected command (cmd=%hhu).",
|
||||
hio->hio_cmd);
|
||||
}
|
||||
if (hio->hio_error != 0)
|
||||
if (hio->hio_error != 0) {
|
||||
switch (hio->hio_cmd) {
|
||||
case HIO_READ:
|
||||
res->hr_stat_read_error++;
|
||||
break;
|
||||
case HIO_WRITE:
|
||||
res->hr_stat_write_error++;
|
||||
break;
|
||||
case HIO_DELETE:
|
||||
res->hr_stat_delete_error++;
|
||||
break;
|
||||
case HIO_FLUSH:
|
||||
res->hr_stat_flush_error++;
|
||||
break;
|
||||
}
|
||||
nv_add_int16(nvout, hio->hio_error, "error");
|
||||
}
|
||||
if (hast_proto_send(res, res->hr_remoteout, nvout, data,
|
||||
length) == -1) {
|
||||
secondary_exit(EX_TEMPFAIL, "Unable to send reply");
|
||||
|
Loading…
Reference in New Issue
Block a user