bdev/nvme: Add support to get the health log for NVMe device
Add a new RPC method to get the health log of a certain NVMe device. Below is an example: ./scripts/rpc.py bdev_nvme_get_controller_health_info -c Nvme0 { "model_number": "INTEL SSDPE2KX020T8", "serial_number": "BTLJ72430ARH2P0BGN", "firmware_revision": "VDV10110", "traddr": "0000:08:00.0", "temperature_celsius": 33, "available_spare_percentage": 99, "available_spare_threshold_percentage": 10, "percentage_used": 2, "data_units_read": 1013408619, "data_units_written": 346792685, "host_read_commands": 30457773264, "host_write_commands": 18949677715, "controller_busy_time": 4979, "power_cycles": 49, "power_on_hours": 31114, "unsafe_shutdowns": 18, "media_errors": 17, "num_err_log_entries": 19, "warning_temperature_time_minutes": 0, "critical_composite_temperature_time_minutes": 0 } Change-Id: I53125d2ec16cb36011571473430aece99167b803 Signed-off-by: GangCao <gang.cao@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8806 Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Mellanox Build Bot
This commit is contained in:
parent
2d629511f5
commit
49ffdc18af
@ -457,6 +457,7 @@ Example response:
|
||||
"bdev_passthru_delete"
|
||||
"bdev_nvme_apply_firmware",
|
||||
"bdev_nvme_get_transport_statistics",
|
||||
"bdev_nvme_get_controller_health_info",
|
||||
"bdev_nvme_detach_controller",
|
||||
"bdev_nvme_attach_controller",
|
||||
"bdev_null_create",
|
||||
@ -3341,6 +3342,62 @@ Example response:
|
||||
}
|
||||
~~~
|
||||
|
||||
## bdev_nvme_get_controller_health_info {#rpc_bdev_nvme_get_controller_health_info}
|
||||
|
||||
Display the health log of the specified NVMe bdev controller.
|
||||
|
||||
### Parameters
|
||||
|
||||
Name | Optional | Type | Description
|
||||
----------------------- | -------- | ----------- | -----------
|
||||
name | Required | string | Name of the NVMe bdev controller
|
||||
|
||||
### Response
|
||||
|
||||
The response is an object containing the health log information of the NVMe controller.
|
||||
|
||||
### Example
|
||||
|
||||
Example request:
|
||||
|
||||
~~~
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"method": "bdev_nvme_get_controller_health_info",
|
||||
"id": 1,
|
||||
"params": {
|
||||
"name": "Nvme0"
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
Example response:
|
||||
|
||||
~~~
|
||||
{
|
||||
"model_number": "INTEL SSDPE2KX020T8",
|
||||
"serial_number": "BTLJ72430ARH2P0BGN",
|
||||
"firmware_revision": "VDV10110",
|
||||
"traddr": "0000:08:00.0",
|
||||
"temperature_celsius": 32,
|
||||
"available_spare_percentage": 99,
|
||||
"available_spare_threshold_percentage": 10,
|
||||
"percentage_used": 2,
|
||||
"data_units_read": 1013408619,
|
||||
"data_units_written": 346792685,
|
||||
"host_read_commands": 30457773282,
|
||||
"host_write_commands": 18949677715,
|
||||
"controller_busy_time": 4979,
|
||||
"power_cycles": 49,
|
||||
"power_on_hours": 31118,
|
||||
"unsafe_shutdowns": 18,
|
||||
"media_errors": 17,
|
||||
"num_err_log_entries": 19,
|
||||
"warning_temperature_time_minutes": 0,
|
||||
"critical_composite_temperature_time_minutes": 0
|
||||
}
|
||||
~~~
|
||||
|
||||
## bdev_rbd_register_cluster {#rpc_bdev_rbd_register_cluster}
|
||||
|
||||
This method is available only if SPDK was built with Ceph RBD support.
|
||||
|
@ -41,6 +41,9 @@
|
||||
#include "spdk/string.h"
|
||||
#include "spdk/rpc.h"
|
||||
#include "spdk/util.h"
|
||||
#include "spdk/env.h"
|
||||
#include "spdk/nvme.h"
|
||||
#include "spdk/nvme_spec.h"
|
||||
|
||||
#include "spdk/log.h"
|
||||
#include "spdk/bdev_module.h"
|
||||
@ -1145,3 +1148,217 @@ cleanup:
|
||||
free_rpc_bdev_nvme_reset_controller_req(&req);
|
||||
}
|
||||
SPDK_RPC_REGISTER("bdev_nvme_reset_controller", rpc_bdev_nvme_reset_controller, SPDK_RPC_RUNTIME)
|
||||
|
||||
/* Decoded parameters of the bdev_nvme_get_controller_health_info RPC. */
struct rpc_get_controller_health_info {
	/* Value of the "name" parameter; heap-allocated by the JSON decoder. */
	char *name;
};
|
||||
|
||||
struct spdk_nvme_health_info_context {
|
||||
struct spdk_jsonrpc_request *request;
|
||||
struct spdk_nvme_ctrlr *ctrlr;
|
||||
struct spdk_nvme_health_information_page health_page;
|
||||
};
|
||||
|
||||
static void
|
||||
free_rpc_get_controller_health_info(struct rpc_get_controller_health_info *r)
|
||||
{
|
||||
free(r->name);
|
||||
}
|
||||
|
||||
static const struct spdk_json_object_decoder rpc_get_controller_health_info_decoders[] = {
|
||||
{"name", offsetof(struct rpc_get_controller_health_info, name), spdk_json_decode_string, true},
|
||||
};
|
||||
|
||||
static void nvme_health_info_cleanup(struct spdk_nvme_health_info_context *context, bool response)
|
||||
{
|
||||
if (response == true) {
|
||||
spdk_jsonrpc_send_error_response(context->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
|
||||
"Internal error.");
|
||||
}
|
||||
|
||||
free(context);
|
||||
}
|
||||
|
||||
static void
|
||||
get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
|
||||
{
|
||||
int i;
|
||||
char buf[128];
|
||||
struct spdk_nvme_health_info_context *context = cb_arg;
|
||||
struct spdk_jsonrpc_request *request = context->request;
|
||||
struct spdk_json_write_ctx *w;
|
||||
struct spdk_nvme_ctrlr *ctrlr = context->ctrlr;
|
||||
const struct spdk_nvme_transport_id *trid = NULL;
|
||||
const struct spdk_nvme_ctrlr_data *cdata = NULL;
|
||||
struct spdk_nvme_health_information_page *health_page = NULL;
|
||||
|
||||
if (spdk_nvme_cpl_is_error(cpl)) {
|
||||
nvme_health_info_cleanup(context, true);
|
||||
SPDK_ERRLOG("get log page failed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctrlr == NULL) {
|
||||
nvme_health_info_cleanup(context, true);
|
||||
SPDK_ERRLOG("ctrlr is NULL\n");
|
||||
return;
|
||||
} else {
|
||||
trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
|
||||
cdata = spdk_nvme_ctrlr_get_data(ctrlr);
|
||||
health_page = &(context->health_page);
|
||||
}
|
||||
|
||||
w = spdk_jsonrpc_begin_result(request);
|
||||
|
||||
spdk_json_write_object_begin(w);
|
||||
snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
|
||||
spdk_str_trim(buf);
|
||||
spdk_json_write_named_string(w, "model_number", buf);
|
||||
snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
|
||||
spdk_str_trim(buf);
|
||||
spdk_json_write_named_string(w, "serial_number", buf);
|
||||
snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
|
||||
spdk_str_trim(buf);
|
||||
spdk_json_write_named_string(w, "firmware_revision", buf);
|
||||
spdk_json_write_named_string(w, "traddr", trid->traddr);
|
||||
spdk_json_write_named_uint64(w, "temperature_celsius", health_page->temperature - 273);
|
||||
spdk_json_write_named_uint64(w, "available_spare_percentage", health_page->available_spare);
|
||||
spdk_json_write_named_uint64(w, "available_spare_threshold_percentage",
|
||||
health_page->available_spare_threshold);
|
||||
spdk_json_write_named_uint64(w, "percentage_used", health_page->percentage_used);
|
||||
spdk_json_write_named_uint128(w, "data_units_read",
|
||||
health_page->data_units_read[0], health_page->data_units_read[1]);
|
||||
spdk_json_write_named_uint128(w, "data_units_written",
|
||||
health_page->data_units_written[0], health_page->data_units_written[1]);
|
||||
spdk_json_write_named_uint128(w, "host_read_commands",
|
||||
health_page->host_read_commands[0], health_page->host_read_commands[1]);
|
||||
spdk_json_write_named_uint128(w, "host_write_commands",
|
||||
health_page->host_write_commands[0], health_page->host_write_commands[1]);
|
||||
spdk_json_write_named_uint128(w, "controller_busy_time",
|
||||
health_page->controller_busy_time[0], health_page->controller_busy_time[1]);
|
||||
spdk_json_write_named_uint128(w, "power_cycles",
|
||||
health_page->power_cycles[0], health_page->power_cycles[1]);
|
||||
spdk_json_write_named_uint128(w, "power_on_hours",
|
||||
health_page->power_on_hours[0], health_page->power_on_hours[1]);
|
||||
spdk_json_write_named_uint128(w, "unsafe_shutdowns",
|
||||
health_page->unsafe_shutdowns[0], health_page->unsafe_shutdowns[1]);
|
||||
spdk_json_write_named_uint128(w, "media_errors",
|
||||
health_page->media_errors[0], health_page->media_errors[1]);
|
||||
spdk_json_write_named_uint128(w, "num_err_log_entries",
|
||||
health_page->num_error_info_log_entries[0], health_page->num_error_info_log_entries[1]);
|
||||
spdk_json_write_named_uint64(w, "warning_temperature_time_minutes", health_page->warning_temp_time);
|
||||
spdk_json_write_named_uint64(w, "critical_composite_temperature_time_minutes",
|
||||
health_page->critical_temp_time);
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (health_page->temp_sensor[i] != 0) {
|
||||
spdk_json_write_named_uint64(w, "temperature_sensor_celsius", health_page->temp_sensor[i] - 273);
|
||||
}
|
||||
}
|
||||
spdk_json_write_object_end(w);
|
||||
|
||||
spdk_jsonrpc_end_result(request, w);
|
||||
nvme_health_info_cleanup(context, false);
|
||||
}
|
||||
|
||||
static void
|
||||
get_health_log_page(struct spdk_nvme_health_info_context *context)
|
||||
{
|
||||
struct spdk_nvme_ctrlr *ctrlr = context->ctrlr;
|
||||
|
||||
if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION,
|
||||
SPDK_NVME_GLOBAL_NS_TAG,
|
||||
&(context->health_page), sizeof(context->health_page), 0,
|
||||
get_health_log_page_completion, context)) {
|
||||
nvme_health_info_cleanup(context, true);
|
||||
SPDK_ERRLOG("spdk_nvme_ctrlr_cmd_get_log_page() failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
get_temperature_threshold_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
|
||||
{
|
||||
struct spdk_nvme_health_info_context *context = cb_arg;
|
||||
|
||||
if (spdk_nvme_cpl_is_error(cpl)) {
|
||||
nvme_health_info_cleanup(context, true);
|
||||
SPDK_ERRLOG("feature SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD failed in completion\n");
|
||||
} else {
|
||||
get_health_log_page(context);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
get_temperature_threshold_feature(struct spdk_nvme_health_info_context *context)
|
||||
{
|
||||
struct spdk_nvme_cmd cmd = {};
|
||||
|
||||
cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
|
||||
cmd.cdw10 = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
|
||||
|
||||
return spdk_nvme_ctrlr_cmd_admin_raw(context->ctrlr, &cmd, NULL, 0,
|
||||
get_temperature_threshold_feature_completion, context);
|
||||
}
|
||||
|
||||
static void
|
||||
get_controller_health_info(struct spdk_jsonrpc_request *request, struct spdk_nvme_ctrlr *ctrlr)
|
||||
{
|
||||
struct spdk_nvme_health_info_context *context;
|
||||
|
||||
context = calloc(1, sizeof(struct spdk_nvme_health_info_context));
|
||||
if (!context) {
|
||||
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
|
||||
"Memory allocation error.");
|
||||
return;
|
||||
}
|
||||
|
||||
context->request = request;
|
||||
context->ctrlr = ctrlr;
|
||||
|
||||
if (get_temperature_threshold_feature(context)) {
|
||||
nvme_health_info_cleanup(context, true);
|
||||
SPDK_ERRLOG("feature SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD failed to submit\n");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
rpc_bdev_nvme_get_controller_health_info(struct spdk_jsonrpc_request *request,
|
||||
const struct spdk_json_val *params)
|
||||
{
|
||||
struct rpc_get_controller_health_info req = {};
|
||||
struct nvme_ctrlr *nvme_ctrlr = NULL;
|
||||
|
||||
if (!params) {
|
||||
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
|
||||
"Missing device name");
|
||||
|
||||
return;
|
||||
}
|
||||
if (spdk_json_decode_object(params, rpc_get_controller_health_info_decoders,
|
||||
SPDK_COUNTOF(rpc_get_controller_health_info_decoders), &req)) {
|
||||
SPDK_ERRLOG("spdk_json_decode_object failed\n");
|
||||
free_rpc_get_controller_health_info(&req);
|
||||
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
|
||||
"Invalid parameters");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
nvme_ctrlr = nvme_ctrlr_get_by_name(req.name);
|
||||
|
||||
if (!nvme_ctrlr) {
|
||||
SPDK_ERRLOG("nvme ctrlr name '%s' does not exist\n", req.name);
|
||||
free_rpc_get_controller_health_info(&req);
|
||||
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
|
||||
"Device not found");
|
||||
return;
|
||||
}
|
||||
|
||||
get_controller_health_info(request, nvme_ctrlr->ctrlr);
|
||||
free_rpc_get_controller_health_info(&req);
|
||||
|
||||
return;
|
||||
}
|
||||
SPDK_RPC_REGISTER("bdev_nvme_get_controller_health_info",
|
||||
rpc_bdev_nvme_get_controller_health_info, SPDK_RPC_RUNTIME)
|
||||
|
@ -964,6 +964,15 @@ if __name__ == "__main__":
|
||||
help='Get bdev_nvme poll group transport statistics')
|
||||
p.set_defaults(func=bdev_nvme_get_transport_statistics)
|
||||
|
||||
def bdev_nvme_get_controller_health_info(args):
    """Fetch the health log of the controller named by args.name and print it."""
    health_info = rpc.bdev.bdev_nvme_get_controller_health_info(args.client,
                                                                name=args.name)
    print_dict(health_info)

p = subparsers.add_parser('bdev_nvme_get_controller_health_info',
                          help='Display health log of the required NVMe bdev controller.')
p.add_argument('-c', '--name', help="Name of the NVMe bdev controller. Example: Nvme0", required=True)
p.set_defaults(func=bdev_nvme_get_controller_health_info)
|
||||
|
||||
# iSCSI
|
||||
def iscsi_set_options(args):
|
||||
rpc.iscsi.iscsi_set_options(
|
||||
|
@ -1276,3 +1276,17 @@ def bdev_nvme_apply_firmware(client, bdev_name, filename):
|
||||
def bdev_nvme_get_transport_statistics(client):
|
||||
"""Get bdev_nvme poll group transport statistics"""
|
||||
return client.call('bdev_nvme_get_transport_statistics')
|
||||
|
||||
|
||||
def bdev_nvme_get_controller_health_info(client, name):
    """Request the health log of an NVMe bdev controller.

    Args:
        client: object whose call() method issues the RPC
        name: name of the NVMe bdev controller to query

    Returns:
        Whatever client.call() returns for the
        'bdev_nvme_get_controller_health_info' method.
    """
    return client.call('bdev_nvme_get_controller_health_info', {'name': name})
|
||||
|
Loading…
Reference in New Issue
Block a user