From 4cb79292f73a598bf23263f541c17da72d55b365 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Mon, 17 Sep 2012 21:26:55 +0000 Subject: [PATCH] Add nvmecontrol(8) source code and beginnings of a man page to the tree. Sponsored by: Intel Contributions from: Joe Golio/EMC --- sbin/nvmecontrol/Makefile | 6 + sbin/nvmecontrol/nvmecontrol.8 | 87 +++++ sbin/nvmecontrol/nvmecontrol.c | 600 +++++++++++++++++++++++++++++++++ 3 files changed, 693 insertions(+) create mode 100644 sbin/nvmecontrol/Makefile create mode 100644 sbin/nvmecontrol/nvmecontrol.8 create mode 100644 sbin/nvmecontrol/nvmecontrol.c diff --git a/sbin/nvmecontrol/Makefile b/sbin/nvmecontrol/Makefile new file mode 100644 index 000000000000..04cefaaa191e --- /dev/null +++ b/sbin/nvmecontrol/Makefile @@ -0,0 +1,6 @@ +# $FreeBSD$ + +PROG= nvmecontrol +MAN= nvmecontrol.8 + +.include diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8 new file mode 100644 index 000000000000..f489f2497f01 --- /dev/null +++ b/sbin/nvmecontrol/nvmecontrol.8 @@ -0,0 +1,87 @@ +.\" +.\" Copyright (c) 2012 Intel Corporation +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions, and the following disclaimer, +.\" without modification. +.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer +.\" substantially similar to the "NO WARRANTY" disclaimer below +.\" ("Disclaimer") and any redistribution must be conditioned upon +.\" including a substantially similar Disclaimer requirement for further +.\" binary redistribution. +.\" +.\" NO WARRANTY +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR +.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGES. +.\" +.\" nvmecontrol man page. +.\" +.\" Author: Jim Harris +.\" +.\" $FreeBSD$ +.\" +.Dd September 17, 2012 +.Dt NVMECONTROL 8 +.Os +.Sh NAME +.Nm nvmecontrol +.Nd NVM Express control utility +.Sh SYNOPSIS +.Nm +.Ic devlist +.Nm +.Ic identify +.Op Fl v +.Op Fl x +.Aq device id +.Nm +.Ic perftest +.Aq Fl n Ar num_threads +.Aq Fl o Ar read|write +.Op Fl p +.Aq Fl s Ar size_in_bytes +.Aq Fl t Ar time_in_sec +.Aq device id +.Sh DESCRIPTION +NVM Express (NVMe) is a storage protocol standard, for SSDs and other +high-speed storage devices over PCI Express. +.Sh EXAMPLES +.Dl nvmecontrol devlist +.Pp +Display a list of NVMe controllers and namespaces along with their device nodes. +.Dl nvmecontrol identify nvme0 +.Pp +Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data. +.Pp +.Dl nvmecontrol identify -x -v nvme0ns1 +.Pp +Display a hexadecimal dump of the nvme0 IDENTIFY_NAMESPACE data for namespace +1. +.Pp +.Dl nvmecontrol perftest -n 32 -o read -s 512 -t 30 nvme0ns1 +.Pp +Run a performance test on nvme0ns1 using 32 kernel threads for 30 seconds. Each +thread will issue a single 512 byte read command. Results are printed to +stdout when 30 seconds expires. +.Pp +.Sh AUTHORS +.An -nosplit +.Nm +was developed by Intel and originally written by +.An Jim Harris Aq jimharris@FreeBSD.org . +.Pp +This man page was written by +.An Jim Harris Aq jimharris@FreeBSD.org . diff --git a/sbin/nvmecontrol/nvmecontrol.c b/sbin/nvmecontrol/nvmecontrol.c new file mode 100644 index 000000000000..6abd3f5d1d7a --- /dev/null +++ b/sbin/nvmecontrol/nvmecontrol.c @@ -0,0 +1,600 @@ +/*- + * Copyright (C) 2012 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEVLIST_USAGE \ +" nvmecontrol devlist\n" + +#define IDENTIFY_USAGE \ +" nvmecontrol identify \n" + +#define PERFTEST_USAGE \ +" nvmecontrol perftest <-n num_threads> <-o read|write>\n" \ +" <-s size_in_bytes> <-t time_in_seconds>\n" \ +" <-i intr|wait> [-f refthread] [-p]\n" \ +" \n" + +static void perftest_usage(void); + +static void +usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, DEVLIST_USAGE); + fprintf(stderr, IDENTIFY_USAGE); + fprintf(stderr, PERFTEST_USAGE); + exit(EX_USAGE); +} + +static void +print_controller_hex(struct nvme_controller_data *cdata, uint32_t length) +{ + uint32_t *p; + uint32_t i, j; + + p = (uint32_t *)cdata; + length /= sizeof(uint32_t); + + for (i = 0; i < length; i+=8) { + printf("%03x: ", i*4); + for (j = 0; j < 8; j++) + printf("%08x ", p[i+j]); + printf("\n"); + } + + printf("\n"); +} + +static void +print_controller(struct nvme_controller_data *cdata) +{ + printf("Controller Capabilities/Features\n"); + printf("================================\n"); + printf("Vendor ID: %04x\n", cdata->vid); + printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); + printf("Serial Number: %s\n", cdata->sn); + printf("Model Number: %s\n", cdata->mn); + printf("Firmware Version: %s\n", cdata->fr); + printf("Recommended Arb Burst: %d\n", cdata->rab); + printf("IEEE OUI Identifier: %02x %02x %02x\n", + cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); + printf("Multi-Interface Cap: %02x\n", cdata->mic); + /* TODO: Use CAP.MPSMIN to determine true memory page size. */ + printf("Max Data Transfer Size: "); + if (cdata->mdts == 0) + printf("Unlimited\n"); + else + printf("%d\n", PAGE_SIZE * (1 << cdata->mdts)); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Security Send/Receive: %s\n", + cdata->oacs.security ? "Supported" : "Not Supported"); + printf("Format NVM: %s\n", + cdata->oacs.format ? "Supported" : "Not Supported"); + printf("Firmware Activate/Download: %s\n", + cdata->oacs.firmware ? "Supported" : "Not Supported"); + printf("Abort Command Limit: %d\n", cdata->acl+1); + printf("Async Event Request Limit: %d\n", cdata->aerl+1); + printf("Number of Firmware Slots: "); + if (cdata->oacs.firmware != 0) + printf("%d\n", cdata->frmw.num_slots); + else + printf("N/A\n"); + printf("Firmware Slot 1 Read-Only: "); + if (cdata->oacs.firmware != 0) + printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); + else + printf("N/A\n"); + printf("Per-Namespace SMART Log: %s\n", + cdata->lpa.ns_smart ? "Yes" : "No"); + printf("Error Log Page Entries: %d\n", cdata->elpe+1); + printf("Number of Power States: %d\n", cdata->npss+1); + printf("\n"); + + printf("NVM Command Set Attributes\n"); + printf("==========================\n"); + printf("Submission Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->sqes.max); + printf(" Min: %d\n", 1 << cdata->sqes.min); + printf("Completion Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->cqes.max); + printf(" Min: %d\n", 1 << cdata->cqes.min); + printf("Number of Namespaces: %d\n", cdata->nn); + printf("Compare Command: %s\n", + cdata->oncs.compare ? "Supported" : "Not Supported"); + printf("Write Uncorrectable Command: %s\n", + cdata->oncs.write_unc ? "Supported" : "Not Supported"); + printf("Dataset Management Command: %s\n", + cdata->oncs.dsm ? "Supported" : "Not Supported"); + printf("Volatile Write Cache: %s\n", + cdata->vwc.present ? "Present" : "Not Present"); +} + +static void +print_namespace_hex(struct nvme_namespace_data *nsdata, uint32_t length) +{ + uint32_t *p; + uint32_t i, j; + + p = (uint32_t *)nsdata; + length /= sizeof(uint32_t); + + for (i = 0; i < length; i+=8) { + printf("%03x: ", i*4); + for (j = 0; j < 8; j++) + printf("%08x ", p[i+j]); + printf("\n"); + } + + printf("\n"); +} + +static void +print_namespace(struct nvme_namespace_data *nsdata) +{ + uint32_t i; + + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, + (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + printf("Thin Provisioning: %s\n", + nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); + printf("Number of LBA Formats: %d\n", nsdata->nlbaf+1); + printf("Current LBA Format: LBA Format #%d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) { + printf("LBA Format #%d:\n", i); + printf(" LBA Data Size: %d\n", + 1 << nsdata->lbaf[i].lbads); + } +} + +static uint32_t +ns_get_sector_size(struct nvme_namespace_data *nsdata) +{ + + return (1 << nsdata->lbaf[0].lbads); +} + + +static void +devlist(int argc, char *argv[]) +{ + struct nvme_controller_data cdata; + struct nvme_namespace_data nsdata; + struct stat devstat; + char name[64], path[64]; + uint32_t i; + int ch, ctrlr, exit_code, fd, found; + + exit_code = EX_OK; + + while ((ch = getopt(argc, argv, "")) != -1) { + switch ((char)ch) { + default: + usage(); + } + } + + ctrlr = -1; + found = 0; + + while (1) { + ctrlr++; + sprintf(name, "nvme%d", ctrlr); + sprintf(path, "/dev/%s", name); + + if (stat(path, &devstat) != 0) + break; + + found++; + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("Could not open %s.\n", path); + exit_code = EX_NOPERM; + continue; + } + + if (ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == -1) { + printf("ioctl to %s failed.\n", path); + exit_code = EX_IOERR; + continue; + } + + printf("%6s: %s\n", name, cdata.mn); + + for (i = 0; i < cdata.nn; i++) { + sprintf(name, "nvme%dns%d", ctrlr, i+1); + sprintf(path, "/dev/%s", name); + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("Could not open %s.\n", path); + exit_code = EX_NOPERM; + continue; + } + if (ioctl(fd, NVME_IDENTIFY_NAMESPACE, &nsdata) == -1) { + printf("ioctl to %s failed.\n", path); + exit_code = EX_IOERR; + continue; + } + printf(" %10s (%lldGB)\n", + name, + nsdata.nsze * + (long long)ns_get_sector_size(&nsdata) / + 1024 / 1024 / 1024); + } + } + + if (found == 0) + printf("No NVMe controllers found.\n"); + + exit(exit_code); +} + +static void +identify_ctrlr(int argc, char *argv[]) +{ + struct nvme_controller_data cdata; + struct stat devstat; + char path[64]; + int ch, fd, hexflag = 0, hexlength; + int verboseflag = 0; + + while ((ch = getopt(argc, argv, "vx")) != -1) { + switch ((char)ch) { + case 'v': + verboseflag = 1; + break; + case 'x': + hexflag = 1; + break; + default: + usage(); + } + } + + sprintf(path, "/dev/%s", argv[optind]); + + if (stat(path, &devstat) != 0) { + printf("Invalid device node '%s'.\n", path); + exit(EX_IOERR); + } + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("Could not open %s.\n", path); + exit(EX_NOPERM); + } + + if (ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == -1) { + printf("ioctl to %s failed.\n", path); + exit(EX_IOERR); + } + + if (hexflag == 1) { + if (verboseflag == 1) + hexlength = sizeof(struct nvme_controller_data); + else + hexlength = offsetof(struct nvme_controller_data, + reserved5); + print_controller_hex(&cdata, hexlength); + exit(EX_OK); + } + + if (verboseflag == 1) { + printf("-v not currently supported without -x.\n"); + usage(); + } + + print_controller(&cdata); + exit(EX_OK); +} + +static void +identify_ns(int argc, char *argv[]) +{ + struct nvme_namespace_data nsdata; + struct stat devstat; + char path[64]; + int ch, fd, hexflag = 0, hexlength; + int verboseflag = 0; + + while ((ch = getopt(argc, argv, "vx")) != -1) { + switch ((char)ch) { + case 'v': + verboseflag = 1; + break; + case 'x': + hexflag = 1; + break; + default: + usage(); + } + } + + sprintf(path, "/dev/%s", argv[optind]); + + if (stat(path, &devstat) != 0) { + printf("Invalid device node '%s'.\n", path); + exit(EX_IOERR); + } + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("Could not open %s.\n", path); + exit(EX_NOPERM); + } + + if (ioctl(fd, NVME_IDENTIFY_NAMESPACE, &nsdata) == -1) { + printf("ioctl to %s failed.\n", path); + exit(EX_IOERR); + } + + if (hexflag == 1) { + if (verboseflag == 1) + hexlength = sizeof(struct nvme_namespace_data); + else + hexlength = offsetof(struct nvme_namespace_data, + reserved6); + print_namespace_hex(&nsdata, hexlength); + exit(EX_OK); + } + + if (verboseflag == 1) { + printf("-v not currently supported without -x.\n"); + usage(); + } + + print_namespace(&nsdata); + exit(EX_OK); +} + +static void +identify(int argc, char *argv[]) +{ + char *target; + + if (argc < 2) + usage(); + + while (getopt(argc, argv, "vx") != -1) ; + + target = argv[optind]; + + /* Specified device node must have "nvme" in it. */ + if (strstr(argv[optind], "nvme") == NULL) { + printf("Invalid device node '%s'.\n", argv[optind]); + exit(EX_IOERR); + } + + optreset = 1; + optind = 1; + + /* + * If devicde node contains "ns", we consider it a namespace, + * otherwise, consider it a controller. + */ + if (strstr(target, "ns") == NULL) + identify_ctrlr(argc, argv); + else + identify_ns(argc, argv); +} + +static void +print_perftest(struct nvme_io_test *io_test, bool perthread) +{ + uint32_t i, io_completed = 0, iops, mbps; + + for (i = 0; i < io_test->num_threads; i++) + io_completed += io_test->io_completed[i]; + + iops = io_completed/io_test->time; + mbps = iops * io_test->size / (1024*1024); + + printf("Threads: %2d Size: %6d %5s Time: %3d IO/s: %7d MB/s: %4d\n", + io_test->num_threads, io_test->size, + io_test->opc == NVME_OPC_READ ? "READ" : "WRITE", + io_test->time, iops, mbps); + + if (perthread) + for (i = 0; i < io_test->num_threads; i++) + printf("\t%3d: %8d IO/s\n", i, + io_test->io_completed[i]/io_test->time); + + exit(1); +} + +static void +perftest_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, PERFTEST_USAGE); + exit(EX_USAGE); +} + +static void +perftest(int argc, char *argv[]) +{ + struct nvme_io_test io_test; + int fd; + char ch; + char *p; + const char *name; + char path[64]; + u_long ioctl_cmd = NVME_IO_TEST; + bool nflag, oflag, sflag, tflag; + int err, perthread = 0; + + nflag = oflag = sflag = tflag = false; + name = NULL; + + memset(&io_test, 0, sizeof(io_test)); + + while ((ch = getopt(argc, argv, "f:i:n:o:ps:t:")) != -1) { + switch (ch) { + case 'f': + if (!strcmp(optarg, "refthread")) + io_test.flags |= NVME_TEST_FLAG_REFTHREAD; + break; + case 'i': + if (!strcmp(optarg, "bio") || + !strcmp(optarg, "wait")) + ioctl_cmd = NVME_BIO_TEST; + else if (!strcmp(optarg, "io") || + !strcmp(optarg, "intr")) + ioctl_cmd = NVME_IO_TEST; + break; + case 'n': + nflag = true; + io_test.num_threads = strtoul(optarg, &p, 0); + if (p != NULL && *p != '\0') { + fprintf(stderr, + "\"%s\" not valid number of threads.\n", + optarg); + perftest_usage(); + } else if (io_test.num_threads == 0 || + io_test.num_threads > 128) { + fprintf(stderr, + "\"%s\" not valid number of threads.\n", + optarg); + perftest_usage(); + } + break; + case 'o': + oflag = true; + if (!strcmp(optarg, "read") || !strcmp(optarg, "READ")) + io_test.opc = NVME_OPC_READ; + else if (!strcmp(optarg, "write") || + !strcmp(optarg, "WRITE")) + io_test.opc = NVME_OPC_WRITE; + else { + fprintf(stderr, "\"%s\" not valid opcode.\n", + optarg); + perftest_usage(); + } + break; + case 'p': + perthread = 1; + break; + case 's': + sflag = true; + io_test.size = strtoul(optarg, &p, 0); + if (p == NULL || *p == '\0' || toupper(*p) == 'B') { + // do nothing + } else if (toupper(*p) == 'K') { + io_test.size *= 1024; + } else if (toupper(*p) == 'M') { + io_test.size *= 1024 * 1024; + } else { + fprintf(stderr, "\"%s\" not valid size.\n", + optarg); + perftest_usage(); + } + break; + case 't': + tflag = true; + io_test.time = strtoul(optarg, &p, 0); + if (p != NULL && *p != '\0') { + fprintf(stderr, + "\"%s\" not valid time duration.\n", + optarg); + perftest_usage(); + } + break; + } + } + + name = argv[optind]; + + if (!nflag || !oflag || !sflag || !tflag || name == NULL) + perftest_usage(); + + sprintf(path, "/dev/%s", name); + + fd = open(path, O_RDWR); + if (fd < 0) { + fprintf(stderr, "%s not valid device.\n", path); + perftest_usage(); + } + + err = ioctl(fd, ioctl_cmd, &io_test); + + if (err) { + fprintf(stderr, "NVME_IO_TEST returned %d\n", errno); + exit(EX_IOERR); + } + + print_perftest(&io_test, perthread); + exit(EX_OK); +} + +int +main(int argc, char *argv[]) +{ + + if (argc < 2) + usage(); + + if (strcmp(argv[1], "devlist") == 0) + devlist(argc-1, &argv[1]); + else if (strcmp(argv[1], "identify") == 0) + identify(argc-1, &argv[1]); + else if (strcmp(argv[1], "perftest") == 0) + perftest(argc-1, &argv[1]); + + usage(); + + return (0); +}