From 2a4fd6b17a3ca38cb1f1cdc5154ba59c66199e38 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Mon, 26 Oct 2015 19:34:12 +0000 Subject: [PATCH] ioat: Add support for Block Fill operations The IOAT hardware supports writing a 64-bit pattern to some destination buffer. The same limitations on buffer length apply as for copy operations. Throughput is a bit higher (probably because fill does not have to spend bandwidth reading from a source in memory). Support for testing Block Fill has been added to ioatcontrol(8) and the ioat_test device. ioatcontrol(8) accepts the '-f' flag, which tests Block Fill. (If the flag is omitted, the tool tests copy by default.) The '-V' flag, in conjunction with '-f', verifies that buffers are filled in the expected pattern. Tested on: Broadwell DE (Xeon D-1500) Sponsored by: EMC / Isilon Storage Division --- sys/dev/ioat/ioat.c | 31 +++++++++++++++++++++++++ sys/dev/ioat/ioat.h | 8 +++++++ sys/dev/ioat/ioat_test.c | 41 ++++++++++++++++++++++++++++++---- sys/dev/ioat/ioat_test.h | 9 ++++++++ tools/tools/ioat/ioatcontrol.8 | 17 +++++++++----- tools/tools/ioat/ioatcontrol.c | 12 ++++++++-- 6 files changed, 107 insertions(+), 11 deletions(-) diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index fed6e37d380e..b3939f89b0da 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -748,6 +748,37 @@ ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, return (&desc->bus_dmadesc); } +struct bus_dmadesc * +ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, + bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, + uint32_t flags) +{ + struct ioat_fill_hw_descriptor *hw_desc; + struct ioat_descriptor *desc; + struct ioat_softc *ioat; + + CTR0(KTR_IOAT, __func__); + ioat = to_ioat_softc(dmaengine); + + if ((dst & (0xffffull << 48)) != 0) { + ioat_log_message(0, "%s: High 16 bits of dst invalid\n", + __func__); + return (NULL); + } + + desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, 
fillpattern, dst, + callback_fn, callback_arg, flags); + if (desc == NULL) + return (NULL); + + hw_desc = desc->u.fill; + if (g_ioat_debug_level >= 3) + dump_descriptor(hw_desc); + + ioat_submit_single(ioat); + return (&desc->bus_dmadesc); +} + /* * Ring Management */ diff --git a/sys/dev/ioat/ioat.h b/sys/dev/ioat/ioat.h index 46f9769be824..e99714df275f 100644 --- a/sys/dev/ioat/ioat.h +++ b/sys/dev/ioat/ioat.h @@ -68,6 +68,14 @@ void ioat_put_dmaengine(bus_dmaengine_t dmaengine); void ioat_acquire(bus_dmaengine_t dmaengine); void ioat_release(bus_dmaengine_t dmaengine); +/* + * Issue a blockfill operation. The 64-bit pattern 'fillpattern' is written to + * 'len' physically contiguous bytes at 'dst'. + */ +struct bus_dmadesc *ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, + uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, + void *callback_arg, uint32_t flags); + /* Issues the copy data operation */ struct bus_dmadesc *ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, diff --git a/sys/dev/ioat/ioat_test.c b/sys/dev/ioat/ioat_test.c index 215b0363e429..3f88ed98a689 100644 --- a/sys/dev/ioat/ioat_test.c +++ b/sys/dev/ioat/ioat_test.c @@ -123,11 +123,26 @@ test_transaction *ioat_test_transaction_create(unsigned num_buffers, static bool ioat_compare_ok(struct test_transaction *tx) { - uint32_t i; + struct ioat_test *test; + char *dst, *src; + uint32_t i, j; + + test = tx->test; for (i = 0; i < tx->depth; i++) { - if (memcmp(tx->buf[2*i], tx->buf[2*i+1], tx->length) != 0) - return (false); + dst = tx->buf[2 * i + 1]; + src = tx->buf[2 * i]; + + if (test->testkind == IOAT_TEST_FILL) { + for (j = 0; j < tx->length; j += sizeof(uint64_t)) { + if (memcmp(src, &dst[j], + MIN(sizeof(uint64_t), tx->length - j)) + != 0) + return (false); + } + } else if (test->testkind == IOAT_TEST_DMA) + if (memcmp(src, dst, tx->length) != 0) + return (false); } return (true); } @@ 
-208,8 +223,11 @@ ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma) struct bus_dmadesc *desc; bus_dmaengine_callback_t cb; bus_addr_t src, dest; + uint64_t fillpattern; uint32_t i, flags; + desc = NULL; + IT_LOCK(); while (TAILQ_EMPTY(&test->free_q)) msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0); @@ -232,7 +250,15 @@ ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma) flags = 0; } - desc = ioat_copy(dma, src, dest, tx->length, cb, tx, flags); + if (test->testkind == IOAT_TEST_DMA) + desc = ioat_copy(dma, dest, src, tx->length, cb, tx, + flags); + else if (test->testkind == IOAT_TEST_FILL) { + fillpattern = *(uint64_t *)tx->buf[2*i]; + desc = ioat_blockfill(dma, dest, fillpattern, + tx->length, cb, tx, flags); + } + if (desc == NULL) panic("Failed to allocate a ring slot " "-- this shouldn't happen!"); @@ -279,6 +305,13 @@ ioat_dma_test(void *arg) return; } + if (test->testkind >= IOAT_NUM_TESTKINDS) { + ioat_test_log(0, "Invalid kind %u\n", + (unsigned)test->testkind); + test->status[IOAT_TEST_INVALID_INPUT]++; + return; + } + dmaengine = ioat_get_dmaengine(test->channel_index); if (dmaengine == NULL) { ioat_test_log(0, "Couldn't acquire dmaengine\n"); diff --git a/sys/dev/ioat/ioat_test.h b/sys/dev/ioat/ioat_test.h index 290d09b49017..ecfef7d5cb19 100644 --- a/sys/dev/ioat/ioat_test.h +++ b/sys/dev/ioat/ioat_test.h @@ -34,15 +34,24 @@ enum ioat_res { IOAT_TEST_NO_DMA_ENGINE, IOAT_TEST_NO_MEMORY, IOAT_TEST_MISCOMPARE, + IOAT_TEST_INVALID_INPUT, IOAT_NUM_RES }; +enum ioat_test_kind { + IOAT_TEST_FILL = 0, + IOAT_TEST_DMA, + IOAT_NUM_TESTKINDS +}; + struct test_transaction; struct ioat_test { volatile uint32_t status[IOAT_NUM_RES]; uint32_t channel_index; + enum ioat_test_kind testkind; + /* HW max of 1MB */ uint32_t buffer_size; uint32_t chain_depth; diff --git a/tools/tools/ioat/ioatcontrol.8 b/tools/tools/ioat/ioatcontrol.8 index b04db858fb8b..7e3234825eda 100644 --- a/tools/tools/ioat/ioatcontrol.8 +++ 
b/tools/tools/ioat/ioatcontrol.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 21, 2015 +.Dd October 26, 2015 .Dt IOATCONTROL 8 .Os .Sh NAME @@ -33,6 +33,7 @@ .Xr ioat 4 .Sh SYNOPSIS .Nm +.Op Fl f .Op Fl V .Ar channel_number .Ar num_txns @@ -46,8 +47,12 @@ allows one to issue some number of test operations to the driver on a specific hardware channel. The arguments are as follows: .Bl -tag -width Ds +.It Fl f +Test block fill (by default, +.Nm +tests copy) .It Fl V -Verify copies for accuracy +Verify copies/fills for accuracy .El .Pp .Nm @@ -98,13 +103,14 @@ The interface between .Nm and .Xr ioat 4 . -.Nm +.Xr ioat 4 exposes it with .Cd hw.ioat.enable_ioat_test=1 . .Sh DIAGNOSTICS The wait channel .Va test_submit -indicates that the test code is keeping the DMA engine full of work. +indicates that the test code has enqueued all requested transactions and is +waiting on the IOAT hardware to complete one before issuing another operation. .Sh SEE ALSO .Xr ioat 4 .Sh HISTORY @@ -119,7 +125,8 @@ driver and .Nm tool were developed by .An \&Jim Harris Aq Mt jimharris@FreeBSD.org , +.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com , and -.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com . +.An \&Conrad Meyer Aq Mt cem@FreeBSD.org . This manual page was written by .An \&Conrad Meyer Aq Mt cem@FreeBSD.org . 
diff --git a/tools/tools/ioat/ioatcontrol.c b/tools/tools/ioat/ioatcontrol.c
index 88fbea657fa3..e55ce4f605d3 100644
--- a/tools/tools/ioat/ioatcontrol.c
+++ b/tools/tools/ioat/ioatcontrol.c
@@ -48,7 +48,7 @@ static void
 usage(void)
 {
 
-	printf("Usage: %s [-V] <channel #> <txns> [<bufsize> "
+	printf("Usage: %s [-fV] <channel #> <txns> [<bufsize> "
 	    "[<chain-len> [duration]]]\n", getprogname());
 	exit(EX_USAGE);
 }
@@ -58,9 +58,13 @@ main(int argc, char **argv)
 {
 	struct ioat_test t;
 	int fd, ch;
+	bool fflag = false;	/* stays false unless -f is given: test copy */
 
-	while ((ch = getopt(argc, argv, "V")) != -1) {
+	while ((ch = getopt(argc, argv, "fV")) != -1) {
 		switch (ch) {
+		case 'f':
+			fflag = true;
+			break;
 		case 'V':
 			t.verify = true;
 			break;
@@ -78,6 +82,10 @@ main(int argc, char **argv)
 	t.buffer_size = 256 * 1024;
 	t.chain_depth = 2;
 	t.duration = 0;
+	t.testkind = IOAT_TEST_DMA;
+
+	if (fflag)
+		t.testkind = IOAT_TEST_FILL;
 
 	t.channel_index = atoi(argv[0]);
 	if (t.channel_index > 8) {