ioat: Add support for Block Fill operations

The IOAT hardware supports filling a destination buffer with a repeated
64-bit pattern.  The same limitations on buffer length apply as for copy
operations.  Throughput is a bit higher than for copy (probably because
fill does not have to spend bandwidth reading from a source in memory).

Support for testing Block Fill has been added to ioatcontrol(8) and the
ioat_test device.  ioatcontrol(8) accepts the '-f' flag, which tests
Block Fill.  (If the flag is omitted, the tool tests copy by default.)
The '-V' flag, in conjunction with '-f', verifies that buffers are
filled in the expected pattern.

Tested on:	Broadwell DE (Xeon D-1500)
Sponsored by:	EMC / Isilon Storage Division
This commit is contained in:
Conrad Meyer 2015-10-26 19:34:12 +00:00
parent 9e3bbf26a9
commit 2a4fd6b17a
6 changed files with 107 additions and 11 deletions

View File

@ -748,6 +748,37 @@ ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
return (&desc->bus_dmadesc);
}
/*
 * Queue a Block Fill operation on an IOAT channel and submit it.
 *
 * 'len', 'fillpattern', 'dst', the completion callback and 'flags' are
 * handed to ioat_op_generic() with the IOAT_OP_FILL opcode.  Returns the
 * generic DMA descriptor embedded in the queued operation, or NULL when
 * 'dst' has any of its top 16 bits set or ioat_op_generic() returns NULL.
 */
struct bus_dmadesc *
ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern,
    bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg,
    uint32_t flags)
{
	const uint64_t dst_high_mask = 0xffffull << 48;
	struct ioat_fill_hw_descriptor *fill_desc;
	struct ioat_descriptor *op;
	struct ioat_softc *sc;

	CTR0(KTR_IOAT, __func__);
	sc = to_ioat_softc(dmaengine);

	/* Refuse destinations that use any of the top 16 address bits. */
	if ((dst & dst_high_mask) != 0) {
		ioat_log_message(0, "%s: High 16 bits of dst invalid\n",
		    __func__);
		return (NULL);
	}

	op = ioat_op_generic(sc, IOAT_OP_FILL, len, fillpattern, dst,
	    callback_fn, callback_arg, flags);
	if (op == NULL)
		return (NULL);

	/* The hardware descriptor is only looked at for debug dumps. */
	if (g_ioat_debug_level >= 3) {
		fill_desc = op->u.fill;
		dump_descriptor(fill_desc);
	}

	ioat_submit_single(sc);
	return (&op->bus_dmadesc);
}
/*
* Ring Management
*/

View File

@ -68,6 +68,14 @@ void ioat_put_dmaengine(bus_dmaengine_t dmaengine);
void ioat_acquire(bus_dmaengine_t dmaengine);
void ioat_release(bus_dmaengine_t dmaengine);
/*
 * Issue a blockfill operation. The 64-bit pattern 'fillpattern' is written to
 * 'len' physically contiguous bytes at 'dst'.
 *
 * The high 16 bits of 'dst' must be zero; otherwise the request is logged
 * and rejected.  The same limitations on 'len' apply as for ioat_copy().
 *
 * 'callback_fn', 'callback_arg' and 'flags' are passed through to the
 * driver's generic descriptor setup.
 * NOTE(review): presumably they have the same meaning as for ioat_copy() --
 * confirm against the driver.
 *
 * Returns the descriptor for the submitted operation, or NULL if 'dst' is
 * invalid or no descriptor could be set up.
 */
struct bus_dmadesc *ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst,
uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn,
void *callback_arg, uint32_t flags);
/* Issues the copy data operation */
struct bus_dmadesc *ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,

View File

@ -123,11 +123,26 @@ test_transaction *ioat_test_transaction_create(unsigned num_buffers,
/*
 * Verify the buffers of a test transaction.
 *
 * Buffers are used in pairs: buf[2 * i] is the source and buf[2 * i + 1] is
 * the destination of the i-th operation, for i in [0, tx->depth).  Returns
 * false on the first mismatch found and true otherwise.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers.  The
 * lone 'uint32_t i;' declaration and the unconditional memcmp() at the top
 * of the loop look like the lines being removed -- confirm against the
 * committed file.
 */
static bool
ioat_compare_ok(struct test_transaction *tx)
{
uint32_t i;
struct ioat_test *test;
char *dst, *src;
uint32_t i, j;
test = tx->test;
for (i = 0; i < tx->depth; i++) {
if (memcmp(tx->buf[2*i], tx->buf[2*i+1], tx->length) != 0)
return (false);
dst = tx->buf[2 * i + 1];
src = tx->buf[2 * i];
if (test->testkind == IOAT_TEST_FILL) {
/*
 * Fill: the pattern is the first 64 bits of the source buffer.  Each
 * 8-byte chunk of the destination must equal it; MIN() shortens the
 * comparison for a final chunk smaller than 8 bytes.
 */
for (j = 0; j < tx->length; j += sizeof(uint64_t)) {
if (memcmp(src, &dst[j],
MIN(sizeof(uint64_t), tx->length - j))
!= 0)
return (false);
}
} else if (test->testkind == IOAT_TEST_DMA)
/* Copy: the destination must equal the source over the full length. */
if (memcmp(src, dst, tx->length) != 0)
return (false);
}
return (true);
}
@ -208,8 +223,11 @@ ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
struct bus_dmadesc *desc;
bus_dmaengine_callback_t cb;
bus_addr_t src, dest;
uint64_t fillpattern;
uint32_t i, flags;
desc = NULL;
IT_LOCK();
while (TAILQ_EMPTY(&test->free_q))
msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);
@ -232,7 +250,15 @@ ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
flags = 0;
}
desc = ioat_copy(dma, src, dest, tx->length, cb, tx, flags);
if (test->testkind == IOAT_TEST_DMA)
desc = ioat_copy(dma, dest, src, tx->length, cb, tx,
flags);
else if (test->testkind == IOAT_TEST_FILL) {
fillpattern = *(uint64_t *)tx->buf[2*i];
desc = ioat_blockfill(dma, dest, fillpattern,
tx->length, cb, tx, flags);
}
if (desc == NULL)
panic("Failed to allocate a ring slot "
"-- this shouldn't happen!");
@ -279,6 +305,13 @@ ioat_dma_test(void *arg)
return;
}
if (test->testkind >= IOAT_NUM_TESTKINDS) {
ioat_test_log(0, "Invalid kind %u\n",
(unsigned)test->testkind);
test->status[IOAT_TEST_INVALID_INPUT]++;
return;
}
dmaengine = ioat_get_dmaengine(test->channel_index);
if (dmaengine == NULL) {
ioat_test_log(0, "Couldn't acquire dmaengine\n");

View File

@ -34,15 +34,24 @@ enum ioat_res {
IOAT_TEST_NO_DMA_ENGINE,
IOAT_TEST_NO_MEMORY,
IOAT_TEST_MISCOMPARE,
IOAT_TEST_INVALID_INPUT,
IOAT_NUM_RES
};
/*
 * Kind of operation the ioat_test device issues for a test run.  The value
 * is carried in struct ioat_test.testkind.
 */
enum ioat_test_kind {
/* Block Fill: transactions are issued with ioat_blockfill(). */
IOAT_TEST_FILL = 0,
/* Copy: transactions are issued with ioat_copy(). */
IOAT_TEST_DMA,
/* Number of valid kinds; a testkind >= this is rejected as invalid input. */
IOAT_NUM_TESTKINDS
};
struct test_transaction;
struct ioat_test {
volatile uint32_t status[IOAT_NUM_RES];
uint32_t channel_index;
enum ioat_test_kind testkind;
/* HW max of 1MB */
uint32_t buffer_size;
uint32_t chain_depth;

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 21, 2015
.Dd October 26, 2015
.Dt IOATCONTROL 8
.Os
.Sh NAME
@ -33,6 +33,7 @@
.Xr ioat 4
.Sh SYNOPSIS
.Nm
.Op Fl f
.Op Fl V
.Ar channel_number
.Ar num_txns
@ -46,8 +47,12 @@ allows one to issue some number of test operations to the
driver on a specific hardware channel.
The arguments are as follows:
.Bl -tag -width Ds
.It Fl f
Test block fill (by default,
.Nm
tests copy)
.It Fl V
Verify copies for accuracy
Verify copies/fills for accuracy
.El
.Pp
.Nm
@ -98,13 +103,14 @@ The interface between
.Nm
and
.Xr ioat 4 .
.Nm
.Xr ioat 4
exposes it with
.Cd hw.ioat.enable_ioat_test=1 .
.Sh DIAGNOSTICS
The wait channel
.Va test_submit
indicates that the test code is keeping the DMA engine full of work.
indicates that the test code has enqueued all requested transactions and is
waiting on the IOAT hardware to complete one before issuing another operation.
.Sh SEE ALSO
.Xr ioat 4
.Sh HISTORY
@ -119,7 +125,8 @@ driver and
.Nm
tool were developed by
.An \&Jim Harris Aq Mt jimharris@FreeBSD.org ,
.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com ,
and
.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com .
.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
This manual page was written by
.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .

View File

@ -48,7 +48,7 @@ static void
usage(void)
{
/*
 * Print the command-line synopsis to stdout and exit with EX_USAGE.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers.  The
 * "[-V]" printf line looks like the removed line and "[-fV]" its
 * replacement -- confirm against the committed file.
 */
printf("Usage: %s [-V] <channel #> <txns> [<bufsize> "
printf("Usage: %s [-fV] <channel #> <txns> [<bufsize> "
"[<chain-len> [duration]]]\n", getprogname());
exit(EX_USAGE);
}
@ -58,9 +58,13 @@ main(int argc, char **argv)
{
struct ioat_test t;
int fd, ch;
bool fflag;
while ((ch = getopt(argc, argv, "V")) != -1) {
while ((ch = getopt(argc, argv, "fV")) != -1) {
switch (ch) {
case 'f':
fflag = true;
break;
case 'V':
t.verify = true;
break;
@ -78,6 +82,10 @@ main(int argc, char **argv)
t.buffer_size = 256 * 1024;
t.chain_depth = 2;
t.duration = 0;
t.testkind = IOAT_TEST_DMA;
if (fflag)
t.testkind = IOAT_TEST_FILL;
t.channel_index = atoi(argv[0]);
if (t.channel_index > 8) {