From ae2a2410dfbb8af28cde979e8837897684afd270 Mon Sep 17 00:00:00 2001 From: cem Date: Mon, 14 Dec 2015 22:01:52 +0000 Subject: [PATCH] ioat(4): Add support for interrupt coalescing In I/OAT, this is done through the INTRDELAY register. On supported platforms, this register can coalesce interrupts in a set period to avoid excessive interrupt load for small descriptor workflows. The period is configurable anywhere from 1 microsecond to 16.383 milliseconds, in microsecond granularity. Sponsored by: EMC / Isilon Storage Division --- share/man/man4/ioat.4 | 20 +++++++++++++++++- sys/dev/ioat/ioat.c | 40 ++++++++++++++++++++++++++++++++++++ sys/dev/ioat/ioat.h | 22 ++++++++++++++++++++ sys/dev/ioat/ioat_hw.h | 4 ++++ sys/dev/ioat/ioat_internal.h | 3 +++ 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/share/man/man4/ioat.4 b/share/man/man4/ioat.4 index faa22ae49960..7ba7768fde8c 100644 --- a/share/man/man4/ioat.4 +++ b/share/man/man4/ioat.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 9, 2015 +.Dd December 14, 2015 .Dt IOAT 4 .Os .Sh NAME @@ -63,6 +63,10 @@ In .Fn ioat_get_dmaengine "uint32_t channel_index" .Ft void .Fn ioat_put_dmaengine "bus_dmaengine_t dmaengine" +.Ft int +.Fn ioat_set_interrupt_coalesce "bus_dmaengine_t dmaengine" "uint16_t delay" +.Ft uint16_t +.Fn ioat_get_max_coalesce_period "bus_dmaengine_t dmaengine" .Ft void .Fn ioat_acquire "bus_dmaengine_t dmaengine" .Ft void @@ -129,6 +133,20 @@ flag. For example, a user might submit multiple operations to the same channel and only enable an interrupt and callback for the last operation. .Pp +The hardware can delay and coalesce interrupts on a given channel for a +configurable period of time, in microseconds. +This may be desired to reduce the processing and interrupt overhead per +descriptor, especially for workflows consisting of many small operations. +Software can control this on a per-channel basis with the +.Fn ioat_set_interrupt_coalesce +API. 
+The +.Fn ioat_get_max_coalesce_period +API can be used to determine the maximum coalescing period supported by the +hardware, in microseconds. +Current platforms support up to a 16.383 millisecond coalescing period. +Optimal configuration will vary by workflow and desired operation latency. +.Pp All operations are safe to use in a non-blocking context with the .Ar DMA_NO_WAIT flag. diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index 1f32f9f76fb8..12a6fe429912 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -404,6 +404,11 @@ ioat3_attach(device_t device) xfercap = ioat_read_xfercap(ioat); ioat->max_xfer_size = 1 << xfercap; + ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & + IOAT_INTRDELAY_SUPPORTED) != 0; + if (ioat->intrdelay_supported) + ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK; + /* TODO: need to check DCA here if we ever do XOR/PQ */ mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF); @@ -730,6 +735,32 @@ ioat_put_dmaengine(bus_dmaengine_t dmaengine) ioat_put(ioat, IOAT_DMAENGINE_REF); } +int +ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay) +{ + struct ioat_softc *ioat; + + ioat = to_ioat_softc(dmaengine); + if (!ioat->intrdelay_supported) + return (ENODEV); + if (delay > ioat->intrdelay_max) + return (ERANGE); + + ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay); + ioat->cached_intrdelay = + ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK; + return (0); +} + +uint16_t +ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine) +{ + struct ioat_softc *ioat; + + ioat = to_ioat_softc(dmaengine); + return (ioat->intrdelay_max); +} + void ioat_acquire(bus_dmaengine_t dmaengine) { @@ -1641,6 +1672,11 @@ ioat_setup_sysctl(device_t device) &ioat->version, 0, "HW version (0xMM form)"); SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD, &ioat->max_xfer_size, 0, "HW maximum transfer size"); + SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD, 
+ &ioat->intrdelay_supported, 0, "Is INTRDELAY supported"); + SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD, + &ioat->intrdelay_max, 0, + "Maximum configurable INTRDELAY on this channel (microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL, "IOAT channel internal state"); @@ -1671,6 +1707,10 @@ ioat_setup_sysctl(device_t device) CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A", "String of the channel status"); + SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD, + &ioat->cached_intrdelay, 0, + "Current INTRDELAY on this channel (cached, microseconds)"); + tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL, "Big hammers (mostly for testing)"); hammer = SYSCTL_CHILDREN(tmp); diff --git a/sys/dev/ioat/ioat.h b/sys/dev/ioat/ioat.h index 8174ce81526b..5c64af13a032 100644 --- a/sys/dev/ioat/ioat.h +++ b/sys/dev/ioat/ioat.h @@ -60,6 +60,28 @@ bus_dmaengine_t ioat_get_dmaengine(uint32_t channel_index); /* Release the DMA channel */ void ioat_put_dmaengine(bus_dmaengine_t dmaengine); +/* + * Set interrupt coalescing on a DMA channel. + * + * The argument is in microseconds. A zero value disables coalescing. Any + * other value delays interrupt generation for N microseconds to provide + * opportunity to coalesce multiple operations into a single interrupt. + * + * Returns an error status, or zero on success. + * + * - ERANGE if the given value exceeds the delay supported by the hardware. + * (All current hardware supports a maximum of 0x3fff microseconds delay.) + * - ENODEV if the hardware does not support interrupt coalescing. + */ +int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay); + +/* + * Return the maximum supported coalescing period, for use in + * ioat_set_interrupt_coalesce(). If the hardware does not support coalescing, + * returns zero. 
+ */ +uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine); + /* * Acquire must be called before issuing an operation to perform. Release is * called after. Multiple operations can be issued within the context of one diff --git a/sys/dev/ioat/ioat_hw.h b/sys/dev/ioat/ioat_hw.h index 43c78caeb189..1aeee2d9c999 100644 --- a/sys/dev/ioat/ioat_hw.h +++ b/sys/dev/ioat/ioat_hw.h @@ -50,6 +50,10 @@ __FBSDID("$FreeBSD$"); #define IOAT_VER_3_3 0x33 #define IOAT_INTRDELAY_OFFSET 0x0C +#define IOAT_INTRDELAY_SUPPORTED (1 << 15) +/* Reserved. (1 << 14) */ +/* [13:0] is the coalesce period, in microseconds. */ +#define IOAT_INTRDELAY_US_MASK ((1 << 14) - 1) #define IOAT_CS_STATUS_OFFSET 0x0E diff --git a/sys/dev/ioat/ioat_internal.h b/sys/dev/ioat/ioat_internal.h index 148f3d907be7..1b248513368e 100644 --- a/sys/dev/ioat/ioat_internal.h +++ b/sys/dev/ioat/ioat_internal.h @@ -373,6 +373,8 @@ struct ioat_softc { struct resource *pci_resource; uint32_t max_xfer_size; uint32_t capabilities; + uint16_t intrdelay_max; + uint16_t cached_intrdelay; struct resource *res; int rid; @@ -393,6 +395,7 @@ struct ioat_softc { boolean_t is_completion_pending; boolean_t is_reset_pending; boolean_t is_channel_running; + boolean_t intrdelay_supported; uint32_t head; uint32_t tail;