bec7ff798a
And document them in ioat.4. Sponsored by: EMC / Isilon Storage Division
335 lines
9.7 KiB
Groff
335 lines
9.7 KiB
Groff
.\" Copyright (c) 2015 EMC / Isilon Storage Division
|
|
.\" All rights reserved.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" $FreeBSD$
|
|
.\"
|
|
.Dd May 3, 2016
|
|
.Dt IOAT 4
|
|
.Os
|
|
.Sh NAME
|
|
.Nm I/OAT
|
|
.Nd Intel I/O Acceleration Technology
|
|
.Sh SYNOPSIS
|
|
To compile this driver into your kernel,
|
|
place the following line in your kernel configuration file:
|
|
.Bd -ragged -offset indent
|
|
.Cd "device ioat"
|
|
.Ed
|
|
.Pp
|
|
Or, to load the driver as a module at boot, place the following line in
|
|
.Xr loader.conf 5 :
|
|
.Bd -literal -offset indent
|
|
ioat_load="YES"
|
|
.Ed
|
|
.Pp
|
|
In
|
|
.Xr loader.conf 5 :
|
|
.Pp
|
|
.Cd hw.ioat.force_legacy_interrupts=0
|
|
.Pp
|
|
In
|
|
.Xr loader.conf 5 or
|
|
.Xr sysctl.conf 5 :
|
|
.Pp
|
|
.Cd hw.ioat.enable_ioat_test=0
|
|
.Cd hw.ioat.debug_level=0
|
|
(only critical errors; maximum of 3)
|
|
.Pp
|
|
.Ft typedef void
|
|
.Fn (*bus_dmaengine_callback_t) "void *arg" "int error"
|
|
.Pp
|
|
.Ft bus_dmaengine_t
|
|
.Fn ioat_get_dmaengine "uint32_t channel_index"
|
|
.Ft void
|
|
.Fn ioat_put_dmaengine "bus_dmaengine_t dmaengine"
|
|
.Ft int
|
|
.Fn ioat_get_hwversion "bus_dmaengine_t dmaengine"
|
|
.Ft size_t
|
|
.Fn ioat_get_max_io_size "bus_dmaengine_t dmaengine"
|
|
.Ft int
|
|
.Fn ioat_set_interrupt_coalesce "bus_dmaengine_t dmaengine" "uint16_t delay"
|
|
.Ft uint16_t
|
|
.Fn ioat_get_max_coalesce_period "bus_dmaengine_t dmaengine"
|
|
.Ft void
|
|
.Fn ioat_acquire "bus_dmaengine_t dmaengine"
|
|
.Ft int
|
|
.Fn ioat_acquire_reserve "bus_dmaengine_t dmaengine" "uint32_t n" "int mflags"
|
|
.Ft void
|
|
.Fn ioat_release "bus_dmaengine_t dmaengine"
|
|
.Ft struct bus_dmadesc *
|
|
.Fo ioat_copy
|
|
.Fa "bus_dmaengine_t dmaengine"
|
|
.Fa "bus_addr_t dst"
|
|
.Fa "bus_addr_t src"
|
|
.Fa "bus_size_t len"
|
|
.Fa "bus_dmaengine_callback_t callback_fn"
|
|
.Fa "void *callback_arg"
|
|
.Fa "uint32_t flags"
|
|
.Fc
|
|
.Ft struct bus_dmadesc *
|
|
.Fo ioat_copy_8k_aligned
|
|
.Fa "bus_dmaengine_t dmaengine"
|
|
.Fa "bus_addr_t dst1"
|
|
.Fa "bus_addr_t dst2"
|
|
.Fa "bus_addr_t src1"
|
|
.Fa "bus_addr_t src2"
|
|
.Fa "bus_dmaengine_callback_t callback_fn"
|
|
.Fa "void *callback_arg"
|
|
.Fa "uint32_t flags"
|
|
.Fc
|
|
.Ft struct bus_dmadesc *
|
|
.Fo ioat_copy_crc
|
|
.Fa "bus_dmaengine_t dmaengine"
|
|
.Fa "bus_addr_t dst"
|
|
.Fa "bus_addr_t src"
|
|
.Fa "bus_size_t len"
|
|
.Fa "uint32_t *initialseed"
|
|
.Fa "bus_addr_t crcptr"
|
|
.Fa "bus_dmaengine_callback_t callback_fn"
|
|
.Fa "void *callback_arg"
|
|
.Fa "uint32_t flags"
|
|
.Fc
|
|
.Ft struct bus_dmadesc *
|
|
.Fo ioat_crc
|
|
.Fa "bus_dmaengine_t dmaengine"
|
|
.Fa "bus_addr_t src"
|
|
.Fa "bus_size_t len"
|
|
.Fa "uint32_t *initialseed"
|
|
.Fa "bus_addr_t crcptr"
|
|
.Fa "bus_dmaengine_callback_t callback_fn"
|
|
.Fa "void *callback_arg"
|
|
.Fa "uint32_t flags"
|
|
.Fc
|
|
.Ft struct bus_dmadesc *
|
|
.Fo ioat_blockfill
|
|
.Fa "bus_dmaengine_t dmaengine"
|
|
.Fa "bus_addr_t dst"
|
|
.Fa "uint64_t fillpattern"
|
|
.Fa "bus_size_t len"
|
|
.Fa "bus_dmaengine_callback_t callback_fn"
|
|
.Fa "void *callback_arg"
|
|
.Fa "uint32_t flags"
|
|
.Fc
|
|
.Ft struct bus_dmadesc *
|
|
.Fo ioat_null
|
|
.Fa "bus_dmaengine_t dmaengine"
|
|
.Fa "bus_dmaengine_callback_t callback_fn"
|
|
.Fa "void *callback_arg"
|
|
.Fa "uint32_t flags"
|
|
.Fc
|
|
.Sh DESCRIPTION
|
|
The
|
|
.Nm
|
|
driver provides a kernel API to a variety of DMA engines on some Intel server
|
|
platforms.
|
|
.Pp
|
|
There are a number of DMA channels per CPU package.
|
|
(Typically 4 or 8.)
|
|
Each may be used independently.
|
|
Operations on a single channel proceed sequentially.
|
|
.Pp
|
|
Blockfill operations can be used to write a 64-bit pattern to memory.
|
|
.Pp
|
|
Copy operations can be used to offload memory copies to the DMA engines.
|
|
.Pp
|
|
Null operations do nothing, but may be used to test the interrupt and callback
|
|
mechanism.
|
|
.Pp
|
|
All operations can optionally trigger an interrupt at completion with the
|
|
.Ar DMA_INT_EN
|
|
flag.
|
|
For example, a user might submit multiple operations to the same channel and
|
|
only enable an interrupt and callback for the last operation.
|
|
.Pp
|
|
The hardware can delay and coalesce interrupts on a given channel for a
|
|
configurable period of time, in microseconds.
|
|
This may be desired to reduce the processing and interrupt overhead per
|
|
descriptor, especially for workflows consisting of many small operations.
|
|
Software can control this on a per-channel basis with the
|
|
.Fn ioat_set_interrupt_coalesce
|
|
API.
|
|
The
|
|
.Fn ioat_get_max_coalesce_period
|
|
API can be used to determine the maximum coalescing period supported by the
|
|
hardware, in microseconds.
|
|
Current platforms support up to a 16.383 millisecond coalescing period.
|
|
Optimal configuration will vary by workflow and desired operation latency.
|
|
.Pp
|
|
All operations are safe to use in a non-blocking context with the
|
|
.Ar DMA_NO_WAIT
|
|
flag.
|
|
(Of course, allocations may fail and operations requested with
|
|
.Ar DMA_NO_WAIT
|
|
may return NULL.)
|
|
.Pp
|
|
Operations that depend on the result of prior operations should use
|
|
.Ar DMA_FENCE .
|
|
For example, such a scenario can happen when two related DMA operations are
|
|
queued.
|
|
First, a DMA copy to one location (A), followed directly by a DMA copy
|
|
from A to B.
|
|
In this scenario, some classes of I/OAT hardware may prefetch A for the second
|
|
operation before it is written by the first operation.
|
|
To avoid reading a stale value in sequences of dependent operations, use
|
|
.Ar DMA_FENCE .
|
|
.Pp
|
|
All operations, as well as
|
|
.Fn ioat_get_dmaengine ,
|
|
can return NULL in special circumstances.
|
|
For example, if the
|
|
.Nm
|
|
driver is being unloaded, or the administrator has induced a hardware reset, or
|
|
a usage error has resulted in a hardware error state that needs to be recovered
|
|
from.
|
|
.Pp
|
|
It is invalid to attempt to submit new DMA operations in a
|
|
.Fa bus_dmaengine_callback_t
|
|
context.
|
|
.Pp
|
|
The CRC operations have three distinct modes.
|
|
The default mode is to accumulate.
|
|
By accumulating over multiple descriptors, a user may gather a CRC over several
|
|
chunks of memory and only write out the result once.
|
|
.Pp
|
|
The
|
|
.Ar DMA_CRC_STORE
|
|
flag causes the operation to emit the CRC32C result.
|
|
If
|
|
.Ar DMA_CRC_INLINE
|
|
is set, the result is written inline with the destination data (or source in
|
|
.Fn ioat_crc
|
|
mode).
|
|
If
|
|
.Ar DMA_CRC_INLINE
|
|
is not set, the result is written to the provided
|
|
.Fa crcptr .
|
|
.Pp
|
|
Similarly, the
|
|
.Ar DMA_CRC_TEST
|
|
flag causes the operation to compare the CRC32C result to an existing checksum.
|
|
If
|
|
.Ar DMA_CRC_INLINE
|
|
is set, the result is compared against the inline four bytes trailing the
|
|
source data.
|
|
If it is not set, the result is compared against the value pointed to by
|
|
.Fa crcptr .
|
|
.Pp
|
|
.Fn ioat_copy_crc
|
|
calculates a CRC32C while copying data.
|
|
.Fn ioat_crc
|
|
only computes a CRC32C of some data.
|
|
If the
|
|
.Fa initialseed
|
|
argument to either routine is non-NULL, the CRC32C engine is initialized with
|
|
the value it points to.
|
|
.Sh USAGE
|
|
A typical user will look up the DMA engine object for a given channel with
|
|
.Fn ioat_get_dmaengine .
|
|
When the user wants to offload a copy, they will first
|
|
.Fn ioat_acquire
|
|
the
|
|
.Ar bus_dmaengine_t
|
|
object for exclusive access to enqueue operations on that channel.
|
|
Optionally, the user can reserve space by using
|
|
.Fn ioat_acquire_reserve
|
|
instead.
|
|
If
|
|
.Fn ioat_acquire_reserve
|
|
succeeds, there is guaranteed to be room for
|
|
.Fa n
|
|
new operations in the internal ring buffer.
|
|
.Pp
|
|
Then, they will submit one or more operations using
|
|
.Fn ioat_blockfill ,
|
|
.Fn ioat_copy ,
|
|
.Fn ioat_copy_8k_aligned ,
|
|
.Fn ioat_copy_crc ,
|
|
.Fn ioat_crc ,
|
|
or
|
|
.Fn ioat_null .
|
|
After queuing one or more individual DMA operations, they will
|
|
.Fn ioat_release
|
|
the
|
|
.Ar bus_dmaengine_t
|
|
to drop their exclusive access to the channel.
|
|
The routine they provided for the
|
|
.Fa callback_fn
|
|
argument will be invoked with the provided
|
|
.Fa callback_arg
|
|
when the operation is complete.
|
|
When they are finished with the
|
|
.Ar bus_dmaengine_t ,
|
|
the user should
|
|
.Fn ioat_put_dmaengine .
|
|
.Pp
|
|
Users MUST NOT block between
|
|
.Fn ioat_acquire
|
|
and
|
|
.Fn ioat_release .
|
|
Users SHOULD NOT hold
|
|
.Ar bus_dmaengine_t
|
|
references for a very long time to enable fault recovery and kernel module
|
|
unload.
|
|
.Pp
|
|
For an example of usage, see
|
|
.Pa src/sys/dev/ioat/ioat_test.c .
|
|
.Sh FILES
|
|
.Bl -tag
|
|
.It Pa /dev/ioat_test
|
|
test device for
|
|
.Xr ioatcontrol 8
|
|
.El
|
|
.Sh SEE ALSO
|
|
.Xr ioatcontrol 8
|
|
.Sh HISTORY
|
|
The
|
|
.Nm
|
|
driver first appeared in
|
|
.Fx 11.0 .
|
|
.Sh AUTHORS
|
|
The
|
|
.Nm
|
|
driver was developed by
|
|
.An \&Jim Harris Aq Mt jimharris@FreeBSD.org ,
|
|
.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com ,
|
|
and
|
|
.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
|
|
This manual page was written by
|
|
.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
|
|
.Sh CAVEATS
|
|
Copy operations take bus addresses as parameters, not virtual addresses.
|
|
.Pp
|
|
Buffers for individual copy operations must be physically contiguous.
|
|
.Pp
|
|
Copies larger than the maximum transfer size (1 MB, but may vary by hardware) are not
|
|
supported.
|
|
Future versions will likely support this by breaking up the transfer into
|
|
smaller sizes.
|
|
.Sh BUGS
|
|
The
|
|
.Nm
|
|
driver only supports blockfill, copy, CRC, and null operations at this time.
|
|
The driver does not yet support advanced DMA modes, such as XOR, that some
|
|
I/OAT devices support.
|