fe1a5a9b42
Add a status for page faults to be used when getting the completion status of an operation.

Signed-off-by: Sean Morrissey <sean.morrissey@intel.com>
638 lines
19 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 Intel Corporation
 */

#include <x86intrin.h>

#include <rte_malloc.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_prefetch.h>

#include "idxd_internal.h"

#define IDXD_PMD_NAME_STR "dmadev_idxd"

/* systems with DSA all support AVX2 so allow our data-path functions to
 * always use at least that instruction set
 */
#ifndef __AVX2__
#define __use_avx2 __attribute__((target("avx2")))
#else
#define __use_avx2
#endif

__use_avx2
static __rte_always_inline rte_iova_t
__desc_idx_to_iova(struct idxd_dmadev *idxd, uint16_t n)
{
	return idxd->desc_iova + (n * sizeof(struct idxd_hw_desc));
}

__use_avx2
static __rte_always_inline void
__idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
{
	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
			:
			: "a" (dst), "d" (src)
			: "memory");
}
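
/* For reference: the .byte sequence above hand-encodes MOVDIR64B, taking the
 * destination address from eax/rax and the source from edx/rdx, so the build
 * does not depend on assembler or intrinsic support for the instruction.
 * A minimal sketch of the same 64-byte store, assuming a toolchain built with
 * -mmovdir64b and the _movdir64b() intrinsic from <immintrin.h>, would be:
 *
 *	_movdir64b((void *)dst, src);
 */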

__use_avx2
static __rte_always_inline void
__submit(struct idxd_dmadev *idxd)
{
	rte_prefetch1(&idxd->batch_comp_ring[idxd->batch_idx_read]);

	if (idxd->batch_size == 0)
		return;

	/* write completion to batch comp ring */
	rte_iova_t comp_addr = idxd->batch_iova +
			(idxd->batch_idx_write * sizeof(struct idxd_completion));

	if (idxd->batch_size == 1) {
		/* submit batch directly */
		struct idxd_hw_desc desc =
				idxd->desc_ring[idxd->batch_start & idxd->desc_ring_mask];
		desc.completion = comp_addr;
		desc.op_flags |= IDXD_FLAG_REQUEST_COMPLETION;
		_mm_sfence(); /* fence before writing desc to device */
		__idxd_movdir64b(idxd->portal, &desc);
	} else {
		const struct idxd_hw_desc batch_desc = {
				.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
					IDXD_FLAG_COMPLETION_ADDR_VALID |
					IDXD_FLAG_REQUEST_COMPLETION,
				.desc_addr = __desc_idx_to_iova(idxd,
						idxd->batch_start & idxd->desc_ring_mask),
				.completion = comp_addr,
				.size = idxd->batch_size,
		};
		_mm_sfence(); /* fence before writing desc to device */
		__idxd_movdir64b(idxd->portal, &batch_desc);
	}

	if (++idxd->batch_idx_write > idxd->max_batches)
		idxd->batch_idx_write = 0;

	idxd->stats.submitted += idxd->batch_size;

	idxd->batch_start += idxd->batch_size;
	idxd->batch_size = 0;
	idxd->batch_idx_ring[idxd->batch_idx_write] = idxd->batch_start;
	_mm256_store_si256((void *)&idxd->batch_comp_ring[idxd->batch_idx_write],
			_mm256_setzero_si256());
}

__use_avx2
static __rte_always_inline int
__idxd_write_desc(struct idxd_dmadev *idxd,
		const uint32_t op_flags,
		const rte_iova_t src,
		const rte_iova_t dst,
		const uint32_t size,
		const uint32_t flags)
{
	uint16_t mask = idxd->desc_ring_mask;
	uint16_t job_id = idxd->batch_start + idxd->batch_size;
	/* we never wrap batches, so we only mask the start and allow start+size to overflow */
	uint16_t write_idx = (idxd->batch_start & mask) + idxd->batch_size;

	/* first check batch ring space then desc ring space */
	if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
			idxd->batch_idx_write + 1 == idxd->batch_idx_read)
		return -ENOSPC;
	if (((write_idx + 1) & mask) == (idxd->ids_returned & mask))
		return -ENOSPC;

	/* write desc. Note: descriptors don't wrap, but the completion address does */
	const uint64_t op_flags64 = (uint64_t)(op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID) << 32;
	const uint64_t comp_addr = __desc_idx_to_iova(idxd, write_idx & mask);
	_mm256_store_si256((void *)&idxd->desc_ring[write_idx],
			_mm256_set_epi64x(dst, src, comp_addr, op_flags64));
	_mm256_store_si256((void *)&idxd->desc_ring[write_idx].size,
			_mm256_set_epi64x(0, 0, 0, size));

	idxd->batch_size++;

	rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);

	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
		__submit(idxd);

	return job_id;
}
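
/* Worked example of the space checks above (illustrative figures, not taken
 * from the original source): with max_batches = 4 the batch index ring has
 * five slots, so it is treated as full either when write == 4 while read == 0
 * (the wrap case) or when write + 1 == read. For the descriptor ring, with
 * desc_ring_mask = 4095, ids_returned = 100 and write_idx = 4195,
 * ((4195 + 1) & 4095) == (100 & 4095) == 100, so the enqueue is rejected with
 * -ENOSPC rather than overwriting descriptors not yet returned to the caller.
 */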

__use_avx2
int
idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
		rte_iova_t dst, unsigned int length, uint64_t flags)
{
	/* we can take advantage of the fact that the fence flag in dmadev and DSA are the same,
	 * but check it at compile time to be sure.
	 */
	RTE_BUILD_BUG_ON(RTE_DMA_OP_FLAG_FENCE != IDXD_FLAG_FENCE);
	uint32_t memmove = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |
			IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
	return __idxd_write_desc(dev_private, memmove, src, dst, length,
			flags);
}

__use_avx2
int
idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
		rte_iova_t dst, unsigned int length, uint64_t flags)
{
	uint32_t fill = (idxd_op_fill << IDXD_CMD_OP_SHIFT) |
			IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
	return __idxd_write_desc(dev_private, fill, pattern, dst, length,
			flags);
}

__use_avx2
int
idxd_submit(void *dev_private, uint16_t qid __rte_unused)
{
	__submit(dev_private);
	return 0;
}
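
/* Illustrative usage from the application side (a sketch against the public
 * rte_dmadev API, not part of this driver): operations are enqueued and then
 * pushed to the hardware, which ends up in __submit() above either per-op via
 * RTE_DMA_OP_FLAG_SUBMIT or through the explicit submit call:
 *
 *	int id = rte_dma_copy(dev_id, 0, src_iova, dst_iova, len, 0);
 *	if (id < 0)
 *		return id;	// ring full (-ENOSPC): flush and retry
 *	rte_dma_submit(dev_id, 0);
 */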

__use_avx2
static enum rte_dma_status_code
get_comp_status(struct idxd_completion *c)
{
	uint8_t st = c->status;
	switch (st) {
	/* successful descriptors are not written back normally */
	case IDXD_COMP_STATUS_INCOMPLETE:
	case IDXD_COMP_STATUS_SUCCESS:
		return RTE_DMA_STATUS_SUCCESSFUL;
	case IDXD_COMP_STATUS_PAGE_FAULT:
		return RTE_DMA_STATUS_PAGE_FAULT;
	case IDXD_COMP_STATUS_INVALID_OPCODE:
		return RTE_DMA_STATUS_INVALID_OPCODE;
	case IDXD_COMP_STATUS_INVALID_SIZE:
		return RTE_DMA_STATUS_INVALID_LENGTH;
	case IDXD_COMP_STATUS_SKIPPED:
		return RTE_DMA_STATUS_NOT_ATTEMPTED;
	default:
		return RTE_DMA_STATUS_ERROR_UNKNOWN;
	}
}
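
/* This mapping is what surfaces DSA page faults to users of the public API.
 * An illustrative sketch of checking for them from the application side (not
 * part of this driver; BURST and handle_fault() are placeholders):
 *
 *	enum rte_dma_status_code st[BURST];
 *	uint16_t last_idx, i;
 *	uint16_t n = rte_dma_completed_status(dev_id, 0, BURST, &last_idx, st);
 *
 *	for (i = 0; i < n; i++)
 *		if (st[i] == RTE_DMA_STATUS_PAGE_FAULT)
 *			handle_fault(i);	// op touched an unmapped address
 */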

__use_avx2
int
idxd_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		enum rte_dma_vchan_status *status)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	uint16_t last_batch_write = idxd->batch_idx_write == 0 ? idxd->max_batches :
			idxd->batch_idx_write - 1;
	uint8_t bstatus = (idxd->batch_comp_ring[last_batch_write].status != 0);

	/* An IDXD device will always be either active or idle.
	 * RTE_DMA_VCHAN_HALTED_ERROR is therefore not supported by IDXD.
	 */
	*status = bstatus ? RTE_DMA_VCHAN_IDLE : RTE_DMA_VCHAN_ACTIVE;

	return 0;
}

__use_avx2
static __rte_always_inline int
batch_ok(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
{
	uint16_t ret;
	uint8_t bstatus;

	if (max_ops == 0)
		return 0;

	/* first check if there are any unreturned handles from last time */
	if (idxd->ids_avail != idxd->ids_returned) {
		ret = RTE_MIN((uint16_t)(idxd->ids_avail - idxd->ids_returned), max_ops);
		idxd->ids_returned += ret;
		if (status)
			memset(status, RTE_DMA_STATUS_SUCCESSFUL, ret * sizeof(*status));
		return ret;
	}

	if (idxd->batch_idx_read == idxd->batch_idx_write)
		return 0;

	bstatus = idxd->batch_comp_ring[idxd->batch_idx_read].status;
	/* now check if next batch is complete and successful */
	if (bstatus == IDXD_COMP_STATUS_SUCCESS) {
		/* since the batch idx ring stores the start of each batch, pre-increment to lookup
		 * start of next batch.
		 */
		if (++idxd->batch_idx_read > idxd->max_batches)
			idxd->batch_idx_read = 0;
		idxd->ids_avail = idxd->batch_idx_ring[idxd->batch_idx_read];

		ret = RTE_MIN((uint16_t)(idxd->ids_avail - idxd->ids_returned), max_ops);
		idxd->ids_returned += ret;
		if (status)
			memset(status, RTE_DMA_STATUS_SUCCESSFUL, ret * sizeof(*status));
		return ret;
	}
	/* check if batch is incomplete */
	else if (bstatus == IDXD_COMP_STATUS_INCOMPLETE)
		return 0;

	return -1; /* error case */
}

__use_avx2
static inline uint16_t
batch_completed(struct idxd_dmadev *idxd, uint16_t max_ops, bool *has_error)
{
	uint16_t i;
	uint16_t b_start, b_end, next_batch;

	int ret = batch_ok(idxd, max_ops, NULL);
	if (ret >= 0)
		return ret;

	/* ERROR case, not successful, not incomplete */
	/* Get the batch size, and special case size 1.
	 * once we identify the actual failure job, return other jobs, then update
	 * the batch ring indexes to make it look like the first job of the batch has failed.
	 * Subsequent calls here will always return zero packets, and the error must be cleared by
	 * calling the completed_status() function.
	 */
	next_batch = (idxd->batch_idx_read + 1);
	if (next_batch > idxd->max_batches)
		next_batch = 0;
	b_start = idxd->batch_idx_ring[idxd->batch_idx_read];
	b_end = idxd->batch_idx_ring[next_batch];

	if (b_end - b_start == 1) { /* not a batch */
		*has_error = true;
		return 0;
	}

	for (i = b_start; i < b_end; i++) {
		struct idxd_completion *c = (void *)&idxd->desc_ring[i & idxd->desc_ring_mask];
		if (c->status > IDXD_COMP_STATUS_SUCCESS) /* ignore incomplete(0) and success(1) */
			break;
	}
	ret = RTE_MIN((uint16_t)(i - idxd->ids_returned), max_ops);
	if (ret < max_ops)
		*has_error = true; /* we got up to the point of error */
	idxd->ids_avail = idxd->ids_returned += ret;

	/* to ensure we can call twice and just return 0, set start of batch to where we finished */
	idxd->batch_comp_ring[idxd->batch_idx_read].completed_size -= ret;
	idxd->batch_idx_ring[idxd->batch_idx_read] += ret;
	if (idxd->batch_idx_ring[next_batch] - idxd->batch_idx_ring[idxd->batch_idx_read] == 1) {
		/* copy over the descriptor status to the batch ring as if no batch */
		uint16_t d_idx = idxd->batch_idx_ring[idxd->batch_idx_read] & idxd->desc_ring_mask;
		struct idxd_completion *desc_comp = (void *)&idxd->desc_ring[d_idx];
		idxd->batch_comp_ring[idxd->batch_idx_read].status = desc_comp->status;
	}

	return ret;
}

__use_avx2
static uint16_t
batch_completed_status(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
{
	uint16_t next_batch;

	int ret = batch_ok(idxd, max_ops, status);
	if (ret >= 0)
		return ret;

	/* ERROR case, not successful, not incomplete */
	/* Get the batch size, and special case size 1.
	 */
	next_batch = (idxd->batch_idx_read + 1);
	if (next_batch > idxd->max_batches)
		next_batch = 0;
	const uint16_t b_start = idxd->batch_idx_ring[idxd->batch_idx_read];
	const uint16_t b_end = idxd->batch_idx_ring[next_batch];
	const uint16_t b_len = b_end - b_start;
	if (b_len == 1) { /* not a batch */
		*status = get_comp_status(&idxd->batch_comp_ring[idxd->batch_idx_read]);
		if (*status != RTE_DMA_STATUS_SUCCESSFUL)
			idxd->stats.errors++;
		idxd->ids_avail++;
		idxd->ids_returned++;
		idxd->batch_idx_read = next_batch;
		return 1;
	}

	/* not a single-element batch, need to process more.
	 * Scenarios:
	 * 1. max_ops >= batch_size - can fit everything, simple case
	 *    - loop through completed ops and then add on any not-attempted ones
	 * 2. max_ops < batch_size - can't fit everything, more complex case
	 *    - loop through completed/incomplete and stop when hit max_ops
	 *    - adjust the batch descriptor to update where we stopped, with appropriate bcount
	 *    - if bcount is to be exactly 1, update the batch descriptor as it will be treated as
	 *      non-batch next time.
	 */
	const uint16_t bcount = idxd->batch_comp_ring[idxd->batch_idx_read].completed_size;
	for (ret = 0; ret < b_len && ret < max_ops; ret++) {
		struct idxd_completion *c = (void *)
				&idxd->desc_ring[(b_start + ret) & idxd->desc_ring_mask];
		status[ret] = (ret < bcount) ? get_comp_status(c) : RTE_DMA_STATUS_NOT_ATTEMPTED;
		if (status[ret] != RTE_DMA_STATUS_SUCCESSFUL)
			idxd->stats.errors++;
	}
	idxd->ids_avail = idxd->ids_returned += ret;

	/* everything fit */
	if (ret == b_len) {
		idxd->batch_idx_read = next_batch;
		return ret;
	}

	/* set up for next time, update existing batch descriptor & start idx at batch_idx_read */
	idxd->batch_idx_ring[idxd->batch_idx_read] += ret;
	if (ret > bcount) {
		/* we have only incomplete ones - set batch completed size to 0 */
		struct idxd_completion *comp = &idxd->batch_comp_ring[idxd->batch_idx_read];
		comp->completed_size = 0;
		/* if there is only one descriptor left, job skipped so set flag appropriately */
		if (b_len - ret == 1)
			comp->status = IDXD_COMP_STATUS_SKIPPED;
	} else {
		struct idxd_completion *comp = &idxd->batch_comp_ring[idxd->batch_idx_read];
		comp->completed_size -= ret;
		/* if there is only one descriptor left, copy status info straight to desc */
		if (comp->completed_size == 1) {
			struct idxd_completion *c = (void *)
					&idxd->desc_ring[(b_start + ret) & idxd->desc_ring_mask];
			comp->status = c->status;
			/* individual descs can be ok without writeback, but not batches */
			if (comp->status == IDXD_COMP_STATUS_INCOMPLETE)
				comp->status = IDXD_COMP_STATUS_SUCCESS;
		} else if (bcount == b_len) {
			/* check if we still have an error, and clear flag if not */
			uint16_t i;
			for (i = b_start + ret; i < b_end; i++) {
				struct idxd_completion *c = (void *)
						&idxd->desc_ring[i & idxd->desc_ring_mask];
				if (c->status > IDXD_COMP_STATUS_SUCCESS)
					break;
			}
			if (i == b_end) /* no errors */
				comp->status = IDXD_COMP_STATUS_SUCCESS;
		}
	}

	return ret;
}

__use_avx2
uint16_t
idxd_completed(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
		uint16_t *last_idx, bool *has_error)
{
	struct idxd_dmadev *idxd = dev_private;
	uint16_t batch, ret = 0;

	do {
		batch = batch_completed(idxd, max_ops - ret, has_error);
		ret += batch;
	} while (batch > 0 && *has_error == false);

	idxd->stats.completed += ret;
	*last_idx = idxd->ids_returned - 1;
	return ret;
}
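
/* Illustrative calling pattern from the application side (a sketch, not part
 * of this driver): the fast path polls rte_dma_completed(), and only when
 * has_error is set does the caller switch to rte_dma_completed_status() to
 * find out which operation failed and why, which also clears the error state
 * noted in batch_completed() above. BURST is a placeholder burst size.
 *
 *	bool err = false;
 *	uint16_t last_idx;
 *	uint16_t n = rte_dma_completed(dev_id, 0, BURST, &last_idx, &err);
 *	// n ops finished successfully
 *	if (err) {
 *		enum rte_dma_status_code st[BURST];
 *		n = rte_dma_completed_status(dev_id, 0, BURST, &last_idx, st);
 *		// inspect st[0..n-1] for the failing op's status code
 *	}
 */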

__use_avx2
uint16_t
idxd_completed_status(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
		uint16_t *last_idx, enum rte_dma_status_code *status)
{
	struct idxd_dmadev *idxd = dev_private;
	uint16_t batch, ret = 0;

	do {
		batch = batch_completed_status(idxd, max_ops - ret, &status[ret]);
		ret += batch;
	} while (batch > 0);

	idxd->stats.completed += ret;
	*last_idx = idxd->ids_returned - 1;
	return ret;
}

int
idxd_dump(const struct rte_dma_dev *dev, FILE *f)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	unsigned int i;

	fprintf(f, "== IDXD Private Data ==\n");
	fprintf(f, " Portal: %p\n", idxd->portal);
	fprintf(f, " Config: { ring_size: %u }\n",
			idxd->qcfg.nb_desc);
	fprintf(f, " Batch ring (sz = %u, max_batches = %u):\n\t",
			idxd->max_batches + 1, idxd->max_batches);
	for (i = 0; i <= idxd->max_batches; i++) {
		fprintf(f, " %u ", idxd->batch_idx_ring[i]);
		if (i == idxd->batch_idx_read && i == idxd->batch_idx_write)
			fprintf(f, "[rd ptr, wr ptr] ");
		else if (i == idxd->batch_idx_read)
			fprintf(f, "[rd ptr] ");
		else if (i == idxd->batch_idx_write)
			fprintf(f, "[wr ptr] ");
		if (i == idxd->max_batches)
			fprintf(f, "\n");
	}

	fprintf(f, " Curr batch: start = %u, size = %u\n", idxd->batch_start, idxd->batch_size);
	fprintf(f, " IDS: avail = %u, returned: %u\n", idxd->ids_avail, idxd->ids_returned);
	return 0;
}

int
idxd_stats_get(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		struct rte_dma_stats *stats, uint32_t stats_sz)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	if (stats_sz < sizeof(*stats))
		return -EINVAL;
	*stats = idxd->stats;
	return 0;
}

int
idxd_stats_reset(struct rte_dma_dev *dev, uint16_t vchan __rte_unused)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	idxd->stats = (struct rte_dma_stats){0};
	return 0;
}

int
idxd_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;

	if (size < sizeof(*info))
		return -EINVAL;

	*info = (struct rte_dma_info) {
			.dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_HANDLES_ERRORS |
				RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_FILL,
			.max_vchans = 1,
			.max_desc = 4096,
			.min_desc = 64,
	};
	if (idxd->sva_support)
		info->dev_capa |= RTE_DMA_CAPA_SVA;
	return 0;
}

uint16_t
idxd_burst_capacity(const void *dev_private, uint16_t vchan __rte_unused)
{
	const struct idxd_dmadev *idxd = dev_private;
	uint16_t write_idx = idxd->batch_start + idxd->batch_size;
	uint16_t used_space;

	/* Check for space in the batch ring */
	if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
			idxd->batch_idx_write + 1 == idxd->batch_idx_read)
		return 0;

	/* Subtract and mask to get in correct range */
	used_space = (write_idx - idxd->ids_returned) & idxd->desc_ring_mask;

	const int ret = RTE_MIN((idxd->desc_ring_mask - used_space),
			(idxd->max_batch_size - idxd->batch_size));
	return ret < 0 ? 0 : (uint16_t)ret;
}
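
/* Worked example for the capacity calculation above (illustrative figures,
 * not taken from the original source): with desc_ring_mask = 4095,
 * ids_returned = 100 and batch_start + batch_size = 1124, used_space is
 * (1124 - 100) & 4095 = 1024, leaving 4095 - 1024 = 3071 descriptors; if
 * max_batch_size = 1024 and the current batch already holds 1000 ops, the
 * reported capacity is instead capped at 1024 - 1000 = 24, i.e. the smaller
 * of the two limits.
 */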

int
idxd_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
		uint32_t conf_sz)
{
	if (sizeof(struct rte_dma_conf) != conf_sz)
		return -EINVAL;

	if (dev_conf->nb_vchans != 1)
		return -EINVAL;
	return 0;
}

int
idxd_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	uint16_t max_desc = qconf->nb_desc;

	if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
		return -EINVAL;

	idxd->qcfg = *qconf;

	if (!rte_is_power_of_2(max_desc))
		max_desc = rte_align32pow2(max_desc);
	IDXD_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
	idxd->desc_ring_mask = max_desc - 1;
	idxd->qcfg.nb_desc = max_desc;

	/* in case we are reconfiguring a device, free any existing memory */
	rte_free(idxd->desc_ring);

	/* allocate the descriptor ring at 2x size as batches can't wrap */
	idxd->desc_ring = rte_zmalloc(NULL, sizeof(*idxd->desc_ring) * max_desc * 2, 0);
	if (idxd->desc_ring == NULL)
		return -ENOMEM;
	idxd->desc_iova = rte_mem_virt2iova(idxd->desc_ring);

	idxd->batch_idx_read = 0;
	idxd->batch_idx_write = 0;
	idxd->batch_start = 0;
	idxd->batch_size = 0;
	idxd->ids_returned = 0;
	idxd->ids_avail = 0;

	memset(idxd->batch_comp_ring, 0, sizeof(*idxd->batch_comp_ring) *
			(idxd->max_batches + 1));
	return 0;
}
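
/* For example, a requested nb_desc of 1000 is not a power of two, so
 * rte_align32pow2() rounds it up to 1024 and desc_ring_mask becomes 1023;
 * the descriptor ring is then allocated at twice that size because, as noted
 * above, batches never wrap within it.
 */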

int
idxd_dmadev_create(const char *name, struct rte_device *dev,
		const struct idxd_dmadev *base_idxd,
		const struct rte_dma_dev_ops *ops)
{
	struct idxd_dmadev *idxd = NULL;
	struct rte_dma_dev *dmadev = NULL;
	int ret = 0;

	RTE_BUILD_BUG_ON(sizeof(struct idxd_hw_desc) != 64);
	RTE_BUILD_BUG_ON(offsetof(struct idxd_hw_desc, size) != 32);
	RTE_BUILD_BUG_ON(sizeof(struct idxd_completion) != 32);

	if (!name) {
		IDXD_PMD_ERR("Invalid name of the device!");
		ret = -EINVAL;
		goto cleanup;
	}

	/* Allocate device structure */
	dmadev = rte_dma_pmd_allocate(name, dev->numa_node, sizeof(struct idxd_dmadev));
	if (dmadev == NULL) {
		IDXD_PMD_ERR("Unable to allocate dma device");
		ret = -ENOMEM;
		goto cleanup;
	}
	dmadev->dev_ops = ops;
	dmadev->device = dev;

	dmadev->fp_obj->copy = idxd_enqueue_copy;
	dmadev->fp_obj->fill = idxd_enqueue_fill;
	dmadev->fp_obj->submit = idxd_submit;
	dmadev->fp_obj->completed = idxd_completed;
	dmadev->fp_obj->completed_status = idxd_completed_status;
	dmadev->fp_obj->burst_capacity = idxd_burst_capacity;

	idxd = dmadev->data->dev_private;
	*idxd = *base_idxd; /* copy over the main fields already passed in */
	idxd->dmadev = dmadev;

	/* allocate batch index ring and completion ring.
	 * The +1 is because we can never fully use
	 * the ring, otherwise read == write means both full and empty.
	 */
	idxd->batch_comp_ring = rte_zmalloc_socket(NULL, (sizeof(idxd->batch_idx_ring[0]) +
			sizeof(idxd->batch_comp_ring[0])) * (idxd->max_batches + 1),
			sizeof(idxd->batch_comp_ring[0]), dev->numa_node);
	if (idxd->batch_comp_ring == NULL) {
		IDXD_PMD_ERR("Unable to reserve memory for batch data\n");
		ret = -ENOMEM;
		goto cleanup;
	}
	idxd->batch_idx_ring = (void *)&idxd->batch_comp_ring[idxd->max_batches+1];
	idxd->batch_iova = rte_mem_virt2iova(idxd->batch_comp_ring);

	dmadev->fp_obj->dev_private = idxd;

	idxd->dmadev->state = RTE_DMA_DEV_READY;

	return 0;

cleanup:
	if (dmadev)
		rte_dma_pmd_release(name);

	return ret;
}

int idxd_pmd_logtype;

RTE_LOG_REGISTER_DEFAULT(idxd_pmd_logtype, WARNING);