spdk: Add block fill API to ioat driver
For those Crystal Beach DMA channels which support the block fill capability, add a fill API that can zero out pages or fill them with a fixed pattern.

Change-Id: I8a57337702b951c703d494004b111f6d206279fb
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
parent 0ca6afb850
commit 8aa497f083
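Before the diff, here is a minimal usage sketch (not part of this commit) of how a caller might drive the new public API: check the channel capabilities, submit a fill with pattern 0 to zero a buffer, and poll for the completion callback. Only ioat_get_dma_capabilities(), IOAT_ENGINE_FILL_SUPPORTED, ioat_submit_fill(), ioat_process_events(), and ioat_register_thread() come from the patched headers below; the helper names (fill_done, zero_buffer) and the include path are illustrative assumptions, and the fill length is kept a multiple of 8 bytes, matching the 8-byte alignment the example code below enforces.

#include <stdbool.h>
#include <stdint.h>
#include "ioat.h"	/* public ioat header patched below; exact include path is an assumption */

/* Illustrative only: zero a DMA-able buffer with the new fill API.
 * Assumes the ioat library is initialized and this thread has already been
 * registered with ioat_register_thread(); len should be a multiple of 8
 * bytes since the engine fills in 8-byte units. */
static void
fill_done(void *cb_arg)
{
	*(bool *)cb_arg = true;	/* completion callback, invoked from ioat_process_events() */
}

static int
zero_buffer(void *buf, uint64_t len)
{
	bool done = false;

	/* Only channels that report block fill support can take this path. */
	if (!(ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED)) {
		return -1;
	}

	/* A fill_pattern of 0 zeroes the destination. */
	if (ioat_submit_fill(&done, fill_done, buf, 0, len) < 0) {
		return -1;
	}

	while (!done) {
		ioat_process_events();	/* poll this thread's channel for completions */
	}

	return 0;
}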
@@ -49,6 +49,11 @@
 #define SRC_BUFFER_SIZE (512*1024)
 
+enum ioat_task_type {
+	IOAT_COPY_TYPE,
+	IOAT_FILL_TYPE,
+};
+
 struct user_config {
 	int queue_depth;
 	int time_in_sec;
@@ -67,6 +72,8 @@ static struct user_config g_user_config;
 struct thread_entry {
 	uint64_t xfer_completed;
 	uint64_t xfer_failed;
+	uint64_t fill_completed;
+	uint64_t fill_failed;
 	uint64_t current_queue_depth;
 	unsigned lcore_id;
 	bool is_draining;
@@ -75,9 +82,11 @@ struct thread_entry {
 };
 
 struct ioat_task {
+	enum ioat_task_type type;
 	struct thread_entry *thread_entry;
 	void *buffer;
 	int len;
+	uint64_t fill_pattern;
 	void *src;
 	void *dst;
 };
@@ -124,14 +133,29 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas
 	int len;
 	int src_offset;
 	int dst_offset;
+	int num_ddwords;
+	uint64_t fill_pattern;
 
-	src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
-	len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
-	dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		fill_pattern = rand_r(&seed);
+		fill_pattern = fill_pattern << 32 | rand_r(&seed);
 
-	memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
+		/* ensure that the length of memset block is 8 Bytes aligned */
+		num_ddwords = (rand_r(&seed) % SRC_BUFFER_SIZE) / 8;
+		len = num_ddwords * 8;
+		if (len < 8)
+			len = 8;
+		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
+		ioat_task->fill_pattern = fill_pattern;
+	} else {
+		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
+		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
+		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
+
+		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
+		ioat_task->src = g_src + src_offset;
+	}
 	ioat_task->len = len;
-	ioat_task->src = g_src + src_offset;
 	ioat_task->dst = ioat_task->buffer + dst_offset;
 	ioat_task->thread_entry = thread_entry;
 }
@@ -139,14 +163,31 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas
 static void
 ioat_done(void *cb_arg)
 {
+	uint64_t *value;
+	int i, failed = 0;
 	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
 	struct thread_entry *thread_entry = ioat_task->thread_entry;
 
-	if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
-		thread_entry->xfer_failed++;
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		value = (uint64_t *)ioat_task->dst;
+		for (i = 0; i < ioat_task->len / 8; i++) {
+			if (*value != ioat_task->fill_pattern) {
+				thread_entry->fill_failed++;
+				failed = 1;
+				break;
+			}
+			value++;
+		}
+		if (!failed)
+			thread_entry->fill_completed++;
 	} else {
-		thread_entry->xfer_completed++;
+		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
+			thread_entry->xfer_failed++;
+		} else {
+			thread_entry->xfer_completed++;
+		}
 	}
 
 	thread_entry->current_queue_depth--;
 	if (thread_entry->is_draining) {
 		rte_mempool_put(thread_entry->data_pool, ioat_task->buffer);
@@ -278,7 +319,10 @@ drain_xfers(thread_entry)
 static void
 submit_single_xfer(struct ioat_task *ioat_task)
 {
-	ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len);
+	if (ioat_task->type == IOAT_FILL_TYPE)
+		ioat_submit_fill(ioat_task, ioat_done, ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
+	else
+		ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len);
 	ioat_task->thread_entry->current_queue_depth++;
 }
 
@@ -290,6 +334,11 @@ submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
 		rte_mempool_get(thread_entry->task_pool, (void **)&ioat_task);
 		rte_mempool_get(thread_entry->data_pool, &(ioat_task->buffer));
 
+		ioat_task->type = IOAT_COPY_TYPE;
+		if (ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED) {
+			if (queue_depth % 2)
+				ioat_task->type = IOAT_FILL_TYPE;
+		}
 		prepare_ioat_task(thread_entry, ioat_task);
 		submit_single_xfer(ioat_task);
 	}
@@ -397,10 +446,12 @@ dump_result(struct thread_entry *threads, int len)
 	for (i = 0; i < len; i++) {
 		struct thread_entry *t = &threads[i];
 		total_completed += t->xfer_completed;
+		total_completed += t->fill_completed;
 		total_failed += t->xfer_failed;
+		total_failed += t->fill_failed;
-		if (t->xfer_completed || t->xfer_failed)
-			printf("lcore = %d, success = %ld, failed = %ld \n",
-			       t->lcore_id, t->xfer_completed, t->xfer_failed);
+		printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld \n",
+		       t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed);
 	}
 	return total_failed ? 1 : 0;
 }
@@ -90,6 +90,15 @@ void ioat_unregister_thread(void);
 int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
 			 void *dst, const void *src, uint64_t nbytes);
 
+/**
+ * Submit a DMA engine memory fill request.
+ *
+ * Before submitting any requests on a thread, the thread must be registered
+ * using the \ref ioat_register_thread() function.
+ */
+int64_t ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
+			 void *dst, uint64_t fill_pattern, uint64_t nbytes);
+
 /**
  * Check for completed requests on the current thread.
  *
@@ -100,4 +109,20 @@ int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
  */
 int ioat_process_events(void);
 
+/**
+ * DMA engine capability flags
+ */
+enum ioat_dma_capability_flags {
+	IOAT_ENGINE_COPY_SUPPORTED = 0x1, /**< The memory copy is supported */
+	IOAT_ENGINE_FILL_SUPPORTED = 0x2, /**< The memory fill is supported */
+};
+
+/**
+ * Get the DMA engine capabilities.
+ *
+ * Before submitting any requests on a thread, the thread must be registered
+ * using the \ref ioat_register_thread() function.
+ */
+uint32_t ioat_get_dma_capabilities(void);
+
 #endif
@@ -53,6 +53,13 @@
 #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
 #define IOAT_CHANCTRL_INT_REARM 0x0001
 
+/* DMA Channel Capabilities */
+#define IOAT_DMACAP_PB (1 << 0)
+#define IOAT_DMACAP_DCA (1 << 4)
+#define IOAT_DMACAP_BFILL (1 << 6)
+#define IOAT_DMACAP_XOR (1 << 8)
+#define IOAT_DMACAP_PQ (1 << 9)
+#define IOAT_DMACAP_DMA_DIF (1 << 10)
+
 struct ioat_registers {
 	uint8_t chancnt;
lib/ioat/ioat.c
@@ -295,6 +295,37 @@ ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst,
 	return desc;
 }
 
+static struct ioat_descriptor *
+ioat_prep_fill(struct ioat_channel *ioat, uint64_t dst,
+	       uint64_t fill_pattern, uint32_t len)
+{
+	struct ioat_descriptor *desc;
+	union ioat_hw_descriptor *hw_desc;
+
+	ioat_assert(len <= ioat->max_xfer_size);
+
+	if (ioat_get_ring_space(ioat) < 1) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);
+
+	hw_desc->fill.u.control_raw = 0;
+	hw_desc->fill.u.control.op = IOAT_OP_FILL;
+	hw_desc->fill.u.control.completion_update = 1;
+
+	hw_desc->fill.size = len;
+	hw_desc->fill.src_data = fill_pattern;
+	hw_desc->fill.dest_addr = dst;
+
+	desc->callback_fn = NULL;
+	desc->callback_arg = NULL;
+
+	ioat_submit_single(ioat);
+
+	return desc;
+}
+
 static int ioat_reset_hw(struct ioat_channel *ioat)
 {
 	int timeout;
@@ -419,6 +450,10 @@ ioat_channel_start(ioat)
 		return -1;
 	}
 
+	/* Always support DMA copy */
+	ioat->dma_capabilities = IOAT_ENGINE_COPY_SUPPORTED;
+	if (ioat->regs->dmacapability & IOAT_DMACAP_BFILL)
+		ioat->dma_capabilities |= IOAT_ENGINE_FILL_SUPPORTED;
 	xfercap = ioat->regs->xfercap;
 
 	/* Only bits [4:0] are valid. */
@@ -663,6 +698,75 @@ ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
 	return nbytes;
 }
 
+int64_t
+ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
+		 void *dst, uint64_t fill_pattern, uint64_t nbytes)
+{
+	struct ioat_channel *ioat;
+	struct ioat_descriptor *last_desc = NULL;
+	uint64_t remaining, op_size;
+	uint64_t vdst;
+	uint32_t orig_head;
+
+	ioat = ioat_thread_channel;
+	if (!ioat) {
+		return -1;
+	}
+
+	if (!(ioat->dma_capabilities & IOAT_ENGINE_FILL_SUPPORTED)) {
+		ioat_printf(ioat, "Channel does not support memory fill\n");
+		return -1;
+	}
+
+	orig_head = ioat->head;
+
+	vdst = (uint64_t)dst;
+	remaining = nbytes;
+
+	while (remaining) {
+		op_size = remaining;
+		op_size = min(op_size, ioat->max_xfer_size);
+		remaining -= op_size;
+
+		last_desc = ioat_prep_fill(ioat,
+					   ioat_vtophys((void *)vdst),
+					   fill_pattern,
+					   op_size);
+
+		if (remaining == 0 || last_desc == NULL) {
+			break;
+		}
+
+		vdst += op_size;
+	}
+
+	if (last_desc) {
+		last_desc->callback_fn = cb_fn;
+		last_desc->callback_arg = cb_arg;
+	} else {
+		/*
+		 * Ran out of descriptors in the ring - reset head to leave things as they were
+		 * in case we managed to fill out any descriptors.
+		 */
+		ioat->head = orig_head;
+		return -1;
+	}
+
+	ioat_flush(ioat);
+	return nbytes;
+}
+
+uint32_t ioat_get_dma_capabilities(void)
+{
+	struct ioat_channel *ioat;
+
+	ioat = ioat_thread_channel;
+	if (!ioat) {
+		return 0;
+	}
+	return ioat->dma_capabilities;
+}
+
 int ioat_process_events(void)
 {
 	if (!ioat_thread_channel) {
@@ -74,6 +74,7 @@ struct ioat_channel {
 	struct ioat_descriptor *ring;
 	union ioat_hw_descriptor *hw_ring;
 	uint64_t hw_ring_phys_addr;
+	uint32_t dma_capabilities;
 };
 
 static inline uint32_t
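A closing illustration (again not part of this commit): the example code above picks IOAT_FILL_TYPE only when ioat_get_dma_capabilities() reports IOAT_ENGINE_FILL_SUPPORTED and otherwise keeps submitting copies. A caller that wants to clear memory on any channel could wrap that decision as sketched here; the helper name clear_region, the pre-zeroed zero_src buffer, and the include path are assumptions, while the capability check and the two submit functions come from this patch.

#include <stdint.h>
#include "ioat.h"	/* public ioat header patched above; exact include path is an assumption */

/* Illustrative only: clear nbytes at dst, preferring the new block fill path
 * and falling back to a plain DMA copy from a caller-provided, pre-zeroed
 * source buffer on channels without block fill support. */
static int64_t
clear_region(void *cb_arg, ioat_callback_t cb_fn,
	     void *dst, const void *zero_src, uint64_t nbytes)
{
	if (ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED) {
		/* fill_pattern 0 writes zeroes in 8-byte units */
		return ioat_submit_fill(cb_arg, cb_fn, dst, 0, nbytes);
	}

	return ioat_submit_copy(cb_arg, cb_fn, dst, zero_src, nbytes);
}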