spdk: Add block fill API to ioat driver
For Crystal Beach DMA channels that support the block fill capability, add a fill API that can zero out pages or fill them with a fixed pattern.

Change-Id: I8a57337702b951c703d494004b111f6d206279fb
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
parent 0ca6afb850
commit 8aa497f083
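Taken together, the header additions in this commit give applications a capability query (ioat_get_dma_capabilities()) and a fill submission call (ioat_submit_fill()). Below is a minimal usage sketch, assuming the channel has already been probed/attached and the calling thread registered via ioat_register_thread(); the include path and the zero_buffer()/fill_done() helper names are illustrative only and are not part of this commit.

#include <stdint.h>
#include <string.h>

#include "spdk/ioat.h"	/* assumed include path for the public ioat API */

static void
fill_done(void *cb_arg)
{
	*(int *)cb_arg = 1;
}

/* Zero a buffer with the DMA engine when block fill is supported,
 * otherwise fall back to a CPU memset. */
static int
zero_buffer(void *buf, uint64_t len)
{
	int done = 0;

	if (!(ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED)) {
		/* Channel cannot fill; do it on the CPU instead. */
		memset(buf, 0, len);
		return 0;
	}

	/* buf must be DMA-able memory, since the driver translates it with
	 * ioat_vtophys() internally; len is kept a multiple of 8 here,
	 * matching the 64-bit fill pattern granularity the perf test uses. */
	if (ioat_submit_fill(&done, fill_done, buf, 0, len) < 0) {
		return -1;
	}

	/* Poll for the completion callback on the submitting thread. */
	while (!done) {
		ioat_process_events();
	}

	return 0;
}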
ioat perf example:

@@ -49,6 +49,11 @@
 #define SRC_BUFFER_SIZE (512*1024)
 
+enum ioat_task_type {
+	IOAT_COPY_TYPE,
+	IOAT_FILL_TYPE,
+};
+
 struct user_config {
 	int queue_depth;
 	int time_in_sec;
@@ -67,6 +72,8 @@ static struct user_config g_user_config;
 struct thread_entry {
 	uint64_t xfer_completed;
 	uint64_t xfer_failed;
+	uint64_t fill_completed;
+	uint64_t fill_failed;
 	uint64_t current_queue_depth;
 	unsigned lcore_id;
 	bool is_draining;
@@ -75,9 +82,11 @@ struct thread_entry {
 };
 
 struct ioat_task {
+	enum ioat_task_type type;
 	struct thread_entry *thread_entry;
 	void *buffer;
 	int len;
+	uint64_t fill_pattern;
 	void *src;
 	void *dst;
 };
@@ -124,14 +133,29 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task)
 	int len;
 	int src_offset;
 	int dst_offset;
+	int num_ddwords;
+	uint64_t fill_pattern;
 
-	src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
-	len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
-	dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
-	memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		fill_pattern = rand_r(&seed);
+		fill_pattern = fill_pattern << 32 | rand_r(&seed);
+
+		/* ensure that the length of memset block is 8 Bytes aligned */
+		num_ddwords = (rand_r(&seed) % SRC_BUFFER_SIZE) / 8;
+		len = num_ddwords * 8;
+		if (len < 8)
+			len = 8;
+		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
+		ioat_task->fill_pattern = fill_pattern;
+	} else {
+		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
+		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
+		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
+
+		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
+		ioat_task->src = g_src + src_offset;
+	}
 	ioat_task->len = len;
-	ioat_task->src = g_src + src_offset;
 	ioat_task->dst = ioat_task->buffer + dst_offset;
 	ioat_task->thread_entry = thread_entry;
 }
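The perf test only issues fill lengths that are multiples of 8 bytes, since the pattern is a single 64-bit value repeated across the destination. A tiny standalone sketch of that rounding follows; the function name is illustrative only.

#include <stdint.h>

/* Round an arbitrary requested length down to a whole number of 64-bit
 * "ddwords", never returning less than 8 bytes. This mirrors the
 * num_ddwords computation in prepare_ioat_task() above. */
static uint64_t
align_fill_len(uint64_t requested)
{
	uint64_t len = (requested / 8) * 8;

	return len < 8 ? 8 : len;
}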
@@ -139,14 +163,31 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task)
 static void
 ioat_done(void *cb_arg)
 {
+	uint64_t *value;
+	int i, failed = 0;
 	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
 	struct thread_entry *thread_entry = ioat_task->thread_entry;
 
-	if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
-		thread_entry->xfer_failed++;
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		value = (uint64_t *)ioat_task->dst;
+		for (i = 0; i < ioat_task->len / 8; i++) {
+			if (*value != ioat_task->fill_pattern) {
+				thread_entry->fill_failed++;
+				failed = 1;
+				break;
+			}
+			value++;
+		}
+		if (!failed)
+			thread_entry->fill_completed++;
 	} else {
-		thread_entry->xfer_completed++;
+		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
+			thread_entry->xfer_failed++;
+		} else {
+			thread_entry->xfer_completed++;
+		}
 	}
 
 	thread_entry->current_queue_depth--;
 	if (thread_entry->is_draining) {
 		rte_mempool_put(thread_entry->data_pool, ioat_task->buffer);
@@ -278,7 +319,10 @@ drain_xfers(struct thread_entry *thread_entry)
 static void
 submit_single_xfer(struct ioat_task *ioat_task)
 {
-	ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len);
+	if (ioat_task->type == IOAT_FILL_TYPE)
+		ioat_submit_fill(ioat_task, ioat_done, ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
+	else
+		ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len);
 	ioat_task->thread_entry->current_queue_depth++;
 }
 
@@ -290,6 +334,11 @@ submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
 	rte_mempool_get(thread_entry->task_pool, (void **)&ioat_task);
 	rte_mempool_get(thread_entry->data_pool, &(ioat_task->buffer));
+
+	ioat_task->type = IOAT_COPY_TYPE;
+	if (ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED) {
+		if (queue_depth % 2)
+			ioat_task->type = IOAT_FILL_TYPE;
+	}
 	prepare_ioat_task(thread_entry, ioat_task);
 	submit_single_xfer(ioat_task);
 }
@@ -397,10 +446,12 @@ dump_result(struct thread_entry *threads, int len)
 	for (i = 0; i < len; i++) {
 		struct thread_entry *t = &threads[i];
 		total_completed += t->xfer_completed;
+		total_completed += t->fill_completed;
 		total_failed += t->xfer_failed;
+		total_failed += t->fill_failed;
 		if (t->xfer_completed || t->xfer_failed)
-			printf("lcore = %d, success = %ld, failed = %ld \n",
-			       t->lcore_id, t->xfer_completed, t->xfer_failed);
+			printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld \n",
+			       t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed);
 	}
 	return total_failed ? 1 : 0;
 }
public ioat API header:

@@ -90,6 +90,15 @@ void ioat_unregister_thread(void);
 int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
 			 void *dst, const void *src, uint64_t nbytes);
 
+/**
+ * Submit a DMA engine memory fill request.
+ *
+ * Before submitting any requests on a thread, the thread must be registered
+ * using the \ref ioat_register_thread() function.
+ */
+int64_t ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
+			 void *dst, uint64_t fill_pattern, uint64_t nbytes);
+
 /**
  * Check for completed requests on the current thread.
  *
@@ -100,4 +109,20 @@ int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
  */
 int ioat_process_events(void);
 
+/**
+ * DMA engine capability flags
+ */
+enum ioat_dma_capability_flags {
+	IOAT_ENGINE_COPY_SUPPORTED = 0x1, /**< The memory copy is supported */
+	IOAT_ENGINE_FILL_SUPPORTED = 0x2, /**< The memory fill is supported */
+};
+
+/**
+ * Get the DMA engine capabilities.
+ *
+ * Before submitting any requests on a thread, the thread must be registered
+ * using the \ref ioat_register_thread() function.
+ */
+uint32_t ioat_get_dma_capabilities(void);
+
 #endif
ioat hardware register definitions:

@@ -53,6 +53,13 @@
 #define IOAT_CHANCTRL_ERR_COMPLETION_EN	0x0004
 #define IOAT_CHANCTRL_INT_REARM		0x0001
 
+/* DMA Channel Capabilities */
+#define IOAT_DMACAP_PB		(1 << 0)
+#define IOAT_DMACAP_DCA		(1 << 4)
+#define IOAT_DMACAP_BFILL	(1 << 6)
+#define IOAT_DMACAP_XOR		(1 << 8)
+#define IOAT_DMACAP_PQ		(1 << 9)
+#define IOAT_DMACAP_DMA_DIF	(1 << 10)
+
 struct ioat_registers {
 	uint8_t chancnt;
lib/ioat/ioat.c (104 changed lines):
@@ -295,6 +295,37 @@ ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst,
 	return desc;
 }
 
+static struct ioat_descriptor *
+ioat_prep_fill(struct ioat_channel *ioat, uint64_t dst,
+	       uint64_t fill_pattern, uint32_t len)
+{
+	struct ioat_descriptor *desc;
+	union ioat_hw_descriptor *hw_desc;
+
+	ioat_assert(len <= ioat->max_xfer_size);
+
+	if (ioat_get_ring_space(ioat) < 1) {
+		return NULL;
+	}
+
+	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);
+
+	hw_desc->fill.u.control_raw = 0;
+	hw_desc->fill.u.control.op = IOAT_OP_FILL;
+	hw_desc->fill.u.control.completion_update = 1;
+
+	hw_desc->fill.size = len;
+	hw_desc->fill.src_data = fill_pattern;
+	hw_desc->fill.dest_addr = dst;
+
+	desc->callback_fn = NULL;
+	desc->callback_arg = NULL;
+
+	ioat_submit_single(ioat);
+
+	return desc;
+}
+
 static int ioat_reset_hw(struct ioat_channel *ioat)
 {
 	int timeout;
@@ -419,6 +450,10 @@ ioat_channel_start(struct ioat_channel *ioat)
 		return -1;
 	}
 
+	/* Always support DMA copy */
+	ioat->dma_capabilities = IOAT_ENGINE_COPY_SUPPORTED;
+	if (ioat->regs->dmacapability & IOAT_DMACAP_BFILL)
+		ioat->dma_capabilities |= IOAT_ENGINE_FILL_SUPPORTED;
 	xfercap = ioat->regs->xfercap;
 
 	/* Only bits [4:0] are valid. */
@@ -663,6 +698,75 @@ ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
 	return nbytes;
 }
 
+int64_t
+ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
+		 void *dst, uint64_t fill_pattern, uint64_t nbytes)
+{
+	struct ioat_channel *ioat;
+	struct ioat_descriptor *last_desc = NULL;
+	uint64_t remaining, op_size;
+	uint64_t vdst;
+	uint32_t orig_head;
+
+	ioat = ioat_thread_channel;
+	if (!ioat) {
+		return -1;
+	}
+
+	if (!(ioat->dma_capabilities & IOAT_ENGINE_FILL_SUPPORTED)) {
+		ioat_printf(ioat, "Channel does not support memory fill\n");
+		return -1;
+	}
+
+	orig_head = ioat->head;
+
+	vdst = (uint64_t)dst;
+	remaining = nbytes;
+
+	while (remaining) {
+		op_size = remaining;
+		op_size = min(op_size, ioat->max_xfer_size);
+		remaining -= op_size;
+
+		last_desc = ioat_prep_fill(ioat,
+					   ioat_vtophys((void *)vdst),
+					   fill_pattern,
+					   op_size);
+
+		if (remaining == 0 || last_desc == NULL) {
+			break;
+		}
+
+		vdst += op_size;
+	}
+
+	if (last_desc) {
+		last_desc->callback_fn = cb_fn;
+		last_desc->callback_arg = cb_arg;
+	} else {
+		/*
+		 * Ran out of descriptors in the ring - reset head to leave things as they were
+		 * in case we managed to fill out any descriptors.
+		 */
+		ioat->head = orig_head;
+		return -1;
+	}
+
+	ioat_flush(ioat);
+	return nbytes;
+}
+
+uint32_t ioat_get_dma_capabilities(void)
+{
+	struct ioat_channel *ioat;
+
+	ioat = ioat_thread_channel;
+	if (!ioat) {
+		return 0;
+	}
+
+	return ioat->dma_capabilities;
+}
+
 int ioat_process_events(void)
 {
 	if (!ioat_thread_channel) {
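ioat_submit_fill() breaks a large request into max_xfer_size-sized pieces, issuing one descriptor per piece and attaching the completion callback only to the last one. The small sketch below shows the resulting descriptor count; the 2 MB per-descriptor limit is an assumed example value only, since the real limit is derived from the channel's xfercap register.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* How many descriptors a fill of nbytes consumes when each descriptor
 * can cover at most max_xfer_size bytes, mirroring the submit loop above. */
static uint64_t
fill_descriptor_count(uint64_t nbytes, uint64_t max_xfer_size)
{
	return (nbytes + max_xfer_size - 1) / max_xfer_size;
}

int
main(void)
{
	uint64_t max_xfer_size = 2ull * 1024 * 1024;	/* assumed example value */

	/* A 5 MB fill splits into 2 MB + 2 MB + 1 MB, i.e. 3 descriptors. */
	printf("5 MB fill -> %" PRIu64 " descriptors\n",
	       fill_descriptor_count(5ull * 1024 * 1024, max_xfer_size));
	return 0;
}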
ioat channel internals (driver-private header):

@@ -74,6 +74,7 @@ struct ioat_channel {
 	struct ioat_descriptor *ring;
 	union ioat_hw_descriptor *hw_ring;
 	uint64_t hw_ring_phys_addr;
+	uint32_t dma_capabilities;
 };
 
 static inline uint32_t