spdk: Add block fill API to ioat driver

For Crystal Beach DMA channels that support the block fill capability, add a
fill API that can zero out pages or fill them with a fixed pattern.

Change-Id: I8a57337702b951c703d494004b111f6d206279fb
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Author: Changpeng Liu <changpeng.liu@intel.com>
Date:   2016-01-29 16:01:43 +08:00
Parent: 0ca6afb850
Commit: 8aa497f083

5 changed files with 199 additions and 11 deletions
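For context, a minimal caller-side sketch of the new API (the "zero out pages" case from the commit message). Only ioat_get_dma_capabilities(), IOAT_ENGINE_FILL_SUPPORTED, ioat_submit_fill(), and ioat_process_events() come from this commit; the header path, the zero_buffer() wrapper, and the assumption that a channel is already attached and the thread registered are illustrative, not part of the change.

/* Sketch only: assumes an ioat channel has already been probed/attached and
 * the calling thread registered via ioat_register_thread(). */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "ioat.h"	/* public header touched by this commit; path assumed */

static volatile bool g_fill_done;

static void
fill_done_cb(void *cb_arg)
{
	(void)cb_arg;
	g_fill_done = true;
}

/* Zero 'len' bytes at 'buf' with the DMA engine when block fill is
 * supported, otherwise fall back to memset(). 'len' is assumed to be a
 * multiple of 8, matching the alignment used by the perf example below. */
static int
zero_buffer(void *buf, uint64_t len)
{
	if (!(ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED)) {
		memset(buf, 0, len);
		return 0;
	}

	g_fill_done = false;
	if (ioat_submit_fill(NULL, fill_done_cb, buf, 0, len) < 0) {
		return -1;
	}

	/* Poll for the completion callback. */
	while (!g_fill_done) {
		ioat_process_events();
	}

	return 0;
}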


@@ -49,6 +49,11 @@
#define SRC_BUFFER_SIZE	(512*1024)

enum ioat_task_type {
	IOAT_COPY_TYPE,
	IOAT_FILL_TYPE,
};

struct user_config {
	int queue_depth;
	int time_in_sec;
@@ -67,6 +72,8 @@ static struct user_config g_user_config;
struct thread_entry {
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t fill_completed;
	uint64_t fill_failed;
	uint64_t current_queue_depth;
	unsigned lcore_id;
	bool is_draining;
@@ -75,9 +82,11 @@ struct thread_entry {
};

struct ioat_task {
	enum ioat_task_type type;
	struct thread_entry *thread_entry;
	void *buffer;
	int len;
	uint64_t fill_pattern;
	void *src;
	void *dst;
};
@@ -124,14 +133,29 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas
	int len;
	int src_offset;
	int dst_offset;
	int num_ddwords;
	uint64_t fill_pattern;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		fill_pattern = rand_r(&seed);
		fill_pattern = fill_pattern << 32 | rand_r(&seed);

		/* ensure that the length of the memset block is 8-byte aligned */
		num_ddwords = (rand_r(&seed) % SRC_BUFFER_SIZE) / 8;
		len = num_ddwords * 8;
		if (len < 8)
			len = 8;

		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
		ioat_task->fill_pattern = fill_pattern;
	} else {
		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);

		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
		ioat_task->src = g_src + src_offset;
	}

	ioat_task->len = len;
	ioat_task->dst = ioat_task->buffer + dst_offset;
	ioat_task->thread_entry = thread_entry;
}
@@ -139,14 +163,31 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas
static void
ioat_done(void *cb_arg)
{
	uint64_t *value;
	int i, failed = 0;
	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
	struct thread_entry *thread_entry = ioat_task->thread_entry;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		value = (uint64_t *)ioat_task->dst;
		for (i = 0; i < ioat_task->len / 8; i++) {
			if (*value != ioat_task->fill_pattern) {
				thread_entry->fill_failed++;
				failed = 1;
				break;
			}
			value++;
		}
		if (!failed)
			thread_entry->fill_completed++;
	} else {
		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
			thread_entry->xfer_failed++;
		} else {
			thread_entry->xfer_completed++;
		}
	}

	thread_entry->current_queue_depth--;

	if (thread_entry->is_draining) {
		rte_mempool_put(thread_entry->data_pool, ioat_task->buffer);
@@ -278,6 +319,9 @@ drain_xfers(struct thread_entry *thread_entry)
static void
submit_single_xfer(struct ioat_task *ioat_task)
{
	if (ioat_task->type == IOAT_FILL_TYPE)
		ioat_submit_fill(ioat_task, ioat_done, ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
	else
		ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst, ioat_task->src, ioat_task->len);
	ioat_task->thread_entry->current_queue_depth++;
}
@@ -290,6 +334,11 @@ submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
		rte_mempool_get(thread_entry->task_pool, (void **)&ioat_task);
		rte_mempool_get(thread_entry->data_pool, &(ioat_task->buffer));

		ioat_task->type = IOAT_COPY_TYPE;
		if (ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED) {
			if (queue_depth % 2)
				ioat_task->type = IOAT_FILL_TYPE;
		}
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
@@ -397,10 +446,12 @@ dump_result(struct thread_entry *threads, int len)
	for (i = 0; i < len; i++) {
		struct thread_entry *t = &threads[i];
		total_completed += t->xfer_completed;
		total_completed += t->fill_completed;
		total_failed += t->xfer_failed;
		total_failed += t->fill_failed;
		if (t->xfer_completed || t->xfer_failed)
			printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld \n",
			       t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed);
	}
	return total_failed ? 1 : 0;
}


@@ -90,6 +90,15 @@ void ioat_unregister_thread(void);
int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
			 void *dst, const void *src, uint64_t nbytes);

/**
 * Submit a DMA engine memory fill request.
 *
 * Before submitting any requests on a thread, the thread must be registered
 * using the \ref ioat_register_thread() function.
 */
int64_t ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
			 void *dst, uint64_t fill_pattern, uint64_t nbytes);

/**
 * Check for completed requests on the current thread.
 *
@@ -100,4 +109,20 @@ int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
 */
int ioat_process_events(void);

/**
 * DMA engine capability flags
 */
enum ioat_dma_capability_flags {
	IOAT_ENGINE_COPY_SUPPORTED = 0x1, /**< The memory copy is supported */
	IOAT_ENGINE_FILL_SUPPORTED = 0x2, /**< The memory fill is supported */
};

/**
 * Get the DMA engine capabilities.
 *
 * Before submitting any requests on a thread, the thread must be registered
 * using the \ref ioat_register_thread() function.
 */
uint32_t ioat_get_dma_capabilities(void);

#endif
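Note that fill_pattern is a full 64-bit value: the engine replicates the 8-byte pattern across the destination (the fill descriptor added below carries it as src_data), which is also how the perf tool verifies completed fills. A hypothetical helper, not part of this commit, could widen a single byte into that pattern for memset-style callers:

/* Hypothetical helper (not part of this commit): expand one byte into the
 * 64-bit pattern expected by ioat_submit_fill(), memset-style. */
static uint64_t
ioat_fill_pattern_from_byte(uint8_t byte)
{
	uint64_t pattern = byte;

	pattern |= pattern << 8;
	pattern |= pattern << 16;
	pattern |= pattern << 32;
	return pattern;
}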


@@ -53,6 +53,13 @@
#define IOAT_CHANCTRL_ERR_COMPLETION_EN	0x0004
#define IOAT_CHANCTRL_INT_REARM		0x0001

/* DMA Channel Capabilities */
#define IOAT_DMACAP_PB		(1 << 0)
#define IOAT_DMACAP_DCA		(1 << 4)
#define IOAT_DMACAP_BFILL	(1 << 6)
#define IOAT_DMACAP_XOR		(1 << 8)
#define IOAT_DMACAP_PQ		(1 << 9)
#define IOAT_DMACAP_DMA_DIF	(1 << 10)

struct ioat_registers {
	uint8_t chancnt;

@@ -295,6 +295,37 @@ ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst,
	return desc;
}

static struct ioat_descriptor *
ioat_prep_fill(struct ioat_channel *ioat, uint64_t dst,
	       uint64_t fill_pattern, uint32_t len)
{
	struct ioat_descriptor *desc;
	union ioat_hw_descriptor *hw_desc;

	ioat_assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc);

	hw_desc->fill.u.control_raw = 0;
	hw_desc->fill.u.control.op = IOAT_OP_FILL;
	hw_desc->fill.u.control.completion_update = 1;

	hw_desc->fill.size = len;
	hw_desc->fill.src_data = fill_pattern;
	hw_desc->fill.dest_addr = dst;

	desc->callback_fn = NULL;
	desc->callback_arg = NULL;

	ioat_submit_single(ioat);

	return desc;
}

static int ioat_reset_hw(struct ioat_channel *ioat)
{
	int timeout;
@@ -419,6 +450,10 @@ ioat_channel_start(struct ioat_channel *ioat)
		return -1;
	}

	/* Always support DMA copy */
	ioat->dma_capabilities = IOAT_ENGINE_COPY_SUPPORTED;
	if (ioat->regs->dmacapability & IOAT_DMACAP_BFILL)
		ioat->dma_capabilities |= IOAT_ENGINE_FILL_SUPPORTED;

	xfercap = ioat->regs->xfercap;

	/* Only bits [4:0] are valid. */
@@ -663,6 +698,75 @@ ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
	return nbytes;
}

int64_t
ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
		 void *dst, uint64_t fill_pattern, uint64_t nbytes)
{
	struct ioat_channel *ioat;
	struct ioat_descriptor *last_desc = NULL;
	uint64_t remaining, op_size;
	uint64_t vdst;
	uint32_t orig_head;

	ioat = ioat_thread_channel;
	if (!ioat) {
		return -1;
	}

	if (!(ioat->dma_capabilities & IOAT_ENGINE_FILL_SUPPORTED)) {
		ioat_printf(ioat, "Channel does not support memory fill\n");
		return -1;
	}

	orig_head = ioat->head;

	vdst = (uint64_t)dst;
	remaining = nbytes;

	while (remaining) {
		op_size = remaining;
		op_size = min(op_size, ioat->max_xfer_size);
		remaining -= op_size;

		last_desc = ioat_prep_fill(ioat,
					   ioat_vtophys((void *)vdst),
					   fill_pattern,
					   op_size);

		if (remaining == 0 || last_desc == NULL) {
			break;
		}

		vdst += op_size;
	}

	if (last_desc) {
		last_desc->callback_fn = cb_fn;
		last_desc->callback_arg = cb_arg;
	} else {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		ioat->head = orig_head;
		return -1;
	}

	ioat_flush(ioat);
	return nbytes;
}

uint32_t ioat_get_dma_capabilities(void)
{
	struct ioat_channel *ioat;

	ioat = ioat_thread_channel;
	if (!ioat) {
		return 0;
	}
	return ioat->dma_capabilities;
}

int ioat_process_events(void)
{
	if (!ioat_thread_channel) {


@@ -74,6 +74,7 @@ struct ioat_channel {
	struct ioat_descriptor *ring;
	union ioat_hw_descriptor *hw_ring;
	uint64_t hw_ring_phys_addr;
	uint32_t dma_capabilities;
};

static inline uint32_t