bbdev: enhance offload cost test
The offload cost test was improved to collect more accurate results.
Signed-off-by: Kamil Chalupnik <kamilx.chalupnik@intel.com>
Acked-by: Amr Mokhtar <amr.mokhtar@intel.com>
This commit is contained in:
parent
9d3e1cb135
commit
9fa6ebde8e
@ -88,19 +88,19 @@ struct thread_params {
|
||||
/* Stores time statistics */
|
||||
struct test_time_stats {
|
||||
/* Stores software enqueue total working time */
|
||||
uint64_t enq_sw_tot_time;
|
||||
uint64_t enq_sw_total_time;
|
||||
/* Stores minimum value of software enqueue working time */
|
||||
uint64_t enq_sw_min_time;
|
||||
/* Stores maximum value of software enqueue working time */
|
||||
uint64_t enq_sw_max_time;
|
||||
/* Stores turbo enqueue total working time */
|
||||
uint64_t enq_tur_tot_time;
|
||||
/* Stores minimum value of turbo enqueue working time */
|
||||
uint64_t enq_tur_min_time;
|
||||
/* Stores maximum value of turbo enqueue working time */
|
||||
uint64_t enq_tur_max_time;
|
||||
uint64_t enq_acc_total_time;
|
||||
/* Stores minimum value of accelerator enqueue working time */
|
||||
uint64_t enq_acc_min_time;
|
||||
/* Stores maximum value of accelerator enqueue working time */
|
||||
uint64_t enq_acc_max_time;
|
||||
/* Stores dequeue total working time */
|
||||
uint64_t deq_tot_time;
|
||||
uint64_t deq_total_time;
|
||||
/* Stores minimum value of dequeue working time */
|
||||
uint64_t deq_min_time;
|
||||
/* Stores maximum value of dequeue working time */
|
||||
@ -1200,12 +1200,15 @@ dequeue_event_callback(uint16_t dev_id,
|
||||
burst_sz = tp->op_params->burst_sz;
|
||||
num_to_process = tp->op_params->num_to_process;
|
||||
|
||||
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
|
||||
if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
|
||||
deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, dec_ops,
|
||||
burst_sz);
|
||||
else
|
||||
rte_bbdev_dec_op_free_bulk(dec_ops, deq);
|
||||
} else {
|
||||
deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, enc_ops,
|
||||
burst_sz);
|
||||
rte_bbdev_enc_op_free_bulk(enc_ops, deq);
|
||||
}
|
||||
|
||||
if (deq < burst_sz) {
|
||||
printf(
|
||||
@ -1316,8 +1319,6 @@ throughput_intr_lcore_dec(void *arg)
|
||||
|
||||
enqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id, ops,
|
||||
num_to_enq);
|
||||
|
||||
rte_bbdev_dec_op_free_bulk(ops, num_to_enq);
|
||||
}
|
||||
|
||||
if (allocs_failed > 0)
|
||||
@ -1380,8 +1381,6 @@ throughput_intr_lcore_enc(void *arg)
|
||||
|
||||
enqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id, ops,
|
||||
num_to_enq);
|
||||
|
||||
rte_bbdev_enc_op_free_bulk(ops, num_to_enq);
|
||||
}
|
||||
|
||||
if (allocs_failed > 0)
|
||||
@ -1575,13 +1574,14 @@ print_throughput(struct thread_params *t_params, unsigned int used_cores)
|
||||
RTE_LCORE_FOREACH(lcore_id) {
|
||||
if (iter++ >= used_cores)
|
||||
break;
|
||||
printf("\tlcore_id: %u, throughput: %.8lg MOPS, %.8lg Mbps\n",
|
||||
lcore_id, t_params[lcore_id].mops, t_params[lcore_id].mbps);
|
||||
printf("Throughput for core (%u): %.8lg MOPS, %.8lg Mbps\n",
|
||||
lcore_id, t_params[lcore_id].mops,
|
||||
t_params[lcore_id].mbps);
|
||||
total_mops += t_params[lcore_id].mops;
|
||||
total_mbps += t_params[lcore_id].mbps;
|
||||
}
|
||||
printf(
|
||||
"\n\tTotal stats for %u cores: throughput: %.8lg MOPS, %.8lg Mbps\n",
|
||||
"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
|
||||
used_cores, total_mops, total_mbps);
|
||||
}
|
||||
|
||||
@ -1882,7 +1882,7 @@ latency_test(struct active_device *ad,
|
||||
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
|
||||
|
||||
printf(
|
||||
"Validation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
|
||||
"\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
|
||||
info.dev_name, burst_sz, num_to_process, op_type_str);
|
||||
|
||||
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
|
||||
@ -1899,10 +1899,10 @@ latency_test(struct active_device *ad,
|
||||
if (iter <= 0)
|
||||
return TEST_FAILED;
|
||||
|
||||
printf("\toperation latency:\n"
|
||||
"\t\tavg latency: %lg cycles, %lg us\n"
|
||||
"\t\tmin latency: %lg cycles, %lg us\n"
|
||||
"\t\tmax latency: %lg cycles, %lg us\n",
|
||||
printf("Operation latency:\n"
|
||||
"\tavg latency: %lg cycles, %lg us\n"
|
||||
"\tmin latency: %lg cycles, %lg us\n"
|
||||
"\tmax latency: %lg cycles, %lg us\n",
|
||||
(double)total_time / (double)iter,
|
||||
(double)(total_time * 1000000) / (double)iter /
|
||||
(double)rte_get_tsc_hz(), (double)min_time,
|
||||
@ -1930,7 +1930,7 @@ get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
|
||||
stats->dequeued_count = q_stats->dequeued_count;
|
||||
stats->enqueue_err_count = q_stats->enqueue_err_count;
|
||||
stats->dequeue_err_count = q_stats->dequeue_err_count;
|
||||
stats->offload_time = q_stats->offload_time;
|
||||
stats->acc_offload_cycles = q_stats->acc_offload_cycles;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1974,18 +1974,18 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
|
||||
queue_id, dev_id);
|
||||
|
||||
enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
|
||||
stats.offload_time;
|
||||
stats.acc_offload_cycles;
|
||||
time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
|
||||
enq_sw_last_time);
|
||||
time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
|
||||
enq_sw_last_time);
|
||||
time_st->enq_sw_tot_time += enq_sw_last_time;
|
||||
time_st->enq_sw_total_time += enq_sw_last_time;
|
||||
|
||||
time_st->enq_tur_max_time = RTE_MAX(time_st->enq_tur_max_time,
|
||||
stats.offload_time);
|
||||
time_st->enq_tur_min_time = RTE_MIN(time_st->enq_tur_min_time,
|
||||
stats.offload_time);
|
||||
time_st->enq_tur_tot_time += stats.offload_time;
|
||||
time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
|
||||
stats.acc_offload_cycles);
|
||||
time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
|
||||
stats.acc_offload_cycles);
|
||||
time_st->enq_acc_total_time += stats.acc_offload_cycles;
|
||||
|
||||
/* ensure enqueue has been completed */
|
||||
rte_delay_ms(10);
|
||||
@ -2003,7 +2003,7 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
|
||||
deq_last_time);
|
||||
time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
|
||||
deq_last_time);
|
||||
time_st->deq_tot_time += deq_last_time;
|
||||
time_st->deq_total_time += deq_last_time;
|
||||
|
||||
/* Dequeue remaining operations if needed */
|
||||
while (burst_sz != deq)
|
||||
@ -2055,18 +2055,18 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
|
||||
queue_id, dev_id);
|
||||
|
||||
enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
|
||||
stats.offload_time;
|
||||
stats.acc_offload_cycles;
|
||||
time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
|
||||
enq_sw_last_time);
|
||||
time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
|
||||
enq_sw_last_time);
|
||||
time_st->enq_sw_tot_time += enq_sw_last_time;
|
||||
time_st->enq_sw_total_time += enq_sw_last_time;
|
||||
|
||||
time_st->enq_tur_max_time = RTE_MAX(time_st->enq_tur_max_time,
|
||||
stats.offload_time);
|
||||
time_st->enq_tur_min_time = RTE_MIN(time_st->enq_tur_min_time,
|
||||
stats.offload_time);
|
||||
time_st->enq_tur_tot_time += stats.offload_time;
|
||||
time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
|
||||
stats.acc_offload_cycles);
|
||||
time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
|
||||
stats.acc_offload_cycles);
|
||||
time_st->enq_acc_total_time += stats.acc_offload_cycles;
|
||||
|
||||
/* ensure enqueue has been completed */
|
||||
rte_delay_ms(10);
|
||||
@ -2084,7 +2084,7 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
|
||||
deq_last_time);
|
||||
time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
|
||||
deq_last_time);
|
||||
time_st->deq_tot_time += deq_last_time;
|
||||
time_st->deq_total_time += deq_last_time;
|
||||
|
||||
while (burst_sz != deq)
|
||||
deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
|
||||
@ -2121,7 +2121,7 @@ offload_cost_test(struct active_device *ad,
|
||||
|
||||
memset(&time_st, 0, sizeof(struct test_time_stats));
|
||||
time_st.enq_sw_min_time = UINT64_MAX;
|
||||
time_st.enq_tur_min_time = UINT64_MAX;
|
||||
time_st.enq_acc_min_time = UINT64_MAX;
|
||||
time_st.deq_min_time = UINT64_MAX;
|
||||
|
||||
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
|
||||
@ -2134,7 +2134,7 @@ offload_cost_test(struct active_device *ad,
|
||||
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
|
||||
|
||||
printf(
|
||||
"Offload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
|
||||
"\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
|
||||
info.dev_name, burst_sz, num_to_process, op_type_str);
|
||||
|
||||
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
|
||||
@ -2149,36 +2149,36 @@ offload_cost_test(struct active_device *ad,
|
||||
if (iter <= 0)
|
||||
return TEST_FAILED;
|
||||
|
||||
printf("\tenq offload cost latency:\n"
|
||||
"\t\tsoftware avg %lg cycles, %lg us\n"
|
||||
"\t\tsoftware min %lg cycles, %lg us\n"
|
||||
"\t\tsoftware max %lg cycles, %lg us\n"
|
||||
"\t\tturbo avg %lg cycles, %lg us\n"
|
||||
"\t\tturbo min %lg cycles, %lg us\n"
|
||||
"\t\tturbo max %lg cycles, %lg us\n",
|
||||
(double)time_st.enq_sw_tot_time / (double)iter,
|
||||
(double)(time_st.enq_sw_tot_time * 1000000) /
|
||||
printf("Enqueue offload cost latency:\n"
|
||||
"\tDriver offload avg %lg cycles, %lg us\n"
|
||||
"\tDriver offload min %lg cycles, %lg us\n"
|
||||
"\tDriver offload max %lg cycles, %lg us\n"
|
||||
"\tAccelerator offload avg %lg cycles, %lg us\n"
|
||||
"\tAccelerator offload min %lg cycles, %lg us\n"
|
||||
"\tAccelerator offload max %lg cycles, %lg us\n",
|
||||
(double)time_st.enq_sw_total_time / (double)iter,
|
||||
(double)(time_st.enq_sw_total_time * 1000000) /
|
||||
(double)iter / (double)rte_get_tsc_hz(),
|
||||
(double)time_st.enq_sw_min_time,
|
||||
(double)(time_st.enq_sw_min_time * 1000000) /
|
||||
rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
|
||||
(double)(time_st.enq_sw_max_time * 1000000) /
|
||||
rte_get_tsc_hz(), (double)time_st.enq_tur_tot_time /
|
||||
rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
|
||||
(double)iter,
|
||||
(double)(time_st.enq_tur_tot_time * 1000000) /
|
||||
(double)(time_st.enq_acc_total_time * 1000000) /
|
||||
(double)iter / (double)rte_get_tsc_hz(),
|
||||
(double)time_st.enq_tur_min_time,
|
||||
(double)(time_st.enq_tur_min_time * 1000000) /
|
||||
rte_get_tsc_hz(), (double)time_st.enq_tur_max_time,
|
||||
(double)(time_st.enq_tur_max_time * 1000000) /
|
||||
(double)time_st.enq_acc_min_time,
|
||||
(double)(time_st.enq_acc_min_time * 1000000) /
|
||||
rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
|
||||
(double)(time_st.enq_acc_max_time * 1000000) /
|
||||
rte_get_tsc_hz());
|
||||
|
||||
printf("\tdeq offload cost latency - one op:\n"
|
||||
"\t\tavg %lg cycles, %lg us\n"
|
||||
"\t\tmin %lg cycles, %lg us\n"
|
||||
"\t\tmax %lg cycles, %lg us\n",
|
||||
(double)time_st.deq_tot_time / (double)iter,
|
||||
(double)(time_st.deq_tot_time * 1000000) /
|
||||
printf("Dequeue offload cost latency - one op:\n"
|
||||
"\tavg %lg cycles, %lg us\n"
|
||||
"\tmin %lg cycles, %lg us\n"
|
||||
"\tmax %lg cycles, %lg us\n",
|
||||
(double)time_st.deq_total_time / (double)iter,
|
||||
(double)(time_st.deq_total_time * 1000000) /
|
||||
(double)iter / (double)rte_get_tsc_hz(),
|
||||
(double)time_st.deq_min_time,
|
||||
(double)(time_st.deq_min_time * 1000000) /
|
||||
@ -2194,7 +2194,7 @@ offload_cost_test(struct active_device *ad,
|
||||
static int
|
||||
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
|
||||
const uint16_t num_to_process, uint16_t burst_sz,
|
||||
uint64_t *deq_tot_time, uint64_t *deq_min_time,
|
||||
uint64_t *deq_total_time, uint64_t *deq_min_time,
|
||||
uint64_t *deq_max_time)
|
||||
{
|
||||
int i, deq_total;
|
||||
@ -2214,7 +2214,7 @@ offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
|
||||
deq_last_time = rte_rdtsc_precise() - deq_start_time;
|
||||
*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
|
||||
*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
|
||||
*deq_tot_time += deq_last_time;
|
||||
*deq_total_time += deq_last_time;
|
||||
}
|
||||
|
||||
return i;
|
||||
@ -2223,7 +2223,7 @@ offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
|
||||
static int
|
||||
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
|
||||
const uint16_t num_to_process, uint16_t burst_sz,
|
||||
uint64_t *deq_tot_time, uint64_t *deq_min_time,
|
||||
uint64_t *deq_total_time, uint64_t *deq_min_time,
|
||||
uint64_t *deq_max_time)
|
||||
{
|
||||
int i, deq_total;
|
||||
@ -2242,7 +2242,7 @@ offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
|
||||
deq_last_time = rte_rdtsc_precise() - deq_start_time;
|
||||
*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
|
||||
*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
|
||||
*deq_tot_time += deq_last_time;
|
||||
*deq_total_time += deq_last_time;
|
||||
}
|
||||
|
||||
return i;
|
||||
@ -2261,7 +2261,7 @@ offload_latency_empty_q_test(struct active_device *ad,
|
||||
return TEST_SKIPPED;
|
||||
#else
|
||||
int iter;
|
||||
uint64_t deq_tot_time, deq_min_time, deq_max_time;
|
||||
uint64_t deq_total_time, deq_min_time, deq_max_time;
|
||||
uint16_t burst_sz = op_params->burst_sz;
|
||||
const uint16_t num_to_process = op_params->num_to_process;
|
||||
const enum rte_bbdev_op_type op_type = test_vector.op_type;
|
||||
@ -2269,7 +2269,7 @@ offload_latency_empty_q_test(struct active_device *ad,
|
||||
struct rte_bbdev_info info;
|
||||
const char *op_type_str;
|
||||
|
||||
deq_tot_time = deq_max_time = 0;
|
||||
deq_total_time = deq_max_time = 0;
|
||||
deq_min_time = UINT64_MAX;
|
||||
|
||||
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
|
||||
@ -2281,27 +2281,27 @@ offload_latency_empty_q_test(struct active_device *ad,
|
||||
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
|
||||
|
||||
printf(
|
||||
"Offload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
|
||||
"\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
|
||||
info.dev_name, burst_sz, num_to_process, op_type_str);
|
||||
|
||||
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
|
||||
iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
|
||||
num_to_process, burst_sz, &deq_tot_time,
|
||||
num_to_process, burst_sz, &deq_total_time,
|
||||
&deq_min_time, &deq_max_time);
|
||||
else
|
||||
iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
|
||||
num_to_process, burst_sz, &deq_tot_time,
|
||||
num_to_process, burst_sz, &deq_total_time,
|
||||
&deq_min_time, &deq_max_time);
|
||||
|
||||
if (iter <= 0)
|
||||
return TEST_FAILED;
|
||||
|
||||
printf("\tempty deq offload\n"
|
||||
"\t\tavg. latency: %lg cycles, %lg us\n"
|
||||
"\t\tmin. latency: %lg cycles, %lg us\n"
|
||||
"\t\tmax. latency: %lg cycles, %lg us\n",
|
||||
(double)deq_tot_time / (double)iter,
|
||||
(double)(deq_tot_time * 1000000) / (double)iter /
|
||||
printf("Empty dequeue offload\n"
|
||||
"\tavg. latency: %lg cycles, %lg us\n"
|
||||
"\tmin. latency: %lg cycles, %lg us\n"
|
||||
"\tmax. latency: %lg cycles, %lg us\n",
|
||||
(double)deq_total_time / (double)iter,
|
||||
(double)(deq_total_time * 1000000) / (double)iter /
|
||||
(double)rte_get_tsc_hz(), (double)deq_min_time,
|
||||
(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
|
||||
(double)deq_max_time, (double)(deq_max_time * 1000000) /
|
||||
|
@ -481,7 +481,7 @@ CONFIG_RTE_PMD_PACKET_PREFETCH=y
|
||||
#
|
||||
CONFIG_RTE_LIBRTE_BBDEV=y
|
||||
CONFIG_RTE_BBDEV_MAX_DEVS=128
|
||||
CONFIG_RTE_BBDEV_OFFLOAD_COST=n
|
||||
CONFIG_RTE_BBDEV_OFFLOAD_COST=y
|
||||
|
||||
#
|
||||
# Compile PMD for NULL bbdev device
|
||||
|
@ -510,9 +510,10 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
/* CRC24A generation */
|
||||
bblib_lte_crc24a_gen(&crc_req, &crc_resp);
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->offload_time += rte_rdtsc_precise() - start_time;
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
|
||||
/* CRC24B */
|
||||
@ -542,9 +543,10 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
/* CRC24B generation */
|
||||
bblib_lte_crc24b_gen(&crc_req, &crc_resp);
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->offload_time += rte_rdtsc_precise() - start_time;
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
} else {
|
||||
ret = is_enc_input_valid(k, k_idx, total_left);
|
||||
@ -596,15 +598,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
|
||||
/* Turbo encoding */
|
||||
if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
|
||||
op->status |= 1 << RTE_BBDEV_DRV_ERROR;
|
||||
rte_bbdev_log(ERR, "Turbo Encoder failed");
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->offload_time += rte_rdtsc_precise() - start_time;
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
|
||||
/* Restore the first 3 bytes of the next CB if they were overwritten by CRC */
|
||||
@ -671,23 +672,21 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
|
||||
/* Rate-Matching */
|
||||
if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
|
||||
op->status |= 1 << RTE_BBDEV_DRV_ERROR;
|
||||
rte_bbdev_log(ERR, "Rate matching failed");
|
||||
return;
|
||||
}
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
|
||||
/* SW fills an entire last byte even if E%8 != 0. Clear the
|
||||
* superfluous data bits for consistency with HW device.
|
||||
*/
|
||||
mask_id = (e & 7) >> 1;
|
||||
rm_out[out_len - 1] &= mask_out[mask_id];
|
||||
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->offload_time += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
|
||||
enc->output.length += rm_resp.OutputLen;
|
||||
} else {
|
||||
/* Rate matching is bypassed */
|
||||
@ -798,7 +797,7 @@ enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
|
||||
{
|
||||
uint16_t i;
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
queue_stats->offload_time = 0;
|
||||
queue_stats->acc_offload_cycles = 0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < nb_ops; ++i)
|
||||
@ -905,7 +904,8 @@ static inline void
|
||||
process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
|
||||
struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
|
||||
bool check_crc_24b, uint16_t crc24_overlap, uint16_t total_left)
|
||||
bool check_crc_24b, uint16_t crc24_overlap, uint16_t total_left,
|
||||
struct rte_bbdev_stats *q_stats)
|
||||
{
|
||||
int ret;
|
||||
int32_t k_idx;
|
||||
@ -917,6 +917,11 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
struct bblib_turbo_decoder_request turbo_req;
|
||||
struct bblib_turbo_decoder_response turbo_resp;
|
||||
struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
uint64_t start_time;
|
||||
#else
|
||||
RTE_SET_USED(q_stats);
|
||||
#endif
|
||||
|
||||
k_idx = compute_idx(k);
|
||||
|
||||
@ -942,7 +947,14 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
deint_req.pharqbuffer = q->deint_input;
|
||||
deint_req.ncb = ncb_without_null;
|
||||
deint_resp.pinteleavebuffer = q->deint_output;
|
||||
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
bblib_deinterleave_ul(&deint_req, &deint_resp);
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
} else
|
||||
move_padding_bytes(in, q->deint_output, k, ncb);
|
||||
|
||||
@ -961,7 +973,15 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
adapter_req.ncb = ncb_without_null;
|
||||
adapter_req.pinteleavebuffer = adapter_input;
|
||||
adapter_resp.pharqout = q->adapter_output;
|
||||
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
/* Turbo decode adaptation */
|
||||
bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
|
||||
out = (uint8_t *)rte_pktmbuf_append(m_out, ((k - crc24_overlap) >> 3));
|
||||
if (out == NULL) {
|
||||
@ -986,12 +1006,20 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
turbo_resp.ag_buf = q->ag;
|
||||
turbo_resp.cb_buf = q->code_block;
|
||||
turbo_resp.output = out;
|
||||
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
start_time = rte_rdtsc_precise();
|
||||
#endif
|
||||
/* Turbo decode */
|
||||
iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
|
||||
#endif
|
||||
dec->hard_output.length += (k >> 3);
|
||||
|
||||
if (iter_cnt > 0) {
|
||||
/* Temporary solution for returned iter_count from SDK */
|
||||
iter_cnt = (iter_cnt - 1) / 2;
|
||||
iter_cnt = (iter_cnt - 1) >> 1;
|
||||
dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
|
||||
} else {
|
||||
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
|
||||
@ -1001,7 +1029,8 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
}
|
||||
|
||||
static inline void
|
||||
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op)
|
||||
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
|
||||
struct rte_bbdev_stats *queue_stats)
|
||||
{
|
||||
uint8_t c, r = 0;
|
||||
uint16_t kw, k = 0;
|
||||
@ -1053,7 +1082,7 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op)
|
||||
process_dec_cb(q, op, c, k, kw, m_in, m_out, in_offset,
|
||||
out_offset, check_bit(dec->op_flags,
|
||||
RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
|
||||
total_left);
|
||||
total_left, queue_stats);
|
||||
/* To keep CRC24 attached to end of Code block, use
|
||||
* RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it is
|
||||
* removed by default once verified.
|
||||
@ -1075,12 +1104,15 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op)
|
||||
|
||||
static inline uint16_t
|
||||
enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
|
||||
uint16_t nb_ops)
|
||||
uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
|
||||
{
|
||||
uint16_t i;
|
||||
#ifdef RTE_BBDEV_OFFLOAD_COST
|
||||
queue_stats->acc_offload_cycles = 0;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < nb_ops; ++i)
|
||||
enqueue_dec_one_op(q, ops[i]);
|
||||
enqueue_dec_one_op(q, ops[i], queue_stats);
|
||||
|
||||
return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
|
||||
NULL);
|
||||
@ -1112,7 +1144,7 @@ enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
|
||||
struct turbo_sw_queue *q = queue;
|
||||
uint16_t nb_enqueued = 0;
|
||||
|
||||
nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops);
|
||||
nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);
|
||||
|
||||
q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
|
||||
q_data->queue_stats.enqueued_count += nb_enqueued;
|
||||
|
@ -239,8 +239,13 @@ struct rte_bbdev_stats {
|
||||
uint64_t enqueue_err_count;
|
||||
/** Total error count on operations dequeued */
|
||||
uint64_t dequeue_err_count;
|
||||
/** Offload time */
|
||||
uint64_t offload_time;
|
||||
/** CPU cycles consumed by the (HW/SW) accelerator device to offload
|
||||
* the enqueue request to its internal queues.
|
||||
* - For a HW device, this is the number of cycles consumed by the MMIO write
|
||||
* - For a SW (vdev) device, this is the processing time of the
|
||||
* bbdev operation
|
||||
*/
|
||||
uint64_t acc_offload_cycles;
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user