baseband/turbo_sw: extend for 5G

The implementation is still based on Intel SDK libraries
optimized for the AVX512 instruction set and 5GNR.
It can also be built for AVX2 for 4G capability, or
without the SDK dependency for maintenance.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Amr Mokhtar <amr.mokhtar@intel.com>
This commit is contained in:
Nicolas Chautru 2019-07-03 08:24:07 -07:00 committed by Akhil Goyal
parent 3f3f608142
commit c769c71175
6 changed files with 708 additions and 10 deletions

View File

@ -541,6 +541,7 @@ CONFIG_RTE_LIBRTE_BBDEV_DEBUG=n
CONFIG_RTE_BBDEV_MAX_DEVS=128
CONFIG_RTE_BBDEV_OFFLOAD_COST=y
CONFIG_RTE_BBDEV_SDK_AVX2=n
CONFIG_RTE_BBDEV_SDK_AVX512=n
#
# Compile PMD for NULL bbdev device

View File

@ -139,6 +139,14 @@ New Features
(Programmable Acceleration Card) N3000. See the
:doc:`../bbdevs/fpga_lte_fec` BBDEV guide for more details on this new driver.
* **Updated TURBO_SW bbdev PMD.**
Updated the ``turbo_sw`` bbdev driver with changes including:
* Added option to build the driver with or without a dependency on external
SDK libraries.
* Added support for 5GNR encode/decode operations.
* **Updated the QuickAssist Technology (QAT) symmetric crypto PMD.**
Added support for digest-encrypted cases where digest is appended

View File

@ -3,7 +3,6 @@
include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_pmd_bbdev_turbo_sw.a
@ -34,6 +33,20 @@ LDLIBS += -L$(FLEXRAN_SDK)/lib_common -lcommon
LDLIBS += -lstdc++ -lirc -limf -lipps -lsvml
endif
# 5GNR (LDPC) support: add the SDK include paths and libraries for the LDPC
# encoder/decoder and rate (de)matching when the AVX512 SDK option is enabled.
ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y)
# The AVX512 option requires the AVX2 option to be enabled as well. Reject any
# value other than 'y' (including an unset variable), not only an explicit 'n'.
ifneq ($(CONFIG_RTE_BBDEV_SDK_AVX2),y)
$(error "CONFIG_RTE_BBDEV_SDK_AVX512 requires CONFIG_RTE_BBDEV_SDK_AVX2 set")
endif
CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr
CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr
CFLAGS += -I$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr
CFLAGS += -I$(FLEXRAN_SDK)/lib_rate_dematching_5gnr
LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr -lldpc_encoder_5gnr
LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr -lldpc_decoder_5gnr
LDLIBS += -L$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr -lLDPC_ratematch_5gnr
LDLIBS += -L$(FLEXRAN_SDK)/lib_rate_dematching_5gnr -lrate_dematching_5gnr
endif
# library version
LIBABIVER := 1

View File

@ -14,11 +14,23 @@
#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>
#include <rte_hexdump.h>
#include <rte_log.h>
#ifdef RTE_BBDEV_SDK_AVX2
#include <ipp.h>
#include <ipps.h>
#include <phy_turbo.h>
#include <phy_crc.h>
#include <phy_rate_match.h>
#endif
#ifdef RTE_BBDEV_SDK_AVX512
#include <bit_reverse.h>
#include <phy_ldpc_encoder_5gnr.h>
#include <phy_ldpc_decoder_5gnr.h>
#include <phy_LDPC_ratematch_5gnr.h>
#include <phy_rate_dematching_5gnr.h>
#endif
#define DRIVER_NAME baseband_turbo_sw
@ -84,6 +96,7 @@ struct turbo_sw_queue {
enum rte_bbdev_op_type type;
} __rte_cache_aligned;
#ifdef RTE_BBDEV_SDK_AVX2
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
@ -179,6 +192,41 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
}
},
#endif
#ifdef RTE_BBDEV_SDK_AVX512
{
.type = RTE_BBDEV_OP_LDPC_ENC,
.cap.ldpc_enc = {
.capability_flags =
RTE_BBDEV_LDPC_RATE_MATCH |
RTE_BBDEV_LDPC_CRC_24A_ATTACH |
RTE_BBDEV_LDPC_CRC_24B_ATTACH,
.num_buffers_src =
RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
.num_buffers_dst =
RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
}
},
{
.type = RTE_BBDEV_OP_LDPC_DEC,
.cap.ldpc_dec = {
.capability_flags =
RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
.llr_size = 8,
.llr_decimals = 2,
.harq_memory_size = 0,
.num_buffers_src =
RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
.num_buffers_hard_out =
RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
.num_buffers_soft_out = 0,
}
},
#endif
RTE_BBDEV_END_OF_CAPABILITIES_LIST()
};
@ -186,14 +234,12 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
static struct rte_bbdev_queue_conf default_queue_conf = {
.queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
};
#ifdef RTE_BBDEV_SDK_AVX2
static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
dev_info->cpu_flag_reqs = &cpu_flag;
#else
dev_info->cpu_flag_reqs = NULL;
#endif
default_queue_conf.socket = dev->data->socket_id;
dev_info->driver_name = RTE_STR(DRIVER_NAME);
@ -280,7 +326,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
return -ENAMETOOLONG;
}
q->enc_in = rte_zmalloc_socket(name,
(RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
(RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
RTE_CACHE_LINE_SIZE, queue_conf->socket);
if (q->enc_in == NULL) {
rte_bbdev_log(ERR,
@ -288,7 +334,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
goto free_q;
}
/* Allocate memory for Aplha Gamma temp buffer. */
/* Allocate memory for Alpha Gamma temp buffer. */
ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
dev->data->dev_id, q_id);
if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
@ -423,6 +469,7 @@ static const struct rte_bbdev_ops pmd_ops = {
};
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
/* Checks if the encoder input buffer is correct.
* Returns 0 if it's valid, -1 otherwise.
*/
@ -478,6 +525,7 @@ is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
return 0;
}
#endif
#endif
static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
@ -487,7 +535,11 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
uint16_t in_length, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
int ret;
#else
RTE_SET_USED(in_length);
#endif
int16_t k_idx;
uint16_t m;
uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
@ -511,11 +563,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
/* CRC24A (for TB) */
if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
(enc->code_block_mode == 1)) {
#ifdef RTE_LIBRTE_BBDEV_DEBUG
ret = is_enc_input_valid(k - 24, k_idx, in_length);
if (ret != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
return;
}
#endif
crc_req.data = in;
crc_req.len = k - 24;
/* Check if there is a room for CRC bits if not use
@ -544,11 +599,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
#endif
} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
/* CRC24B */
#ifdef RTE_LIBRTE_BBDEV_DEBUG
ret = is_enc_input_valid(k - 24, k_idx, in_length);
if (ret != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
return;
}
#endif
crc_req.data = in;
crc_req.len = k - 24;
/* Check if there is a room for CRC bits if this is the last
@ -575,13 +633,16 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
#ifdef RTE_BBDEV_OFFLOAD_COST
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
} else {
}
#ifdef RTE_LIBRTE_BBDEV_DEBUG
else {
ret = is_enc_input_valid(k, k_idx, in_length);
if (ret != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
return;
}
}
#endif
/* Turbo encoder */
@ -757,6 +818,143 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
#endif
}
/*
 * Encode a single LDPC code block: optional CRC24A/CRC24B attachment, LDPC
 * encoding and rate matching, with the result copied into the output mbuf.
 *
 * The body is only compiled when RTE_BBDEV_SDK_AVX512 is defined; otherwise
 * every parameter is marked unused and the function does nothing.
 *
 * q              - queue providing the scratch buffers used here (enc_in,
 *                  enc_out, adapter_output, deint_output)
 * op             - encode operation; error bits are OR-ed into op->status and
 *                  op->ldpc_enc.output.length grows by the bytes produced
 * e              - rate-matched output size in bits; (e + 7) >> 3 bytes are
 *                  appended to the output mbuf
 * m_in           - mbuf holding the input code block
 * m_out_head     - head of the output mbuf chain (passed to mbuf_append)
 * m_out          - output mbuf segment that receives the data
 * in_offset      - byte offset of the code block inside m_in
 * out_offset     - byte offset inside m_out where the output is written
 * seg_total_left - not used by this function
 * q_stats        - acc_offload_cycles is increased by the elapsed cycles when
 *                  RTE_BBDEV_OFFLOAD_COST is defined; unused otherwise
 */
static inline void
process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
uint16_t seg_total_left, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
RTE_SET_USED(seg_total_left);
uint8_t *in, *rm_out;
struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
struct bblib_ldpc_encoder_5gnr_request ldpc_req;
struct bblib_ldpc_encoder_5gnr_response ldpc_resp;
struct bblib_LDPC_ratematch_5gnr_request rm_req;
struct bblib_LDPC_ratematch_5gnr_response rm_resp;
struct bblib_crc_request crc_req;
struct bblib_crc_response crc_resp;
uint16_t msgLen, puntBits, parity_offset, out_len;
/* K: 22 columns for basegraph 1, 10 otherwise, each of z_c bits */
uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
/* Input size is K without the filler bits, rounded up to whole bytes */
uint16_t in_length_in_bits = K - enc->n_filler;
uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3;
#ifdef RTE_BBDEV_OFFLOAD_COST
uint64_t start_time = rte_rdtsc_precise();
#else
RTE_SET_USED(q_stats);
#endif
in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
/* Masking the Filler bits explicitly */
/*
 * Zeroes enc_in from 3 bytes before the end of the input up to the last
 * byte of K bits.
 * NOTE(review): assumes in_length_in_bytes >= 3 - confirm against callers.
 */
memset(q->enc_in + (in_length_in_bytes - 3), 0,
((K + 7) >> 3) - (in_length_in_bytes - 3));
/* CRC Generation */
/*
 * With a CRC flag set, only the first (in_length_in_bytes - 3) bytes are
 * copied and the CRC generator is given the 24 bits shorter length, with
 * enc_in as its response buffer (presumably data followed by the CRC -
 * verify against the SDK). Without a CRC flag the input is copied whole.
 */
if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) {
rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
crc_req.data = in;
crc_req.len = in_length_in_bits - 24;
crc_resp.data = q->enc_in;
bblib_lte_crc24a_gen(&crc_req, &crc_resp);
} else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) {
rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
crc_req.data = in;
crc_req.len = in_length_in_bits - 24;
crc_resp.data = q->enc_in;
bblib_lte_crc24b_gen(&crc_req, &crc_resp);
} else
rte_memcpy(q->enc_in, in, in_length_in_bytes);
/* LDPC Encoding */
ldpc_req.Zc = enc->z_c;
ldpc_req.baseGraph = enc->basegraph;
/* Number of rows set to maximum */
ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42;
ldpc_req.numberCodeblocks = 1;
ldpc_req.input[0] = (int8_t *) q->enc_in;
ldpc_resp.output[0] = (int8_t *) q->enc_out;
/* Applied in place on enc_in before encoding; length is given in bits */
bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3);
if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) {
op->status |= 1 << RTE_BBDEV_DRV_ERROR;
rte_bbdev_log(ERR, "LDPC Encoder failed");
return;
}
/*
 * Systematic + Parity : Recreating stream with filler bits, ideally
 * the bit select could handle this in the RM SDK
 */
msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc;
puntBits = 2 * ldpc_req.Zc;
parity_offset = msgLen - puntBits;
/*
 * First copy: parity_offset bits of the encoder input, starting puntBits
 * bits in, go to the start of adapter_output. Second copy: nRows * Zc bits
 * of encoder output are placed right after them, at bit parity_offset.
 */
ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8),
puntBits%8, q->adapter_output, 0, parity_offset);
ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8),
parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc);
out_len = (e + 7) >> 3;
/* get output data starting address */
rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
if (rm_out == NULL) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR,
"Too little space in output mbuf");
return;
}
/*
 * rte_bbdev_op_data.offset can be different than the offset
 * of the appended bytes
 */
rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
/* Rate-Matching */
rm_req.E = e;
rm_req.Ncb = enc->n_cb;
rm_req.Qm = enc->q_m;
rm_req.Zc = enc->z_c;
rm_req.baseGraph = enc->basegraph;
rm_req.input = q->adapter_output;
rm_req.nLen = enc->n_filler;
rm_req.nullIndex = parity_offset - enc->n_filler;
rm_req.rvidx = enc->rv_index;
rm_resp.output = q->deint_output;
if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) {
op->status |= 1 << RTE_BBDEV_DRV_ERROR;
rte_bbdev_log(ERR, "Rate matching failed");
return;
}
/* RM SDK may provide non zero bits on last byte */
if ((e % 8) != 0)
q->deint_output[out_len-1] &= (1 << (e % 8)) - 1;
bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3);
rte_memcpy(rm_out, q->deint_output, out_len);
enc->output.length += out_len;
/* Note: the early error returns above skip this cycle accounting */
#ifdef RTE_BBDEV_OFFLOAD_COST
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
#else
/* Built without the AVX512 SDK: nothing to do, silence unused warnings */
RTE_SET_USED(q);
RTE_SET_USED(op);
RTE_SET_USED(e);
RTE_SET_USED(m_in);
RTE_SET_USED(m_out_head);
RTE_SET_USED(m_out);
RTE_SET_USED(in_offset);
RTE_SET_USED(out_offset);
RTE_SET_USED(seg_total_left);
RTE_SET_USED(q_stats);
#endif
}
static inline void
enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
struct rte_bbdev_stats *queue_stats)
@ -850,6 +1048,93 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
}
}
static inline void
enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
struct rte_bbdev_stats *queue_stats)
{
uint8_t c, r, crc24_bits = 0;
uint32_t e;
struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
uint16_t in_offset = enc->input.offset;
uint16_t out_offset = enc->output.offset;
struct rte_mbuf *m_in = enc->input.data;
struct rte_mbuf *m_out = enc->output.data;
struct rte_mbuf *m_out_head = enc->output.data;
uint32_t in_length, mbuf_total_left = enc->input.length;
uint16_t seg_total_left;
/* Clear op status */
op->status = 0;
if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
op->status = 1 << RTE_BBDEV_DATA_ERROR;
return;
}
if (m_in == NULL || m_out == NULL) {
rte_bbdev_log(ERR, "Invalid mbuf pointer");
op->status = 1 << RTE_BBDEV_DATA_ERROR;
return;
}
if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
(enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
crc24_bits = 24;
if (enc->code_block_mode == 0) { /* For Transport Block mode */
c = enc->tb_params.c;
r = enc->tb_params.r;
} else { /* For Code Block mode */
c = 1;
r = 0;
}
while (mbuf_total_left > 0 && r < c) {
seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
if (enc->code_block_mode == 0) {
e = (r < enc->tb_params.cab) ?
enc->tb_params.ea : enc->tb_params.eb;
} else {
e = enc->cb_params.e;
}
process_ldpc_enc_cb(q, op, e, m_in, m_out_head,
m_out, in_offset, out_offset, seg_total_left,
queue_stats);
/* Update total_left */
in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
in_length = ((in_length - crc24_bits - enc->n_filler) >> 3);
mbuf_total_left -= in_length;
/* Update offsets for next CBs (if exist) */
in_offset += in_length;
out_offset += (e + 7) >> 3;
/* Update offsets */
if (seg_total_left == in_length) {
/* Go to the next mbuf */
m_in = m_in->next;
m_out = m_out->next;
in_offset = 0;
out_offset = 0;
}
r++;
}
/* check if all input data was processed */
if (mbuf_total_left != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR,
"Mismatch between mbuf length and included CBs sizes %d",
mbuf_total_left);
}
}
static inline uint16_t
enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
@ -866,6 +1151,23 @@ enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
NULL);
}
/*
 * Process every LDPC encode operation of the burst, then post the whole
 * burst on the queue's processed ring. Returns the number of operations the
 * ring accepted.
 */
static inline uint16_t
enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t idx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
	/* The cycle counter is restarted for each burst */
	queue_stats->acc_offload_cycles = 0;
#endif

	while (idx < nb_ops) {
		enqueue_ldpc_enc_one_op(q, ops[idx], queue_stats);
		idx++;
	}

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}
#ifdef RTE_BBDEV_SDK_AVX2
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
@ -890,7 +1192,11 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
int ret;
#else
RTE_SET_USED(in_length);
#endif
int32_t k_idx;
int32_t iter_cnt;
uint8_t *in, *out, *adapter_input;
@ -908,11 +1214,13 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
k_idx = compute_idx(k);
#ifdef RTE_LIBRTE_BBDEV_DEBUG
ret = is_dec_input_valid(k_idx, kw, in_length);
if (ret != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
return;
}
#endif
in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
ncb = kw;
@ -930,6 +1238,7 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
#ifdef RTE_BBDEV_OFFLOAD_COST
start_time = rte_rdtsc_precise();
#endif
/* Sub-block De-Interleaving */
bblib_deinterleave_ul(&deint_req, &deint_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
@ -1024,6 +1333,202 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
#endif
}
/*
 * Decode a single LDPC code block: optional HARQ combine input, rate
 * dematching, LDPC decoding, optional CRC24A/CRC24B check, optional HARQ
 * combined output, and copy of the hard output into the output mbuf.
 *
 * The body is only compiled when RTE_BBDEV_SDK_AVX512 is defined; otherwise
 * every parameter is marked unused and the function does nothing.
 *
 * q               - queue providing the scratch buffers used here (ag,
 *                   enc_out, adapter_output)
 * op              - decode operation; error bits are OR-ed into op->status,
 *                   and iter_count plus the hard/HARQ output lengths of
 *                   op->ldpc_dec are updated
 * c               - not used by this function
 * out_length      - number of hard-output bytes appended to the output mbuf
 * e               - size passed to the rate dematcher as derm_req.e
 * m_in            - mbuf holding the input of this code block
 * m_out_head      - head of the hard-output mbuf chain (for mbuf_append)
 * m_out           - hard-output mbuf segment that receives the data
 * m_harq_in       - HARQ combined input mbuf, may be NULL
 * m_harq_out_head - head of the HARQ output mbuf chain (for mbuf_append)
 * m_harq_out      - HARQ output mbuf segment, may be NULL
 * in_offset, out_offset, harq_in_offset, harq_out_offset
 *                 - byte offsets inside the respective mbufs
 * check_crc_24b   - selects the CRC24B check when true, CRC24A otherwise
 *                   (only consulted when a CRC check flag is set)
 * crc24_overlap   - bit count; crc24_overlap >> 3 bytes are added to
 *                   out_length for the bit reversal of the decoder output
 * in_length       - not used by this function
 * q_stats         - acc_offload_cycles is increased by the elapsed cycles
 *                   when RTE_BBDEV_OFFLOAD_COST is defined; unused otherwise
 */
static inline void
process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
uint8_t c, uint16_t out_length, uint16_t e,
struct rte_mbuf *m_in,
struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
struct rte_mbuf *m_harq_in,
struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out,
uint16_t in_offset, uint16_t out_offset,
uint16_t harq_in_offset, uint16_t harq_out_offset,
bool check_crc_24b,
uint16_t crc24_overlap, uint16_t in_length,
struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
RTE_SET_USED(in_length);
RTE_SET_USED(c);
uint8_t *in, *out, *harq_in, *harq_out, *adapter_input;
struct bblib_rate_dematching_5gnr_request derm_req;
struct bblib_rate_dematching_5gnr_response derm_resp;
struct bblib_ldpc_decoder_5gnr_request dec_req;
struct bblib_ldpc_decoder_5gnr_response dec_resp;
struct bblib_crc_request crc_req;
struct bblib_crc_response crc_resp;
struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
uint16_t K, parity_offset, sys_cols, outLenWithCrc;
int16_t deRmOutSize, numRows;
/* Compute some LDPC BG lengths */
outLenWithCrc = out_length + (crc24_overlap >> 3);
sys_cols = (dec->basegraph == 1) ? 22 : 10;
K = sys_cols * dec->z_c;
parity_offset = K - 2 * dec->z_c;
#ifdef RTE_BBDEV_OFFLOAD_COST
uint64_t start_time = rte_rdtsc_precise();
#else
RTE_SET_USED(q_stats);
#endif
in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
/**
 * Single contiguous block from the first LLR of the
 * circular buffer.
 */
harq_in = NULL;
if (m_harq_in != NULL)
harq_in = rte_pktmbuf_mtod_offset(m_harq_in,
uint8_t *, harq_in_offset);
if (harq_in == NULL) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR, "No space in harq input mbuf");
return;
}
/*
 * Copy at most n_cb bytes of HARQ input into q->ag and zero the
 * remainder of the n_cb bytes.
 */
uint16_t harq_in_length = RTE_MIN(
dec->harq_combined_input.length,
(uint32_t) dec->n_cb);
memset(q->ag + harq_in_length, 0,
dec->n_cb - harq_in_length);
rte_memcpy(q->ag, harq_in, harq_in_length);
}
/* Rate dematching: reads the input, with q->ag as the HARQ buffer */
derm_req.p_in = (int8_t *) in;
derm_req.p_harq = q->ag; /* This doesn't include the filler bits */
derm_req.base_graph = dec->basegraph;
derm_req.zc = dec->z_c;
derm_req.ncb = dec->n_cb;
derm_req.e = e;
derm_req.k0 = 0; /* Actual output from SDK */
derm_req.isretx = check_bit(dec->op_flags,
RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
derm_req.rvid = dec->rv_index;
derm_req.modulation_order = dec->q_m;
derm_req.start_null_index = parity_offset - dec->n_filler;
derm_req.num_of_null = dec->n_filler;
bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
/* Compute RM out size and number of rows */
/*
 * k0 + e, less the fillers when k0 is not below the null index, capped at
 * n_cb - n_filler. When a HARQ input mbuf exists the size is raised to at
 * least min(n_cb - n_filler, data_len of that mbuf).
 */
deRmOutSize = RTE_MIN(
derm_req.k0 + derm_req.e -
((derm_req.k0 < derm_req.start_null_index) ?
0 : dec->n_filler),
dec->n_cb - dec->n_filler);
if (m_harq_in != NULL)
deRmOutSize = RTE_MAX(deRmOutSize,
RTE_MIN(dec->n_cb - dec->n_filler,
m_harq_in->data_len));
numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
- sys_cols + 2;
/* Never request fewer than 4 rows from the decoder */
numRows = RTE_MAX(4, numRows);
/* get output data starting address */
out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
if (out == NULL) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR,
"Too little space in LDPC decoder output mbuf");
return;
}
/* rte_bbdev_op_data.offset can be different than the offset
 * of the appended bytes
 */
out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
/* adapter_input aliases q->enc_out, which receives the decoded bytes */
adapter_input = q->enc_out;
dec_req.Zc = dec->z_c;
dec_req.baseGraph = dec->basegraph;
dec_req.nRows = numRows;
dec_req.numChannelLlrs = deRmOutSize;
dec_req.varNodes = derm_req.p_harq;
dec_req.numFillerBits = dec->n_filler;
dec_req.maxIterations = dec->iter_max;
dec_req.enableEarlyTermination = check_bit(dec->op_flags,
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
dec_resp.varNodes = (int16_t *) q->adapter_output;
dec_resp.compactedMessageBytes = q->enc_out;
bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);
/* Keep the largest iteration count seen so far in the op */
dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
dec->iter_count);
if (!dec_resp.parityPassedAtTermination)
op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;
bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);
/* CRC check over K minus fillers minus the 24 CRC bits, if requested */
if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
check_bit(dec->op_flags,
RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
crc_req.data = adapter_input;
crc_req.len = K - dec->n_filler - 24;
crc_resp.check_passed = false;
crc_resp.data = adapter_input;
if (check_crc_24b)
bblib_lte_crc24b_check(&crc_req, &crc_resp);
else
bblib_lte_crc24a_check(&crc_req, &crc_resp);
if (!crc_resp.check_passed)
op->status |= 1 << RTE_BBDEV_CRC_ERROR;
}
/* Note: the early error returns above skip this cycle accounting */
#ifdef RTE_BBDEV_OFFLOAD_COST
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
harq_out = NULL;
if (m_harq_out != NULL) {
/* Initialize HARQ data length since we overwrite */
m_harq_out->data_len = 0;
/* Check there is enough space
 * in the HARQ outbound buffer
 */
harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
m_harq_out, deRmOutSize);
}
if (harq_out == NULL) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR, "No space in HARQ output mbuf");
return;
}
/* get output data starting address and overwrite the data */
harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
harq_out_offset);
rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
dec->harq_combined_output.length += deRmOutSize;
}
/* Hard output: out_length bytes, copied even if error bits were set above */
rte_memcpy(out, adapter_input, out_length);
dec->hard_output.length += out_length;
#else
/* Built without the AVX512 SDK: nothing to do, silence unused warnings */
RTE_SET_USED(q);
RTE_SET_USED(op);
RTE_SET_USED(c);
RTE_SET_USED(out_length);
RTE_SET_USED(e);
RTE_SET_USED(m_in);
RTE_SET_USED(m_out_head);
RTE_SET_USED(m_out);
RTE_SET_USED(m_harq_in);
RTE_SET_USED(m_harq_out_head);
RTE_SET_USED(m_harq_out);
RTE_SET_USED(harq_in_offset);
RTE_SET_USED(harq_out_offset);
RTE_SET_USED(in_offset);
RTE_SET_USED(out_offset);
RTE_SET_USED(check_crc_24b);
RTE_SET_USED(crc24_overlap);
RTE_SET_USED(in_length);
RTE_SET_USED(q_stats);
#endif
}
static inline void
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
struct rte_bbdev_stats *queue_stats)
@ -1083,6 +1588,7 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
in_offset, out_offset, check_bit(dec->op_flags,
RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
seg_total_left, queue_stats);
/* To keep CRC24 attached to end of Code block, use
* RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
* removed by default once verified.
@ -1104,6 +1610,103 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
}
r++;
}
if (mbuf_total_left != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR,
"Mismatch between mbuf length and included Circular buffer sizes");
}
}
static inline void
enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
struct rte_bbdev_stats *queue_stats)
{
uint8_t c, r = 0;
uint16_t e, out_length;
uint16_t crc24_overlap = 0;
struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
struct rte_mbuf *m_in = dec->input.data;
struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
struct rte_mbuf *m_out = dec->hard_output.data;
struct rte_mbuf *m_out_head = dec->hard_output.data;
uint16_t in_offset = dec->input.offset;
uint16_t harq_in_offset = dec->harq_combined_input.offset;
uint16_t harq_out_offset = dec->harq_combined_output.offset;
uint16_t out_offset = dec->hard_output.offset;
uint32_t mbuf_total_left = dec->input.length;
uint16_t seg_total_left;
/* Clear op status */
op->status = 0;
if (m_in == NULL || m_out == NULL) {
rte_bbdev_log(ERR, "Invalid mbuf pointer");
op->status = 1 << RTE_BBDEV_DATA_ERROR;
return;
}
if (dec->code_block_mode == 0) { /* For Transport Block mode */
c = dec->tb_params.c;
e = dec->tb_params.ea;
} else { /* For Code Block mode */
c = 1;
e = dec->cb_params.e;
}
if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
crc24_overlap = 24;
out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);
while (mbuf_total_left > 0) {
if (dec->code_block_mode == 0)
e = (r < dec->tb_params.cab) ?
dec->tb_params.ea : dec->tb_params.eb;
seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
process_ldpc_dec_cb(q, op, c, out_length, e,
m_in, m_out_head, m_out,
m_harq_in, m_harq_out_head, m_harq_out,
in_offset, out_offset, harq_in_offset,
harq_out_offset,
check_bit(dec->op_flags,
RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
crc24_overlap,
seg_total_left, queue_stats);
/* To keep CRC24 attached to end of Code block, use
* RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it
* removed by default once verified.
*/
mbuf_total_left -= e;
/* Update offsets */
if (seg_total_left == e) {
/* Go to the next mbuf */
m_in = m_in->next;
m_out = m_out->next;
if (m_harq_in != NULL)
m_harq_in = m_harq_in->next;
if (m_harq_out != NULL)
m_harq_out = m_harq_out->next;
in_offset = 0;
out_offset = 0;
harq_in_offset = 0;
harq_out_offset = 0;
} else {
/* Update offsets for next CBs (if exist) */
in_offset += e;
out_offset += out_length;
}
r++;
}
if (mbuf_total_left != 0) {
op->status |= 1 << RTE_BBDEV_DATA_ERROR;
rte_bbdev_log(ERR,
@ -1127,6 +1730,23 @@ enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
NULL);
}
/*
 * Process every LDPC decode operation of the burst, then post the whole
 * burst on the queue's processed ring. Returns the number of operations the
 * ring accepted.
 */
static inline uint16_t
enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t idx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
	/* The cycle counter is restarted for each burst */
	queue_stats->acc_offload_cycles = 0;
#endif

	while (idx < nb_ops) {
		enqueue_ldpc_dec_one_op(q, ops[idx], queue_stats);
		idx++;
	}

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}
/* Enqueue burst */
static uint16_t
enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
@ -1144,6 +1764,24 @@ enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
return nb_enqueued;
}
/*
 * Enqueue a burst of LDPC encode operations on a queue and update the
 * queue's enqueue counters. Returns the number of operations enqueued.
 */
static uint16_t
enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *sw_queue = q_data->queue_private;
	uint16_t accepted;

	accepted = enqueue_ldpc_enc_all_ops(sw_queue, ops, nb_ops,
			&q_data->queue_stats);

	/* Whatever the ring did not accept counts as an enqueue error */
	q_data->queue_stats.enqueue_err_count += nb_ops - accepted;
	q_data->queue_stats.enqueued_count += accepted;

	return accepted;
}
/* Enqueue burst */
static uint16_t
enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
@ -1161,6 +1799,24 @@ enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
return nb_enqueued;
}
/*
 * Enqueue a burst of LDPC decode operations on a queue and update the
 * queue's enqueue counters. Returns the number of operations enqueued.
 */
static uint16_t
enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *sw_queue = q_data->queue_private;
	uint16_t accepted;

	accepted = enqueue_ldpc_dec_all_ops(sw_queue, ops, nb_ops,
			&q_data->queue_stats);

	/* Whatever the ring did not accept counts as an enqueue error */
	q_data->queue_stats.enqueue_err_count += nb_ops - accepted;
	q_data->queue_stats.enqueued_count += accepted;

	return accepted;
}
/* Dequeue decode burst */
static uint16_t
dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
@ -1273,6 +1929,10 @@ turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
bbdev->dequeue_dec_ops = dequeue_dec_ops;
bbdev->enqueue_enc_ops = enqueue_enc_ops;
bbdev->enqueue_dec_ops = enqueue_dec_ops;
bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
init_params->queues_num;

View File

@ -23,6 +23,16 @@ if dpdk_conf.has('RTE_BBDEV_SDK_AVX2')
includes += include_directories(path + '/lib_common')
endif
endif
# When the AVX512 SDK option is configured, require the four 5GNR libraries
# (LDPC encoder, LDPC decoder, rate matching, rate dematching) from their
# directories under 'path' and add the matching include directories.
if dpdk_conf.has('RTE_BBDEV_SDK_AVX512')
ext_deps += cc.find_library('libldpc_encoder_5gnr', dirs: [path + '/lib_ldpc_encoder_5gnr'], required: true)
ext_deps += cc.find_library('libldpc_decoder_5gnr', dirs: [path + '/lib_ldpc_decoder_5gnr'], required: true)
ext_deps += cc.find_library('libLDPC_ratematch_5gnr', dirs: [path + '/lib_LDPC_ratematch_5gnr'], required: true)
ext_deps += cc.find_library('librate_dematching_5gnr', dirs: [path + '/lib_rate_dematching_5gnr'], required: true)
includes += include_directories(path + '/lib_ldpc_encoder_5gnr')
includes += include_directories(path + '/lib_ldpc_decoder_5gnr')
includes += include_directories(path + '/lib_LDPC_ratematch_5gnr')
includes += include_directories(path + '/lib_rate_dematching_5gnr')
endif
deps += ['bbdev', 'bus_vdev', 'ring']
name = 'bbdev_turbo_sw'

View File

@ -236,7 +236,13 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_crc -lcr
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_turbo -lturbo
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_rate_matching -lrate_matching
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_common -lcommon
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -lirc -limf -lstdc++ -lipps
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -lirc -limf -lstdc++ -lipps -lsvml
# Link the 5GNR LDPC encoder/decoder and rate (de)matching SDK libraries into
# applications when the turbo_sw PMD is built with the AVX512 SDK option.
ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y)
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr -lLDPC_ratematch_5gnr
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr -lldpc_encoder_5gnr
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr -lldpc_decoder_5gnr
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_rate_dematching_5gnr -lrate_dematching_5gnr
endif # CONFIG_RTE_BBDEV_SDK_AVX512
endif # CONFIG_RTE_BBDEV_SDK_AVX2
endif # CONFIG_RTE_LIBRTE_BBDEV