bbdev: add operation for FFT processing

Extended bbdev operations to support FFT based operations.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Acked-by: Akhil Goyal <gakhil@marvell.com>
This commit is contained in:
Nicolas Chautru 2022-10-04 10:16:54 -07:00 committed by Akhil Goyal
parent 973320514f
commit 9d3933252d
7 changed files with 357 additions and 14 deletions

View File

@ -1118,6 +1118,111 @@ Figure :numref:`figure_turbo_tb_decode` above
showing the Turbo decoding of CBs using BBDEV interface in TB-mode
is also valid for LDPC decode.
BBDEV FFT Operation
~~~~~~~~~~~~~~~~~~~
This operation runs a combination of DFT and/or IDFT and/or time-domain windowing.
These can be used in a modular fashion (using bypass modes) or as a processing pipeline
which can be used for FFT-based baseband signal processing.
In more detail, it allows:
* to process the data first through an IDFT of adjustable size and padding;
* to perform the windowing as a programmable cyclic shift offset of the data
followed by a pointwise multiplication by a time domain window;
* to process the related data through a DFT of adjustable size and
de-padding for each such cyclic shift output.
A flexible number of Rx antennas is processed in parallel with the same configuration.
The API allows more generally for flexibility in what the PMD may support (capability flags) and
flexibility to adjust some of the parameters of the processing.
The operation/capability flags that can be set for each FFT operation are given below.
**NOTE:** The actual operation flags that may be used with a specific
bbdev PMD are dependent on the driver capabilities as reported via
``rte_bbdev_info_get()``, and may be a subset of those below.
+--------------------------------------------------------------------+
|Description of FFT capability flags |
+====================================================================+
|RTE_BBDEV_FFT_WINDOWING |
| Set to enable/support windowing in time domain |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_CS_ADJUSTMENT |
| Set to enable/support the cyclic shift time offset adjustment |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_DFT_BYPASS |
| Set to bypass the DFT and use directly the IDFT as an option |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_IDFT_BYPASS |
| Set to bypass the IDFT and use directly the DFT as an option |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_WINDOWING_BYPASS |
| Set to bypass the time domain windowing as an option |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_POWER_MEAS |
| Set to provide an optional power measurement of the DFT output |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_FP16_INPUT |
| Set if the input data shall use FP16 format instead of INT16 |
+--------------------------------------------------------------------+
|RTE_BBDEV_FFT_FP16_OUTPUT |
| Set if the output data shall use FP16 format instead of INT16 |
+--------------------------------------------------------------------+
The FFT parameters are set out in the table below.
+-------------------------+--------------------------------------------------------------+
|Parameter |Description |
+=========================+==============================================================+
|base_input |input data |
+-------------------------+--------------------------------------------------------------+
|base_output |output data |
+-------------------------+--------------------------------------------------------------+
|power_meas_output |optional output data with power measurement on DFT output |
+-------------------------+--------------------------------------------------------------+
|op_flags |bitmask of all active operation capabilities |
+-------------------------+--------------------------------------------------------------+
|input_sequence_size |size of the input sequence in 32-bits points per antenna |
+-------------------------+--------------------------------------------------------------+
|input_leading_padding |number of points padded at the start of input data |
+-------------------------+--------------------------------------------------------------+
|output_sequence_size |size of the output sequence per antenna and cyclic shift |
+-------------------------+--------------------------------------------------------------+
|output_leading_depadding |number of points de-padded at the start of output data |
+-------------------------+--------------------------------------------------------------+
|window_index |optional windowing profile index used for each cyclic shift |
+-------------------------+--------------------------------------------------------------+
|cs_bitmap |bitmap of the cyclic shift output requested (LSB for index 0) |
+-------------------------+--------------------------------------------------------------+
|num_antennas_log2 |number of antennas as a log2 (10 maps to 1024...) |
+-------------------------+--------------------------------------------------------------+
|idft_log2 |IDFT size as a log2 |
+-------------------------+--------------------------------------------------------------+
|dft_log2 |DFT size as a log2 |
+-------------------------+--------------------------------------------------------------+
|cs_time_adjustment |adjustment of time position of all the cyclic shift output |
+-------------------------+--------------------------------------------------------------+
|idft_shift |shift down of signal level post iDFT |
+-------------------------+--------------------------------------------------------------+
|dft_shift |shift down of signal level post DFT |
+-------------------------+--------------------------------------------------------------+
|ncs_reciprocal |inverse of max number of CS normalized to 15b (ie. 231 for 12)|
+-------------------------+--------------------------------------------------------------+
|power_shift |shift down of level of power measurement when enabled |
+-------------------------+--------------------------------------------------------------+
|fp16_exp_adjust |value added to FP16 exponent at conversion from INT16 |
+-------------------------+--------------------------------------------------------------+
The mbuf input ``base_input`` is mandatory for all bbdev PMDs and
is the incoming data for the processing. Its size may not fit into an actual mbuf,
but the structure is used to pass the IOVA address.
The mbuf output ``base_output`` is mandatory and is the output of the FFT processing chain.
Each point is a complex number of 32 bits:
either as 2 INT16 or as 2 FP16 when the option is supported.
The data layout is based on contiguous concatenation of output data
first by cyclic shift then by antenna.
Sample code
-----------

View File

@ -94,10 +94,6 @@ Deprecation Notices
``RTE_ETH_EVENT_IPSEC_SA_BYTE_HARD_EXPIRY`` and
``RTE_ETH_EVENT_IPSEC_SA_PKT_HARD_EXPIRY`` in DPDK 22.11.
* bbdev: Will extend API to support new operation type ``RTE_BBDEV_OP_FFT`` as per
this `RFC <https://patches.dpdk.org/project/dpdk/list/?series=22111>`__.
This should be updated in DPDK 22.11.
* cryptodev: The function ``rte_cryptodev_cb_fn`` will be updated
to have another parameter ``qp_id`` to return the queue pair ID
which got error interrupt to the application,

View File

@ -168,6 +168,12 @@ New Features
* Added AES-CCM support in lookaside protocol (IPsec) for CN9K & CN10K.
* Added AES & DES DOCSIS algorithm support in lookaside crypto for CN9K.
* **Added bbdev operation for FFT processing.**
Added a new operation type in bbdev for FFT processing with new functions
``rte_bbdev_enqueue_fft_ops``, ``rte_bbdev_dequeue_fft_ops``,
and related structures.
* **Added eventdev adapter instance get API.**
* Added ``rte_event_eth_rx_adapter_instance_get`` to get Rx adapter

View File

@ -24,7 +24,7 @@
#define DEV_NAME "BBDEV"
/* Number of supported operation types in *rte_bbdev_op_type*. */
#define BBDEV_OP_TYPE_COUNT 5
#define BBDEV_OP_TYPE_COUNT 6
/* BBDev library logging ID */
RTE_LOG_REGISTER_DEFAULT(bbdev_logtype, NOTICE);
@ -852,6 +852,9 @@ get_bbdev_op_size(enum rte_bbdev_op_type type)
case RTE_BBDEV_OP_LDPC_ENC:
result = sizeof(struct rte_bbdev_enc_op);
break;
case RTE_BBDEV_OP_FFT:
result = sizeof(struct rte_bbdev_fft_op);
break;
default:
break;
}
@ -875,6 +878,10 @@ bbdev_op_init(struct rte_mempool *mempool, void *arg, void *element,
struct rte_bbdev_enc_op *op = element;
memset(op, 0, mempool->elt_size);
op->mempool = mempool;
} else if (type == RTE_BBDEV_OP_FFT) {
struct rte_bbdev_fft_op *op = element;
memset(op, 0, mempool->elt_size);
op->mempool = mempool;
}
}
@ -1125,6 +1132,7 @@ rte_bbdev_op_type_str(enum rte_bbdev_op_type op_type)
"RTE_BBDEV_OP_TURBO_ENC",
"RTE_BBDEV_OP_LDPC_DEC",
"RTE_BBDEV_OP_LDPC_ENC",
"RTE_BBDEV_OP_FFT",
};
if (op_type < BBDEV_OP_TYPE_COUNT)

View File

@ -401,6 +401,12 @@ typedef uint16_t (*rte_bbdev_enqueue_dec_ops_t)(
struct rte_bbdev_dec_op **ops,
uint16_t num);
/**
 * @internal Enqueue FFT operations for processing on a queue of a device.
 *
 * Callback type stored in the enqueue_fft_ops field of struct rte_bbdev and
 * invoked by rte_bbdev_enqueue_fft_ops() with the queue data, the array of
 * FFT operation pointers and the requested count; its return value is handed
 * back to the caller unchanged.
 */
typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
struct rte_bbdev_queue_data *q_data,
struct rte_bbdev_fft_op **ops,
uint16_t num);
/** @internal Dequeue encode operations from a queue of a device. */
typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
struct rte_bbdev_queue_data *q_data,
@ -411,6 +417,11 @@ typedef uint16_t (*rte_bbdev_dequeue_dec_ops_t)(
struct rte_bbdev_queue_data *q_data,
struct rte_bbdev_dec_op **ops, uint16_t num);
/**
 * @internal Dequeue FFT operations from a queue of a device.
 *
 * Callback type stored in the dequeue_fft_ops field of struct rte_bbdev and
 * invoked by rte_bbdev_dequeue_fft_ops() with the queue data, the array to be
 * filled with FFT operation pointers and the maximum count; its return value
 * is handed back to the caller unchanged.
 */
typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
struct rte_bbdev_queue_data *q_data,
struct rte_bbdev_fft_op **ops, uint16_t num);
#define RTE_BBDEV_NAME_MAX_LEN 64 /**< Max length of device name */
/**
@ -459,6 +470,10 @@ struct __rte_cache_aligned rte_bbdev {
rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
/** Dequeue decode function */
rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
/** Enqueue FFT function */
rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
/** Dequeue FFT function */
rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
const struct rte_bbdev_ops *dev_ops; /**< Functions exported by PMD */
struct rte_bbdev_data *data; /**< Pointer to device data */
enum rte_bbdev_state state; /**< If device is currently used or not */
@ -591,11 +606,10 @@ rte_bbdev_enqueue_ldpc_dec_ops(uint16_t dev_id, uint16_t queue_id,
return dev->enqueue_ldpc_dec_ops(q_data, ops, num_ops);
}
/**
* Dequeue a burst of processed encode operations from a queue of the device.
* This functions returns only the current contents of the queue, and does not
* block until @ num_ops is available.
* Enqueue a burst of FFT operations to a queue of the device.
* This function only enqueues as many operations as currently possible and
* does not block until @p num_ops entries in the queue are available.
* This function does not provide any error notification to avoid the
* corresponding overhead.
*
@ -604,15 +618,46 @@ rte_bbdev_enqueue_ldpc_dec_ops(uint16_t dev_id, uint16_t queue_id,
* @param queue_id
* The index of the queue.
* @param ops
* Pointer array where operations will be dequeued to. Must have at least
* @p num_ops entries
* ie. A pointer to a table of void * pointers (ops) that will be filled.
* Pointer array containing operations to be enqueued.
* Must have at least @p num_ops entries.
* @param num_ops
* The maximum number of operations to enqueue.
*
* @return
* The number of operations actually enqueued.
* (This is the number of processed entries in the @p ops array.)
*/
__rte_experimental
static inline uint16_t
rte_bbdev_enqueue_fft_ops(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
{
	/* Fast path: dev_id and queue_id are used as indexes without any check. */
	struct rte_bbdev *bbdev = rte_bbdev_devices + dev_id;
	struct rte_bbdev_queue_data *queue = bbdev->data->queues + queue_id;

	/* Hand the burst over to the enqueue callback registered on the device. */
	return bbdev->enqueue_fft_ops(queue, ops, num_ops);
}
/**
* Dequeue a burst of processed encode operations from a queue of the device.
* This function returns only the current contents of the queue,
* and does not block until @p num_ops is available.
* This function does not provide any error notification to avoid the
* corresponding overhead.
*
* @param dev_id
* The identifier of the device.
* @param queue_id
* The index of the queue.
* @param ops
* Pointer array where operations will be dequeued to.
* Must have at least @p num_ops entries, i.e.
* a pointer to a table of void * pointers (ops) that will be filled.
* @param num_ops
* The maximum number of operations to dequeue.
*
* @return
* The number of operations actually dequeued (this is the number of entries
* copied into the @p ops array).
* The number of operations actually dequeued.
* (This is the number of entries copied into the @p ops array.)
*/
static inline uint16_t
rte_bbdev_dequeue_enc_ops(uint16_t dev_id, uint16_t queue_id,
@ -716,6 +761,37 @@ rte_bbdev_dequeue_ldpc_dec_ops(uint16_t dev_id, uint16_t queue_id,
return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops);
}
/**
 * Dequeue a burst of FFT operations from a queue of the device.
 * Only the operations currently held by the queue are returned; the call
 * does not block until @p num_ops operations are available.
 * No error notification is provided, to avoid the corresponding overhead.
 *
 * @param dev_id
 *   The identifier of the device.
 * @param queue_id
 *   The index of the queue.
 * @param ops
 *   Pointer array where operations will be dequeued to.
 *   Must have at least @p num_ops entries.
 * @param num_ops
 *   The maximum number of operations to dequeue.
 *
 * @return
 *   The number of operations actually dequeued.
 *   (This is the number of entries copied into the @p ops array.)
 */
__rte_experimental
static inline uint16_t
rte_bbdev_dequeue_fft_ops(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
{
	/* Fast path: dev_id and queue_id are used as indexes without any check. */
	struct rte_bbdev *bbdev = rte_bbdev_devices + dev_id;
	struct rte_bbdev_queue_data *queue = bbdev->data->queues + queue_id;

	/* Let the dequeue callback registered on the device fill the array. */
	return bbdev->dequeue_fft_ops(queue, ops, num_ops);
}
/** Definitions of device event types */
enum rte_bbdev_event_type {
RTE_BBDEV_EVENT_UNKNOWN, /**< unknown event type */

View File

@ -47,6 +47,8 @@ extern "C" {
#define RTE_BBDEV_TURBO_MAX_CODE_BLOCKS (64)
/* LDPC: Maximum number of Code Blocks in Transport Block.*/
#define RTE_BBDEV_LDPC_MAX_CODE_BLOCKS (256)
/*
 * FFT: 12 cyclic shifts (CS) maximum.
 * NOTE(review): the value 6 is presumably that maximum divided by two, as the
 * "_2" suffix suggests - TODO confirm.
 * Used to size the window_index array of struct rte_bbdev_op_fft.
 */
#define RTE_BBDEV_MAX_CS_2 (6)
/*
* Maximum size to be used to manage the enum rte_bbdev_op_type
@ -218,6 +220,26 @@ enum rte_bbdev_op_ldpcenc_flag_bitmasks {
RTE_BBDEV_LDPC_ENC_CONCATENATION = (1ULL << 7)
};
/**
 * Flags for FFT operation and capability structure.
 *
 * Each flag occupies a distinct bit (bits 0 to 7) so that flags can be
 * combined in a bitmask such as the op_flags field of struct rte_bbdev_op_fft
 * or the capability_flags field of struct rte_bbdev_op_cap_fft.
 */
enum rte_bbdev_op_fft_flag_bitmasks {
/** Flexible windowing capability. */
RTE_BBDEV_FFT_WINDOWING = (1ULL << 0),
/** Flexible adjustment of Cyclic Shift time offset. */
RTE_BBDEV_FFT_CS_ADJUSTMENT = (1ULL << 1),
/** Set to bypass the DFT and use the IDFT directly. */
RTE_BBDEV_FFT_DFT_BYPASS = (1ULL << 2),
/** Set to bypass the IDFT and use the DFT directly. */
RTE_BBDEV_FFT_IDFT_BYPASS = (1ULL << 3),
/** Set to bypass the time domain windowing. */
RTE_BBDEV_FFT_WINDOWING_BYPASS = (1ULL << 4),
/** Set for optional power measurement on DFT output. */
RTE_BBDEV_FFT_POWER_MEAS = (1ULL << 5),
/** Set if the input data uses FP16 format. */
RTE_BBDEV_FFT_FP16_INPUT = (1ULL << 6),
/** Set if the output data uses FP16 format. */
RTE_BBDEV_FFT_FP16_OUTPUT = (1ULL << 7)
};
/** Flags for the Code Block/Transport block mode */
enum rte_bbdev_op_cb_mode {
/** One operation is one or fraction of one transport block */
@ -696,6 +718,55 @@ struct rte_bbdev_op_ldpc_enc {
};
};
/** Operation structure for FFT processing.
 *
 * The operation processes the data for multiple antennas in a single call
 * (i.e. for all the REs belonging to a given SRS sequence for instance).
 *
 * The output mbuf data structure is expected to be allocated by the
 * application with enough room for the output data.
 */
struct rte_bbdev_op_fft {
/** Input data starting from first antenna. */
struct rte_bbdev_op_data base_input;
/** Output data starting from first antenna and first cyclic shift. */
struct rte_bbdev_op_data base_output;
/** Optional power measurement output data. */
struct rte_bbdev_op_data power_meas_output;
/** Flags from rte_bbdev_op_fft_flag_bitmasks. */
uint32_t op_flags;
/** Input sequence size in 32-bits points. */
uint16_t input_sequence_size;
/** Padding at the start of the sequence. */
uint16_t input_leading_padding;
/** Output sequence size in 32-bits points. */
uint16_t output_sequence_size;
/** Depadding at the start of the DFT output. */
uint16_t output_leading_depadding;
/** Window index being used for each cyclic shift output. */
uint8_t window_index[RTE_BBDEV_MAX_CS_2];
/** Bitmap of the cyclic shift output requested. */
uint16_t cs_bitmap;
/** Number of antennas as a log2 - 8 to 128. */
uint8_t num_antennas_log2;
/** iDFT size as a log2 - 32 to 2048. */
uint8_t idft_log2;
/** DFT size as a log2 - 8 to 2048. */
uint8_t dft_log2;
/** Adjustment of position of the cyclic shifts - -31 to 31. */
int8_t cs_time_adjustment;
/** iDFT shift down. */
int8_t idft_shift;
/** DFT shift down. */
int8_t dft_shift;
/** NCS reciprocal factor. */
uint16_t ncs_reciprocal;
/** Power measurement out shift down. */
uint16_t power_shift;
/** Adjust the FP16 exponent for INT<->FP16 conversion. */
uint16_t fp16_exp_adjust;
};
/** List of the capabilities for the Turbo Decoder */
struct rte_bbdev_op_cap_turbo_dec {
/** Flags from rte_bbdev_op_td_flag_bitmasks */
@ -748,6 +819,16 @@ struct rte_bbdev_op_cap_ldpc_enc {
uint16_t num_buffers_dst;
};
/** List of the capabilities for the FFT. */
struct rte_bbdev_op_cap_fft {
/** Flags from *rte_bbdev_op_fft_flag_bitmasks*. */
uint32_t capability_flags;
/**
 * Number of input code block buffers.
 * NOTE(review): "code block" wording looks inherited from the encoder/decoder
 * capability structures; presumably this is the number of input buffers for
 * FFT - TODO confirm.
 */
uint16_t num_buffers_src;
/**
 * Number of output code block buffers.
 * NOTE(review): same remark as for num_buffers_src - TODO confirm.
 */
uint16_t num_buffers_dst;
};
/** Different operation types supported by the device.
* The related macro RTE_BBDEV_OP_TYPE_SIZE_MAX can be used as an absolute maximum for
* notably sizing array while allowing for future enumeration insertion.
@ -758,6 +839,7 @@ enum rte_bbdev_op_type {
RTE_BBDEV_OP_TURBO_ENC, /**< Turbo encode */
RTE_BBDEV_OP_LDPC_DEC, /**< LDPC decode */
RTE_BBDEV_OP_LDPC_ENC, /**< LDPC encode */
RTE_BBDEV_OP_FFT, /**< FFT */
/* Note: RTE_BBDEV_OP_TYPE_SIZE_MAX must be larger or equal to maximum enum value */
};
@ -801,6 +883,18 @@ struct rte_bbdev_dec_op {
};
};
/** Structure specifying a single FFT operation. */
struct rte_bbdev_fft_op {
/** Status of operation performed. */
int status;
/** Mempool used for op instance. */
struct rte_mempool *mempool;
/** Opaque pointer for user data. */
void *opaque_data;
/** Contains FFT specific parameters. */
struct rte_bbdev_op_fft fft;
};
/** Operation capabilities supported by a device */
struct rte_bbdev_op_cap {
enum rte_bbdev_op_type type; /**< Type of operation */
@ -809,6 +903,7 @@ struct rte_bbdev_op_cap {
struct rte_bbdev_op_cap_turbo_enc turbo_enc;
struct rte_bbdev_op_cap_ldpc_dec ldpc_dec;
struct rte_bbdev_op_cap_ldpc_enc ldpc_enc;
struct rte_bbdev_op_cap_fft fft;
} cap; /**< Operation-type specific capabilities */
};
@ -927,6 +1022,41 @@ rte_bbdev_dec_op_alloc_bulk(struct rte_mempool *mempool,
return 0;
}
/**
 * Bulk allocate FFT operations from a mempool with default parameters.
 *
 * @param mempool
 *   Operation mempool, created by *rte_bbdev_op_pool_create*.
 * @param ops
 *   Output array to place allocated operations.
 * @param num_ops
 *   Number of operations to allocate.
 *
 * @returns
 *   - 0 on success.
 *   - -EINVAL if the mempool private data does not declare the
 *     RTE_BBDEV_OP_FFT operation type.
 *   - The negative value reported by rte_mempool_get_bulk() if the elements
 *     could not be obtained.
 */
__rte_experimental
static inline int
rte_bbdev_fft_op_alloc_bulk(struct rte_mempool *mempool,
		struct rte_bbdev_fft_op **ops, uint16_t num_ops)
{
	/* The pool private area records which operation type the pool holds. */
	struct rte_bbdev_op_pool_private *pool_priv =
		(struct rte_bbdev_op_pool_private *)rte_mempool_get_priv(mempool);
	int rc;

	if (unlikely(pool_priv->type != RTE_BBDEV_OP_FFT))
		return -EINVAL;

	/* Fetch all requested elements in one call; propagate a negative result. */
	rc = rte_mempool_get_bulk(mempool, (void **)ops, num_ops);

	return unlikely(rc < 0) ? rc : 0;
}
/**
* Free decode operation structures that were allocated by
* rte_bbdev_dec_op_alloc_bulk().
@ -961,6 +1091,24 @@ rte_bbdev_enc_op_free_bulk(struct rte_bbdev_enc_op **ops, unsigned int num_ops)
rte_mempool_put_bulk(ops[0]->mempool, (void **)ops, num_ops);
}
/**
 * Free FFT operation structures that were allocated by
 * *rte_bbdev_fft_op_alloc_bulk*.
 * All structures must belong to the same mempool.
 *
 * @param ops
 *   Operation structures.
 * @param num_ops
 *   Number of structures.
 */
__rte_experimental
static inline void
rte_bbdev_fft_op_free_bulk(struct rte_bbdev_fft_op **ops, unsigned int num_ops)
{
	/* Nothing to release; ops[0] is not read in that case. */
	if (num_ops == 0)
		return;

	/* The mempool recorded in the first operation is used for the whole burst. */
	rte_mempool_put_bulk(ops[0]->mempool, (void **)ops, num_ops);
}
#ifdef __cplusplus
}
#endif

View File

@ -44,5 +44,9 @@ EXPERIMENTAL {
global:
# added in 22.11
rte_bbdev_dequeue_fft_ops;
rte_bbdev_device_status_str;
rte_bbdev_enqueue_fft_ops;
rte_bbdev_fft_op_alloc_bulk;
rte_bbdev_fft_op_free_bulk;
};