numam-dpdk/lib/librte_mbuf/rte_mbuf_dyn.h

343 lines
12 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2019 6WIND S.A.
*/
#ifndef _RTE_MBUF_DYN_H_
#define _RTE_MBUF_DYN_H_
/**
* @file
* RTE Mbuf dynamic fields and flags
*
* Many DPDK features need to store data inside the mbuf. As the room
* in the mbuf structure is limited, it is not possible to have a field
* for each feature. Also, changing fields in the mbuf structure can break
* the API or ABI.
*
* This module addresses this issue, by enabling the dynamic
* registration of fields or flags:
*
* - a dynamic field is a named area in the rte_mbuf structure, with a
* given size (>= 1 byte) and alignment constraint.
* - a dynamic flag is a named bit in the rte_mbuf structure, stored
* in mbuf->ol_flags.
*
* The placement of the field or flag can be automatic, in this case the
* zones that have the smallest size and alignment constraint are
* selected in priority. Else, a specific field offset or flag bit
* number can be requested through the API.
*
* The typical use case is when a specific offload feature needs to
* register a dedicated offload field in the mbuf structure, and adding
* a static field or flag is not justified.
*
* Example of use:
*
* - A rte_mbuf_dynfield structure is defined, containing the parameters
* of the dynamic field to be registered:
* const struct rte_mbuf_dynfield rte_dynfield_my_feature = { ... };
* - The application initializes the PMD, and asks for this feature
* at port initialization by passing DEV_RX_OFFLOAD_MY_FEATURE in
* rxconf. This makes the PMD register the field by calling
* rte_mbuf_dynfield_register(&rte_dynfield_my_feature). The PMD
* stores the returned offset.
* - The application that uses the offload feature also registers
* the field to retrieve the same offset.
* - When the PMD receives a packet, it can set the field:
* *RTE_MBUF_DYNFIELD(m, offset, <type *>) = value;
* - In the main loop, the application can retrieve the value with
* the same macro.
*
* To avoid wasting space, the dynamic fields or flags must only be
* reserved on demand, when an application asks for the related feature.
*
* The registration can be done at any moment, but it is not possible
* to unregister fields or flags for now.
*
* A dynamic field can be reserved and used by an application only.
* It can for instance be a packet mark.
*
* To avoid namespace collisions, the dynamic mbuf field or flag names
* have to be chosen with care. It is advised to use the same
* conventions as for function names in dpdk:
* - "rte_mbuf_dynfield_<name>" if defined in mbuf library
* - "rte_<libname>_dynfield_<name>" if defined in another library
* - "rte_net_<pmd>_dynfield_<name>" if defined in a PMD
* - any name that does not start with "rte_" in an application
*/
#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>
#include <rte_compat.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Size in bytes of the buffer holding the name of a dynamic field or flag.
*
* The name must be '\0'-terminated within this buffer, so the longest
* usable name is RTE_MBUF_DYN_NAMESIZE - 1 characters.
*/
#define RTE_MBUF_DYN_NAMESIZE 64
/**
* Structure describing the parameters of a mbuf dynamic field.
*
* It is passed to rte_mbuf_dynfield_register() and
* rte_mbuf_dynfield_register_offset() to describe the requested field,
* and can be filled by rte_mbuf_dynfield_lookup() on a successful lookup.
*/
struct rte_mbuf_dynfield {
char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the field ('\0'-terminated). */
size_t size; /**< The number of bytes to reserve (>= 1). */
size_t align; /**< The alignment constraint (power of 2). */
unsigned int flags; /**< Reserved for future use, must be 0. */
};
/**
* Structure describing the parameters of a mbuf dynamic flag.
*
* It is passed to rte_mbuf_dynflag_register() and
* rte_mbuf_dynflag_register_bitnum() to describe the requested flag,
* and can be filled by rte_mbuf_dynflag_lookup() on a successful lookup.
*/
struct rte_mbuf_dynflag {
char name[RTE_MBUF_DYN_NAMESIZE]; /**< Name of the dynamic flag ('\0'-terminated). */
unsigned int flags; /**< Reserved for future use, must be 0. */
};
/**
* Register space for a dynamic field in the mbuf structure.
*
* If the field is already registered (same name and parameters), its
* offset is returned.
*
* @param params
* A structure containing the requested parameters (name, size,
* alignment constraint and flags).
* @return
* The offset in the mbuf structure, or -1 on error.
* Possible values for rte_errno:
* - EINVAL: invalid parameters (size, align, or flags).
* - EEXIST: this name is already registered with different parameters.
* - EPERM: called from a secondary process.
* - ENOENT: not enough room in mbuf.
* - ENOMEM: allocation failure.
* - ENAMETOOLONG: name does not end with \0.
*/
__rte_experimental
int rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params);
/**
* Register space for a dynamic field in the mbuf structure at a given
* offset.
*
* If the field is already registered (same name, parameters and offset),
* the offset is returned.
*
* @param params
* A structure containing the requested parameters (name, size,
* alignment constraint and flags).
* @param offset
* The requested offset. Ignored if SIZE_MAX is passed.
* @return
* The offset in the mbuf structure, or -1 on error.
* Possible values for rte_errno:
* - EINVAL: invalid parameters (size, align, flags, or offset).
* - EEXIST: this name is already registered with different parameters.
* - EBUSY: the requested offset cannot be used.
* - EPERM: called from a secondary process.
* - ENOENT: not enough room in mbuf.
* - ENOMEM: allocation failure.
* - ENAMETOOLONG: name does not end with \0.
*/
__rte_experimental
int rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
size_t offset);
/**
* Look up a registered dynamic mbuf field by name.
*
* @param name
* A string identifying the dynamic field.
* @param params
* If not NULL, and if the lookup is successful, the structure is
* filled with the parameters of the dynamic field.
* @return
* The offset of this field in the mbuf structure, or -1 on error.
* Possible values for rte_errno:
* - ENOENT: no dynamic field matches this name.
*/
__rte_experimental
int rte_mbuf_dynfield_lookup(const char *name,
struct rte_mbuf_dynfield *params);
/**
* Register a dynamic flag in the mbuf structure.
*
* If the flag is already registered (same name and parameters), its
* bitnum is returned.
*
* @param params
* A structure containing the requested parameters of the dynamic
* flag (name and options).
* @return
* The number of the reserved bit, or -1 on error.
* Possible values for rte_errno:
* - EINVAL: invalid parameters (flags).
* - EEXIST: this name is already registered with different parameters.
* - EPERM: called from a secondary process.
* - ENOENT: no more flag available.
* - ENOMEM: allocation failure.
* - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
*/
__rte_experimental
int rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params);
/**
* Register a dynamic flag in the mbuf structure, specifying the bitnum.
*
* If the flag is already registered (same name, parameters and bitnum),
* the bitnum is returned.
*
* @param params
* A structure containing the requested parameters of the dynamic
* flag (name and options).
* @param bitnum
* The requested bitnum. Ignored if UINT_MAX is passed.
* @return
* The number of the reserved bit, or -1 on error.
* Possible values for rte_errno:
* - EINVAL: invalid parameters (flags or bitnum).
* - EEXIST: this name is already registered with different parameters.
* - EBUSY: the requested bitnum cannot be used.
* - EPERM: called from a secondary process.
* - ENOENT: no more flag available.
* - ENOMEM: allocation failure.
* - ENAMETOOLONG: name is longer than RTE_MBUF_DYN_NAMESIZE - 1.
*/
__rte_experimental
int rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
unsigned int bitnum);
/**
* Look up a registered dynamic mbuf flag by name.
*
* @param name
* A string identifying the dynamic flag.
* @param params
* If not NULL, and if the lookup is successful, the structure is
* filled with the parameters of the dynamic flag.
* @return
* The bit number of this flag (as returned by the registration
* functions), or -1 on error.
* Possible values for rte_errno:
* - ENOENT: no dynamic flag matches this name.
*/
__rte_experimental
int rte_mbuf_dynflag_lookup(const char *name,
struct rte_mbuf_dynflag *params);
/**
* Helper macro to access a dynamic field.
*
* Expands to the address located @p offset bytes after @p m, cast to
* @p type. @p type is expected to be a pointer type, so that the result
* can be dereferenced to read or write the field.
*/
#define RTE_MBUF_DYNFIELD(m, offset, type) \
((type)((offset) + (uintptr_t)(m)))
/**
* Dump the status of dynamic fields and flags to a stream.
*
* @param out
* The stream where the status is displayed.
*/
__rte_experimental
void rte_mbuf_dyn_dump(FILE *out);
ethdev: extend flow metadata Currently, metadata can be set on egress path via mbuf tx_metadata field with PKT_TX_METADATA flag and RTE_FLOW_ITEM_TYPE_META matches metadata. This patch extends the metadata feature usability. 1) RTE_FLOW_ACTION_TYPE_SET_META When supporting multiple tables, Tx metadata can also be set by a rule and matched by another rule. This new action allows metadata to be set as a result of flow match. 2) Metadata on ingress There's also need to support metadata on ingress. Metadata can be set by SET_META action and matched by META item like Tx. The final value set by the action will be delivered to application via metadata dynamic field of mbuf which can be accessed by RTE_FLOW_DYNF_METADATA() macro or with rte_flow_dynf_metadata_set() and rte_flow_dynf_metadata_get() helper routines. PKT_RX_DYNF_METADATA flag will be set along with the data. The mbuf dynamic field must be registered by calling rte_flow_dynf_metadata_register() prior to use SET_META action. The availability of dynamic mbuf metadata field can be checked with rte_flow_dynf_metadata_avail() routine. If application is going to engage the metadata feature it registers the metadata dynamic fields, then PMD checks the metadata field availability and handles the appropriate fields in datapath. For loopback/hairpin packet, metadata set on Rx/Tx may or may not be propagated to the other path depending on hardware capability. MARK and METADATA look similar and might operate in similar way, but not interacting. Initially, there were proposed two metadata related actions: - RTE_FLOW_ACTION_TYPE_FLAG - RTE_FLOW_ACTION_TYPE_MARK These actions set the special flag in the packet metadata, MARK action stores some specified value in the metadata storage, and, on the packet receiving PMD puts the flag and value to the mbuf and applications can see the packet was threated inside flow engine according to the appropriate RTE flow(s). 
MARK and FLAG are like some kind of gateway to transfer some per-packet information from the flow engine to the application via the receiving datapath. Also, there is the item of type RTE_FLOW_ITEM_TYPE_MARK provided. It allows us to extend the flow match pattern with the capability to match the metadata values set by MARK/FLAG actions on other flows. From the datapath point of view, the MARK and FLAG are related to the receiving side only. It would be useful to have the same gateway on the transmitting side, and the feature of type RTE_FLOW_ITEM_TYPE_META was proposed. The application can fill the field in mbuf and this value will be transferred to some field in the packet metadata inside the flow engine. It did not matter whether these metadata fields are shared because MARK and META items belonged to different domains (receiving and transmitting) and could be vendor-specific. So far, so good, DPDK proposes some entities to control metadata inside the flow engine and gateways to exchange these values on a per-packet basis via datapaths. As we can see, the MARK and META means are not symmetric, there is no action which would allow us to set META value on the transmitting path. So, the action of type: - RTE_FLOW_ACTION_TYPE_SET_META was proposed. Next, applications raise the new requirements for packet metadata. The flow engines are getting more complex, internal switches are introduced, multiple ports might be supported within the same flow engine namespace. From the DPDK point of view, it means the packets might be sent on one eth_dev port and received on the other one, and the packet path inside the flow engine entirely belongs to the same hardware device. The simplest example is SR-IOV with PF, VFs and the representors. And there is a brilliant opportunity to provide some out-of-band channel to transfer some extra data from one port to another one, besides the packet data itself. And applications would like to use this opportunity. 
It is supposed for application to use trials (with rte_flow_validate) to detect which metadata features (FLAG, MARK, META) actually supported by PMD and underlying hardware. It might depend on PMD configuration, system software, hardware settings, etc., and should be detected in run time. Signed-off-by: Yongseok Koh <yskoh@mellanox.com> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com> Acked-by: Andrew Rybchenko <arybchenko@solarflare.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> Acked-by: Ori Kam <orika@mellanox.com>
2019-11-05 14:19:30 +00:00
/*
* Placeholder for dynamic field and flag declarations.
* This is the central point where all field names and parameters
* are gathered together.
*/
/*
* The metadata dynamic field provides some extra packet information
* to interact with the RTE Flow engine. The metadata in sent mbufs can be
* used to match on some Flows. The metadata in received mbufs can
* provide some feedback from the Flows. The metadata flag tells
* whether the field contains an actual value to send, or a received one.
*/
#define RTE_MBUF_DYNFIELD_METADATA_NAME "rte_flow_dynfield_metadata"
#define RTE_MBUF_DYNFLAG_METADATA_NAME "rte_flow_dynflag_metadata"
mbuf: introduce accurate packet Tx scheduling There is the requirement on some networks for precise traffic timing management. The ability to send (and, generally speaking, receive) the packets at the very precisely specified moment of time provides the opportunity to support the connections with Time Division Multiplexing using the contemporary general purpose NIC without involving an auxiliary hardware. For example, the supporting of O-RAN Fronthaul interface is one of the promising features for potentially usage of the precise time management for the egress packets. The main objective of this patchset is to specify the way how applications can provide the moment of time at what the packet transmission must be started and to describe in preliminary the supporting this feature from mlx5 PMD side [1]. The new dynamic timestamp field is proposed, it provides some timing information, the units and time references (initial phase) are not explicitly defined but are maintained always the same for a given port. Some devices allow to query rte_eth_read_clock() that will return the current device timestamp. The dynamic timestamp flag tells whether the field contains actual timestamp value. For the packets being sent this value can be used by PMD to schedule packet sending. The device clock is opaque entity, the units and frequency are vendor specific and might depend on hardware capabilities and configurations. If might (or not) be synchronized with real time via PTP, might (or not) be synchronous with CPU clock (for example if NIC and CPU share the same clock source there might be no any drift between the NIC and CPU clocks), etc. After PKT_RX_TIMESTAMP flag and fixed timestamp field supposed deprecation and obsoleting, these dynamic flag and field might be used to manage the timestamps on receiving datapath as well. 
Having the dedicated flags for Rx/Tx timestamps allows applications not to perform explicit flags reset on forwarding and not to promote received timestamps to the transmitting datapath by default. The static PKT_RX_TIMESTAMP is considered as candidate to become the dynamic flag and this move should be discussed. When PMD sees the "rte_dynfield_timestamp" set on the packet being sent it tries to synchronize the time of packet appearing on the wire with the specified packet timestamp. If the specified one is in the past it should be ignored, if one is in the distant future it should be capped with some reasonable value (in range of seconds). These specific cases ("too late" and "distant future") can be optionally reported via device xstats to assist applications to detect the time-related problems. There is no any packet reordering according timestamps is supposed, neither within packet burst, nor between packets, it is an entirely application responsibility to generate packets and its timestamps in desired order. The timestamps can be put only in the first packet in the burst providing the entire burst scheduling. PMD reports the ability to synchronize packet sending on timestamp with new offload flag: This is palliative and might be replaced with new eth_dev API about reporting/managing the supported dynamic flags and its related features. This API would break ABI compatibility and can't be introduced at the moment, so is postponed to 20.11. For testing purposes it is proposed to update testpmd "txonly" forwarding mode routine. With this update testpmd application generates the packets and sets the dynamic timestamps according to specified time pattern if it sees the "rte_dynfield_timestamp" is registered. The new testpmd command is proposed to configure sending pattern: set tx_times <burst_gap>,<intra_gap> <intra_gap> - the delay between the packets within the burst specified in the device clock units. 
The number of packets in the burst is defined by txburst parameter <burst_gap> - the delay between the bursts in the device clock units As the result the bursts of packet will be transmitted with specific delays between the packets within the burst and specific delay between the bursts. The rte_eth_read_clock is supposed to be engaged to get the current device clock value and provide the reference for the timestamps. [1] http://patches.dpdk.org/patch/73714/ Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2020-07-10 12:39:41 +00:00
/**
* The timestamp dynamic field provides some timing information. The
* units and time references (initial phase) are not explicitly defined
* but are always kept the same for a given port. Some devices allow
* querying rte_eth_read_clock(), which returns the current device
* timestamp. The dynamic Tx timestamp flag tells whether the field contains
* an actual timestamp value for the packets being sent; this value can be
* used by the PMD to schedule packet sending.
*/
#define RTE_MBUF_DYNFIELD_TIMESTAMP_NAME "rte_dynfield_timestamp"
/* 64-bit unsigned timestamp type; presumably the type of the value held in
* the timestamp dynamic field (to be confirmed against the users).
*/
typedef uint64_t rte_mbuf_timestamp_t;
/**
* Name of the dynamic flag indicating that the timestamp field in the
* mbuf was filled by the driver.
*/
#define RTE_MBUF_DYNFLAG_RX_TIMESTAMP_NAME "rte_dynflag_rx_timestamp"
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* Register the dynamic mbuf field and flag for the Rx timestamp.
*
* @param field_offset
* Pointer to the offset of the registered mbuf field, can be NULL.
* The same field is shared for Rx and Tx timestamp.
* @param rx_flag
* Pointer to the mask of the registered offload flag, can be NULL.
* @return
* 0 on success, -1 otherwise.
* Possible values for rte_errno:
* - EEXIST: already registered with different parameters.
* - EPERM: called from a secondary process.
* - ENOENT: no more field or flag available.
* - ENOMEM: allocation failure.
*/
__rte_experimental
int rte_mbuf_dyn_rx_timestamp_register(int *field_offset, uint64_t *rx_flag);
mbuf: introduce accurate packet Tx scheduling There is the requirement on some networks for precise traffic timing management. The ability to send (and, generally speaking, receive) the packets at the very precisely specified moment of time provides the opportunity to support the connections with Time Division Multiplexing using the contemporary general purpose NIC without involving an auxiliary hardware. For example, the supporting of O-RAN Fronthaul interface is one of the promising features for potentially usage of the precise time management for the egress packets. The main objective of this patchset is to specify the way how applications can provide the moment of time at what the packet transmission must be started and to describe in preliminary the supporting this feature from mlx5 PMD side [1]. The new dynamic timestamp field is proposed, it provides some timing information, the units and time references (initial phase) are not explicitly defined but are maintained always the same for a given port. Some devices allow to query rte_eth_read_clock() that will return the current device timestamp. The dynamic timestamp flag tells whether the field contains actual timestamp value. For the packets being sent this value can be used by PMD to schedule packet sending. The device clock is opaque entity, the units and frequency are vendor specific and might depend on hardware capabilities and configurations. If might (or not) be synchronized with real time via PTP, might (or not) be synchronous with CPU clock (for example if NIC and CPU share the same clock source there might be no any drift between the NIC and CPU clocks), etc. After PKT_RX_TIMESTAMP flag and fixed timestamp field supposed deprecation and obsoleting, these dynamic flag and field might be used to manage the timestamps on receiving datapath as well. 
Having the dedicated flags for Rx/Tx timestamps allows applications not to perform explicit flags reset on forwarding and not to promote received timestamps to the transmitting datapath by default. The static PKT_RX_TIMESTAMP is considered as candidate to become the dynamic flag and this move should be discussed. When PMD sees the "rte_dynfield_timestamp" set on the packet being sent it tries to synchronize the time of packet appearing on the wire with the specified packet timestamp. If the specified one is in the past it should be ignored, if one is in the distant future it should be capped with some reasonable value (in range of seconds). These specific cases ("too late" and "distant future") can be optionally reported via device xstats to assist applications to detect the time-related problems. There is no any packet reordering according timestamps is supposed, neither within packet burst, nor between packets, it is an entirely application responsibility to generate packets and its timestamps in desired order. The timestamps can be put only in the first packet in the burst providing the entire burst scheduling. PMD reports the ability to synchronize packet sending on timestamp with new offload flag: This is palliative and might be replaced with new eth_dev API about reporting/managing the supported dynamic flags and its related features. This API would break ABI compatibility and can't be introduced at the moment, so is postponed to 20.11. For testing purposes it is proposed to update testpmd "txonly" forwarding mode routine. With this update testpmd application generates the packets and sets the dynamic timestamps according to specified time pattern if it sees the "rte_dynfield_timestamp" is registered. The new testpmd command is proposed to configure sending pattern: set tx_times <burst_gap>,<intra_gap> <intra_gap> - the delay between the packets within the burst specified in the device clock units. 
The number of packets in the burst is defined by txburst parameter <burst_gap> - the delay between the bursts in the device clock units As the result the bursts of packet will be transmitted with specific delays between the packets within the burst and specific delay between the bursts. The rte_eth_read_clock is supposed to be engaged to get the current device clock value and provide the reference for the timestamps. [1] http://patches.dpdk.org/patch/73714/ Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2020-07-10 12:39:41 +00:00
/**
* When the PMD sees the RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME flag set on the
* packet being sent, it tries to synchronize the time the packet appears
* on the wire with the specified packet timestamp. If the specified one
* is in the past it should be ignored; if it is in the distant future
* it should be capped with some reasonable value (in range of seconds).
*
* No packet reordering according to timestamps is expected, neither for
* packets within a burst nor between whole bursts; it is entirely the
* application's responsibility to generate packets and their timestamps
* in the desired order. The timestamp might be put only in the first
* packet of the burst, providing the scheduling for the entire burst.
*/
#define RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME "rte_dynflag_tx_timestamp"
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*
* Register the dynamic mbuf field and flag for the Tx timestamp.
*
* @param field_offset
* Pointer to the offset of the registered mbuf field, can be NULL.
* The same field is shared for Rx and Tx timestamp.
* @param tx_flag
* Pointer to the mask of the registered offload flag, can be NULL.
* @return
* 0 on success, -1 otherwise.
* Possible values for rte_errno:
* - EEXIST: already registered with different parameters.
* - EPERM: called from a secondary process.
* - ENOENT: no more field or flag available.
* - ENOMEM: allocation failure.
*/
__rte_experimental
int rte_mbuf_dyn_tx_timestamp_register(int *field_offset, uint64_t *tx_flag);
#ifdef __cplusplus
}
#endif
#endif /* _RTE_MBUF_DYN_H_ */