numam-dpdk/lib/librte_kni/rte_kni_common.h

139 lines
3.7 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: (BSD-3-Clause OR LGPL-2.1) */
/*
* Copyright(c) 2007-2014 Intel Corporation.
*/
#ifndef _RTE_KNI_COMMON_H_
#define _RTE_KNI_COMMON_H_
#ifdef __KERNEL__
#include <linux/if.h>
#include <asm/barrier.h>
#define RTE_STD_C11
#else
#include <rte_common.h>
#include <rte_config.h>
#endif
/*
 * Size, in bytes, of the KNI device name buffer
 * (see the name member of struct rte_kni_device_info).
 * KNI name is part of memzone name. Must not exceed IFNAMSIZ.
 */
#define RTE_KNI_NAMESIZE 16
/*
 * Alignment, in bytes, applied to the next member of struct rte_kni_mbuf,
 * i.e. to the first field placed on that structure's second cache line.
 */
#define RTE_CACHE_LINE_MIN_SIZE 64
/*
 * Request id.
 *
 * The numeric value of every request is written out explicitly so the
 * value that travels in a request is visible at a glance.
 * RTE_KNI_REQ_MAX is left implicit: it always follows the last request id.
 */
enum rte_kni_req_id {
	RTE_KNI_REQ_UNKNOWN         = 0,
	RTE_KNI_REQ_CHANGE_MTU      = 1,
	RTE_KNI_REQ_CFG_NETWORK_IF  = 2,
	RTE_KNI_REQ_CHANGE_MAC_ADDR = 3,
	RTE_KNI_REQ_CHANGE_PROMISC  = 4,
	RTE_KNI_REQ_CHANGE_ALLMULTI = 5,
	RTE_KNI_REQ_MAX,            /* one past the last request id */
};
/*
 * Structure for KNI request.
 *
 * The structure is declared packed, so its members are laid out back to
 * back with no compiler-inserted padding. The payload is an anonymous
 * union: its members are accessed directly on the request and all share
 * the same storage.
 *
 * NOTE(review): which union member is meaningful presumably depends on
 * req_id (see enum rte_kni_req_id) - confirm against the request handlers.
 */
struct rte_kni_request {
uint32_t req_id; /**< Request id */
/* Marker placed before the anonymous union; defined empty for kernel builds. */
RTE_STD_C11
union {
uint32_t new_mtu; /**< New MTU */
uint8_t if_up; /**< 1: interface up, 0: interface down */
uint8_t mac_addr[6]; /**< MAC address for interface */
uint8_t promiscusity; /**< 1: promiscuous mode enable, 0: disable */
uint8_t allmulti; /**< 1: all-multicast mode enable, 0: disable */
};
int32_t result; /**< Result for processing request */
} __attribute__((__packed__));
/*
 * FIFO descriptor mapped in shared memory. It describes a circular buffer:
 * the write and read positions wrap around, the FIFO is empty when
 * write == read, and a write must never overwrite the read position.
 */
struct rte_kni_fifo {
#ifdef RTE_USE_C11_MEM_MODEL
	unsigned int write;           /**< Next position to be written */
	unsigned int read;            /**< Next position to be read */
#else
	volatile unsigned int write;  /**< Next position to be written */
	volatile unsigned int read;   /**< Next position to be read */
#endif
	unsigned int len;             /**< Circular buffer length */
	unsigned int elem_size;       /**< Pointer size - for 32/64 bit OS */
	void *volatile buffer[];      /**< The buffer contains mbuf pointers */
};
/*
* The kernel image of the rte_mbuf struct, with only the relevant fields.
* Padding is necessary to assure the offsets of these fields
*/
struct rte_kni_mbuf {
void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
uint64_t buf_iova;
uint16_t data_off; /**< Start address of data in segment buffer. */
char pad1[2];
uint16_t nb_segs; /**< Number of segments. */
char pad4[2];
uint64_t ol_flags; /**< Offload features. */
char pad2[4];
uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */
uint16_t data_len; /**< Amount of data in segment buffer. */
mbuf: move pool pointer in first half According to the Technical Board decision (http://mails.dpdk.org/archives/dev/2020-November/191859.html), the mempool pointer in the mbuf struct is moved from the second to the first half. It may increase performance in some cases on systems having 64-byte cache line, i.e. mbuf split in two cache lines. Due to this change, all fields after "pool" are moved up. Hopefully no vector data path is impacted. Moving this field gives more space to dynfield1 while dropping the temporary dynfield0. This is how the mbuf layout looks like (pahole-style): word type name byte size 0 void * buf_addr; /* 0 + 8 */ 1 rte_iova_t buf_iova /* 8 + 8 */ /* --- RTE_MARKER64 rearm_data; */ 2 uint16_t data_off; /* 16 + 2 */ uint16_t refcnt; /* 18 + 2 */ uint16_t nb_segs; /* 20 + 2 */ uint16_t port; /* 22 + 2 */ 3 uint64_t ol_flags; /* 24 + 8 */ /* --- RTE_MARKER rx_descriptor_fields1; */ 4 uint32_t union packet_type; /* 32 + 4 */ uint32_t pkt_len; /* 36 + 4 */ 5 uint16_t data_len; /* 40 + 2 */ uint16_t vlan_tci; /* 42 + 2 */ 5.5 uint64_t union hash; /* 44 + 8 */ 6.5 uint16_t vlan_tci_outer; /* 52 + 2 */ uint16_t buf_len; /* 54 + 2 */ 7 struct rte_mempool * pool; /* 56 + 8 */ /* --- RTE_MARKER cacheline1; */ 8 struct rte_mbuf * next; /* 64 + 8 */ 9 uint64_t union tx_offload; /* 72 + 8 */ 10 struct rte_mbuf_ext_shared_info * shinfo; /* 80 + 8 */ 11 uint16_t priv_size; /* 88 + 2 */ uint16_t timesync; /* 90 + 2 */ 11.5 uint32_t dynfield1[9]; /* 92 + 36 */ 16 /* --- END 128 */ Signed-off-by: Thomas Monjalon <thomas@monjalon.net> Acked-by: Morten Brørup <mb@smartsharesystems.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> Acked-by: Jerin Jacob <jerinj@marvell.com> Acked-by: Stephen Hemminger <stephen@networkplumber.org>
2020-10-29 01:55:43 +01:00
char pad3[14];
void *pool;
/* fields on second cache line */
__attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)))
void *next; /**< Physical address of next mbuf in kernel. */
mbuf: split mbuf across two cache lines. This change splits the mbuf in two to move the pool and next pointers to the second cache line. This frees up 16 bytes in first cache line. The reason for this change is that we believe that there is no possible way that we can ever fit all the fields we need to fit into a 64-byte mbuf, and so we need to start looking at a 128-byte mbuf instead. Examples of new fields that need to fit in, include - * 32-bits more for filter information for support for the new filters in the i40e driver (and possibly other future drivers) * an additional 2-4 bytes for storing info on a second vlan tag to allow drivers to support double Vlan/QinQ * 4-bytes for storing a sequence number to enable out of order packet processing and subsequent packet reordering as well as potentially a number of other fields or splitting out fields that are superimposed over each other right now, e.g. for the qos scheduler. We also want to allow space for use by other non-Intel NIC drivers that may be open-sourced to dpdk.org in the future too, where they support fields and offloads that currently supported hardware doesn't. If we accept the fact of a 2-cache-line mbuf, then the issue becomes how to rework things so that we spread our fields over the two cache lines while causing the lowest slow-down possible. The general approach that we are looking to take is to focus the first cache line on fields that are updated on RX , so that receive only deals with one cache line. The second cache line can be used for application data and information that will only be used on the TX leg. This would allow us to work on the first cache line in RX as now, and have the second cache line being prefetched in the background so that it is available when necessary. Hardware prefetches should help us out here. We also may move rarely used, or slow-path RX fields e.g. 
such as those for chained mbufs with jumbo frames, to the second cache line, depending upon the performance impact and bytes savings achieved. Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> Acked-by: Thomas Monjalon <thomas.monjalon@6wind.com>
2014-09-11 14:15:44 +01:00
};
/*
 * Description of a KNI device to create. Passed to the kernel in the
 * IOCTL call, so member order and types are part of that interface.
 */
struct rte_kni_device_info {
	char name[RTE_KNI_NAMESIZE];  /**< Network device name for KNI */

	phys_addr_t tx_phys;
	phys_addr_t rx_phys;
	phys_addr_t alloc_phys;
	phys_addr_t free_phys;

	/* Used by Ethtool */
	phys_addr_t req_phys;
	phys_addr_t resp_phys;
	phys_addr_t sync_phys;
	void *sync_va;

	/* mbuf mempool */
	void *mbuf_va;
	phys_addr_t mbuf_phys;

	uint16_t group_id;            /**< Group ID */
	uint32_t core_id;             /**< core ID to bind for kernel thread */

	__extension__
	uint8_t force_bind : 1;       /**< Flag for kernel thread binding */

	/* mbuf size */
	unsigned int mbuf_size;

	unsigned int mtu;
	unsigned int min_mtu;
	unsigned int max_mtu;
	uint8_t mac_addr[6];
	uint8_t iova_mode;
};
/*
 * Name string of the KNI device.
 * NOTE(review): presumably the device node on which the ioctls below are
 * issued - confirm against the kernel module and the library.
 */
#define KNI_DEVICE "kni"
/*
 * ioctl request codes, all built with _IOWR using type 0.
 * TEST (number 1) carries an int; CREATE (number 2) and RELEASE (number 3)
 * carry a struct rte_kni_device_info.
 */
#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int)
#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info)
#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info)
#endif /* _RTE_KNI_COMMON_H_ */