5569dd7d90
The KNI kthreads seem to be re-scheduled at a granularity of roughly 1 millisecond right now, which seems to be insufficient for performing tests involving a lot of control plane traffic. Even if KNI_KTHREAD_RESCHEDULE_INTERVAL is set to 5 microseconds, it seems that the existing code cannot reschedule at the desired granularity, due to precision constraints of schedule_timeout_interruptible(). In our use case, we leverage the Linux Kernel for control plane, and it is not uncommon to have 60K - 100K pps for some signaling protocols. Since we are not in atomic context, the usleep_range() function seems to be more appropriate for being able to introduce smaller controlled delays, in the range of 5-10 microseconds. Upon reading the existing code, it would seem that this was the original intent. Adding sub-millisecond delays seems unfeasible with a call to schedule_timeout_interruptible(). KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ schedule_timeout_interruptible( usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); Below, we attempted a brief comparison between the existing implementation, which uses schedule_timeout_interruptible(), and usleep_range(). We attempt to measure the CPU usage, and RTT between two KNI interfaces, which are created on top of vmxnet3 adapters, connected by a vSwitch. insmod rte_kni.ko kthread_mode=single carrier=on schedule_timeout_interruptible(usecs_to_jiffies(5)) kni_single CPU Usage: 2-4 % [root@localhost ~]# ping 1.1.1.2 -I eth1 PING 1.1.1.2 (1.1.1.2) from 1.1.1.1 eth1: 56(84) bytes of data.
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=2.70 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=1.00 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=1.99 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.985 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=1.00 ms usleep_range(5, 10) kni_single CPU usage: 50% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.338 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.150 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.123 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.139 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.159 ms usleep_range(20, 50) kni_single CPU usage: 24% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.202 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.170 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.171 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.248 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.185 ms usleep_range(50, 100) kni_single CPU usage: 13% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.537 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.257 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.231 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.143 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.200 ms usleep_range(100, 200) kni_single CPU usage: 7% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.716 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.167 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.459 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.455 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.252 ms usleep_range(1000, 1100) kni_single CPU usage: 2% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=2.22 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=1.17 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=1.17 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=1.17 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=1.15 ms Upon testing, usleep_range(1000, 1100) seems roughly equivalent in latency and cpu usage to the variant with 
schedule_timeout_interruptible(), while usleep_range(100, 200) seems to give a decent tradeoff between latency and cpu usage, while allowing users to tweak the limits for improved precision if they have such use cases. Disabling RTE_KNI_PREEMPT_DEFAULT, interestingly seems to lead to a softlockup on my kernel. Kernel panic - not syncing: softlockup: hung tasks CPU: 0 PID: 1226 Comm: kni_single Tainted: G W O 3.10 #1 <IRQ> [<ffffffff814f84de>] dump_stack+0x19/0x1b [<ffffffff814f7891>] panic+0xcd/0x1e0 [<ffffffff810993b0>] watchdog_timer_fn+0x160/0x160 [<ffffffff810644b2>] __run_hrtimer.isra.4+0x42/0xd0 [<ffffffff81064b57>] hrtimer_interrupt+0xe7/0x1f0 [<ffffffff8102cd57>] smp_apic_timer_interrupt+0x67/0xa0 [<ffffffff8150321d>] apic_timer_interrupt+0x6d/0x80 This patch also attempts to remove this option. References: [1] https://www.kernel.org/doc/Documentation/timers/timers-howto.txt Signed-off-by: Tudor Cornea <tudor.cornea@gmail.com> Acked-by: Padraig Connolly <Padraig.J.Connolly@intel.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
143 lines
3.6 KiB
C
143 lines
3.6 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

/**
 * @file Header file containing DPDK compilation parameters
 *
 * Header file containing DPDK compilation parameters. Also include the
 * meson-generated header file containing the detected parameters that
 * are variable across builds or build environments.
 */
#ifndef _RTE_CONFIG_H_
#define _RTE_CONFIG_H_

#include <rte_build_config.h>

/* legacy defines */
#ifdef RTE_EXEC_ENV_LINUX
#define RTE_EXEC_ENV_LINUXAPP 1
#endif
#ifdef RTE_EXEC_ENV_FREEBSD
#define RTE_EXEC_ENV_BSDAPP 1
#endif

/* String that appears before the version number */
#define RTE_VER_PREFIX "DPDK"

/****** library defines ********/

/* EAL defines */
#define RTE_MAX_HEAPS 32
#define RTE_MAX_MEMSEG_LISTS 128
#define RTE_MAX_MEMSEG_PER_LIST 8192
#define RTE_MAX_MEM_MB_PER_LIST 32768
#define RTE_MAX_MEMSEG_PER_TYPE 32768
#define RTE_MAX_MEM_MB_PER_TYPE 65536
#define RTE_MAX_MEMZONE 2560
#define RTE_MAX_TAILQ 32
#define RTE_LOG_DP_LEVEL RTE_LOG_INFO
#define RTE_BACKTRACE 1
#define RTE_MAX_VFIO_CONTAINERS 64

/* bsd module defines */
#define RTE_CONTIGMEM_MAX_NUM_BUFS 64
#define RTE_CONTIGMEM_DEFAULT_NUM_BUFS 1
#define RTE_CONTIGMEM_DEFAULT_BUF_SIZE (512*1024*1024)

/* mempool defines */
#define RTE_MEMPOOL_CACHE_MAX_SIZE 512

/* mbuf defines */
#define RTE_MBUF_DEFAULT_MEMPOOL_OPS "ring_mp_mc"
#define RTE_PKTMBUF_HEADROOM 128

/* ether defines */
#define RTE_MAX_QUEUES_PER_PORT 1024
#define RTE_ETHDEV_QUEUE_STAT_CNTRS 16 /* max 256 */
#define RTE_ETHDEV_RXTX_CALLBACKS 1
#define RTE_MAX_MULTI_HOST_CTRLS 4

/* cryptodev defines */
#define RTE_CRYPTO_MAX_DEVS 64
#define RTE_CRYPTODEV_NAME_LEN 64
#define RTE_CRYPTO_CALLBACKS 1

/* compressdev defines */
#define RTE_COMPRESS_MAX_DEVS 64

/* regexdev defines */
#define RTE_MAX_REGEXDEV_DEVS 32

/* eventdev defines */
#define RTE_EVENT_MAX_DEVS 16
#define RTE_EVENT_MAX_PORTS_PER_DEV 255
#define RTE_EVENT_MAX_QUEUES_PER_DEV 255
#define RTE_EVENT_TIMER_ADAPTER_NUM_MAX 32
#define RTE_EVENT_ETH_INTR_RING_SIZE 1024
#define RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE 32
#define RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE 32

/* rawdev defines */
#define RTE_RAWDEV_MAX_DEVS 64

/* ip_fragmentation defines */
#define RTE_LIBRTE_IP_FRAG_MAX_FRAG 8
#undef RTE_LIBRTE_IP_FRAG_TBL_STAT

/* rte_power defines */
#define RTE_MAX_LCORE_FREQS 64

/* rte_sched defines */
#undef RTE_SCHED_CMAN
#undef RTE_SCHED_COLLECT_STATS
#undef RTE_SCHED_SUBPORT_TC_OV
#define RTE_SCHED_PORT_N_GRINDERS 8
#undef RTE_SCHED_VECTOR

/* rte_graph defines */
#define RTE_GRAPH_BURST_SIZE 256
#define RTE_LIBRTE_GRAPH_STATS 1

/****** driver defines ********/

/* Packet prefetching in PMDs */
#define RTE_PMD_PACKET_PREFETCH 1

/* QuickAssist device */
/* Max. number of QuickAssist devices which can be attached */
#define RTE_PMD_QAT_MAX_PCI_DEVICES 48
#define RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS 16
#define RTE_PMD_QAT_COMP_IM_BUFFER_SIZE 65536

/* virtio crypto defines */
#define RTE_MAX_VIRTIO_CRYPTO 32

/* DPAA SEC max cryptodev devices */
#define RTE_LIBRTE_DPAA_MAX_CRYPTODEV 4

/* fm10k defines */
#define RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE 1

/* hns3 defines */
#define RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF 256

/* i40e defines */
#define RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC 1
#undef RTE_LIBRTE_I40E_16BYTE_RX_DESC
#define RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF 64
#define RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF 4
#define RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM 4

/* Ring net PMD settings */
#define RTE_PMD_RING_MAX_RX_RINGS 16
#define RTE_PMD_RING_MAX_TX_RINGS 16

/* QEDE PMD defines */
#define RTE_LIBRTE_QEDE_FW ""

/* DLB2 defines */
#undef RTE_LIBRTE_PMD_DLB2_QUELL_STATS

#endif /* _RTE_CONFIG_H_ */