424 lines
9.9 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
eal: add channel for multi-process communication Previouly, there are three channels for multi-process (i.e., primary/secondary) communication. 1. Config-file based channel, in which, the primary process writes info into a pre-defined config file, and the secondary process reads the info out. 2. vfio submodule has its own channel based on unix socket for the secondary process to get container fd and group fd from the primary process. 3. pdump submodule also has its own channel based on unix socket for packet dump. It'd be good to have a generic communication channel for multi-process communication to accommodate the requirements including: a. Secondary wants to send info to primary, for example, secondary would like to send request (about some specific vdev to primary). b. Sending info at any time, instead of just initialization time. c. Share FDs with the other side, for vdev like vhost, related FDs (memory region, kick) should be shared. d. A send message request needs the other side to response immediately. This patch proposes to create a communication channel, based on datagram unix socket, for above requirements. Each process will block on a unix socket waiting for messages from the peers. Three new APIs are added: 1. rte_eal_mp_action_register() is used to register an action, indexed by a string, when a component at receiver side would like to response the messages from the peer processe. 2. rte_eal_mp_action_unregister() is used to unregister the action if the calling component does not want to response the messages. 3. rte_eal_mp_sendmsg() is used to send a message, and returns immediately. If there are n secondary processes, the primary process will send n messages. Suggested-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2018-01-30 06:58:08 +00:00
* Copyright(c) 2010-2018 Intel Corporation
*/
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <rte_dev.h>
#include <rte_lcore.h>
/**
* Internal per-lcore configuration and launch state.
*
* Instances live in the lcore_config[] array declared right after this
* structure (RTE_MAX_LCORE entries).
*/
struct lcore_config {
pthread_t thread_id; /**< pthread identifier */
int pipe_master2slave[2]; /**< communication pipe; going by the name, master -> slave direction (confirm) */
int pipe_slave2master[2]; /**< communication pipe; going by the name, slave -> master direction (confirm) */
lcore_function_t * volatile f; /**< function to call */
void * volatile arg; /**< argument of function */
volatile int ret; /**< return value of function */
volatile enum rte_lcore_state_t state; /**< lcore state */
unsigned int socket_id; /**< physical socket id for this lcore */
unsigned int core_id; /**< core number on socket for this lcore */
int core_index; /**< relative index, starting from 0 */
uint8_t core_role; /**< role of core eg: OFF, RTE, SERVICE */
rte_cpuset_t cpuset; /**< CPU set the lcore has affinity to */
};
/**
* Table of per-lcore configuration entries, RTE_MAX_LCORE long.
* NOTE(review): presumably indexed by lcore id -- confirm against users.
*/
extern struct lcore_config lcore_config[RTE_MAX_LCORE];
/**
* The global RTE configuration structure.
*
* The structure is declared packed (__rte_packed), so its members carry no
* padding between them. A pointer to the global instance is obtained with
* rte_eal_get_configuration().
*/
struct rte_config {
uint32_t master_lcore; /**< Id of the master lcore */
uint32_t lcore_count; /**< Number of available logical cores. */
uint32_t numa_node_count; /**< Number of detected NUMA nodes. */
/**
* List of detected NUMA nodes.
* NOTE(review): presumably only the first numa_node_count entries are
* meaningful -- confirm.
*/
uint32_t numa_nodes[RTE_MAX_NUMA_NODES];
uint32_t service_lcore_count; /**< Number of available service cores. */
enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< Role of each lcore. */
/** Primary or secondary configuration */
enum rte_proc_type_t process_type;
/** PA or VA mapping mode */
enum rte_iova_mode iova_mode;
/**
* Pointer to memory configuration, which may be shared across multiple
* DPDK instances
*/
struct rte_mem_config *mem_config;
} __rte_packed;
/**
* Get the global configuration structure.
*
* @return
* A pointer to the global configuration structure.
*/
struct rte_config *rte_eal_get_configuration(void);
/**
* Initialize the memzone subsystem (private to eal).
*
* @return
* - 0 on success
* - Negative on error
*/
int rte_eal_memzone_init(void);
/**
log: respect logger configured before EAL init Before this patch, application-specific loggers could not be installed before rte_eal_init completed (the initialization process called rte_openlog_stream, overwriting any previously installed logger). This made it impossible for an application to capture the initial log messages generated during rte_eal_init. This patch changes initialization so that information from a previous call to rte_openlog_stream is not lost. Specifically: * The default log stream is now maintained separately from an application-specific log stream installed with rte_openlog_stream. * rte_eal_common_log_init has been renamed to eal_log_set_default, since this is all it does. It no longer invokes rte_openlog_stream; it just updates the default stream. Also, this method now returns void, rather than int, since there are no errors. This patch also removes the "early log" mechanism and cleans up the log initialization mechanism: * The default log stream defaults to stderr on all platforms if eal_log_set_default hasn't been invoked (Linux used to use stdout during the first part of initialization). * Removed rte_eal_log_early_init; all of the desired functionality can be achieved by calling eal_log_set_default. * Removed lib/librte_eal/bsdapp/eal/eal_log.c: it contained only one function, rte_eal_log_init, which is not needed or invoked for BSD. * Removed declaration for eal_default_log_stream in rte_log.h (it's now private to eal_common_log.c). * Moved call to rte_eal_log_init earlier in rte_eal_init for Linux, so that it starts using the preferrred log ASAP. Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
2016-10-12 12:38:32 -07:00
* Common log initialization function (private to eal). Determines
* where log data is written when no call to rte_openlog_stream is
* in effect.
*
* @param default_log
* The default log stream to be used.
*
* This function returns nothing (it is declared void).
*/
log: respect logger configured before EAL init Before this patch, application-specific loggers could not be installed before rte_eal_init completed (the initialization process called rte_openlog_stream, overwriting any previously installed logger). This made it impossible for an application to capture the initial log messages generated during rte_eal_init. This patch changes initialization so that information from a previous call to rte_openlog_stream is not lost. Specifically: * The default log stream is now maintained separately from an application-specific log stream installed with rte_openlog_stream. * rte_eal_common_log_init has been renamed to eal_log_set_default, since this is all it does. It no longer invokes rte_openlog_stream; it just updates the default stream. Also, this method now returns void, rather than int, since there are no errors. This patch also removes the "early log" mechanism and cleans up the log initialization mechanism: * The default log stream defaults to stderr on all platforms if eal_log_set_default hasn't been invoked (Linux used to use stdout during the first part of initialization). * Removed rte_eal_log_early_init; all of the desired functionality can be achieved by calling eal_log_set_default. * Removed lib/librte_eal/bsdapp/eal/eal_log.c: it contained only one function, rte_eal_log_init, which is not needed or invoked for BSD. * Removed declaration for eal_default_log_stream in rte_log.h (it's now private to eal_common_log.c). * Moved call to rte_eal_log_init earlier in rte_eal_init for Linux, so that it starts using the preferrred log ASAP. Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
2016-10-12 12:38:32 -07:00
void eal_log_set_default(FILE *default_log);
/**
* Fill configuration with number of physical and logical processors
*
* This function is private to EAL.
*
* Parse /proc/cpuinfo to get the number of physical and logical
* processors on the machine.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_cpu_init(void);
/**
* Create memseg lists
*
* This function is private to EAL.
*
* Preallocate virtual memory.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_memseg_init(void);
/**
* Map memory
*
* This function is private to EAL.
*
* Fill the configuration structure with the resulting memory information.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_memory_init(void);
/**
* Configure timers
*
* This function is private to EAL.
*
* Mmap memory areas used by HPET (high precision event timer) that will
* provide our time reference, and configure the TSC frequency also for it
* to be used as a reference.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_timer_init(void);
/**
* Init the default log stream
*
* This function is private to EAL.
*
* @param id
* Identifier string for the log stream.
* NOTE(review): presumably the syslog ident -- confirm against the
* OS-specific implementation.
* @param facility
* Logging facility.
* NOTE(review): presumably a syslog facility code -- confirm.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_log_init(const char *id, int facility);
/**
* Save the log regexp for later
*
* @param type
* Regular expression string to save.
* NOTE(review): presumably matched against log type names -- confirm.
* @param priority
* Priority to associate with the saved expression.
* NOTE(review): presumably a log level -- confirm.
*
* @return
* NOTE(review): not documented in this header; presumably 0 on success and
* negative on error like the neighbouring EAL-private helpers -- confirm.
*/
int rte_log_save_regexp(const char *type, int priority);
/**
* Save a log pattern for later
*
* Same shape as rte_log_save_regexp(), but takes a pattern string.
* NOTE(review): presumably a shell-style (glob) pattern rather than a
* regular expression -- confirm against the implementation.
*
* @param pattern
* Pattern string to save.
* @param priority
* Priority to associate with the saved pattern.
*
* @return
* NOTE(review): not documented in this header; presumably 0 on success and
* negative on error -- confirm.
*/
int rte_log_save_pattern(const char *pattern, int priority);
/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
*
* This function is private to EAL
*
* @return
* 0 on success, negative on error
*/
int rte_eal_tailqs_init(void);
/**
* Init interrupt handling.
*
* This function is private to EAL.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_intr_init(void);
/**
* Init alarm mechanism. This is to allow a callback be called after
* specific time.
*
* This function is private to EAL.
*
* @return
* 0 on success, negative on error
*/
int rte_eal_alarm_init(void);
/**
* Check whether a kernel module (e.g. vfio, vfio_iommu_type1) is loaded.
*
* @param module_name
* Name of the module to check.
*
* @return
* -1 if an error occurred (NULL pointer or open failure)
* 0 if the module is not loaded
* 1 if the module is loaded
*/
int rte_eal_check_module(const char *module_name);
/**
* eal_get_virtual_area() flag: don't fail if cannot get exact requested
* address.
*/
#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
/**
* eal_get_virtual_area() flag: try getting smaller sized (decrement by page
* size) virtual areas if cannot get area of requested size.
*/
#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
/**
* eal_get_virtual_area() flag: immediately unmap reserved virtual area.
*/
#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
/**
* Get virtual area of specified size from the OS.
*
* This function is private to the EAL.
*
* @param requested_addr
* Address where to request address space.
* @param size
* Size of requested area, passed by pointer.
* NOTE(review): presumably updated with the size actually obtained (see
* EAL_VIRTUAL_AREA_ALLOW_SHRINK) -- confirm against the implementation.
* @param page_sz
* Page size on which to align requested virtual area.
* @param flags
* EAL_VIRTUAL_AREA_* flags.
* @param mmap_flags
* Extra flags passed directly to mmap().
*
* @return
* Virtual area address if successful.
* NULL if unsuccessful.
*/
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
size_t page_sz, int flags, int mmap_flags);
/**
* Get cpu core_id.
*
* This function is private to the EAL.
*/
unsigned eal_cpu_core_id(unsigned lcore_id);
/**
* Check if cpu is present.
*
* This function is private to the EAL.
*/
int eal_cpu_detected(unsigned lcore_id);
/**
* Set TSC frequency from precise value or estimation
*
* This function is private to the EAL.
*/
void set_tsc_freq(void);
/**
* Get precise TSC frequency from system
*
* This function is private to the EAL.
*/
uint64_t get_tsc_freq(void);
/**
* Get TSC frequency if the architecture supports.
*
* This function is private to the EAL.
*
* @return
* The number of TSC cycles in one second.
* Returns zero if the architecture support is not available.
*/
uint64_t get_tsc_freq_arch(void);
/**
* Prepare physical memory mapping
* i.e. hugepages on Linux and
* contigmem on BSD.
*
* This function is private to the EAL.
*/
int rte_eal_hugepage_init(void);
/**
* Creates memory mapping in secondary process
* i.e. hugepages on Linux and
* contigmem on BSD.
*
* This function is private to the EAL.
*/
int rte_eal_hugepage_attach(void);
/**
* Find a bus capable of identifying a device.
*
* @param str
* A device identifier (PCI address, virtual PMD name, ...).
*
* @return
* A valid bus handle if found.
* NULL if no bus is able to parse this device.
*/
struct rte_bus *rte_bus_find_by_device_name(const char *str);
eal: add channel for multi-process communication Previouly, there are three channels for multi-process (i.e., primary/secondary) communication. 1. Config-file based channel, in which, the primary process writes info into a pre-defined config file, and the secondary process reads the info out. 2. vfio submodule has its own channel based on unix socket for the secondary process to get container fd and group fd from the primary process. 3. pdump submodule also has its own channel based on unix socket for packet dump. It'd be good to have a generic communication channel for multi-process communication to accommodate the requirements including: a. Secondary wants to send info to primary, for example, secondary would like to send request (about some specific vdev to primary). b. Sending info at any time, instead of just initialization time. c. Share FDs with the other side, for vdev like vhost, related FDs (memory region, kick) should be shared. d. A send message request needs the other side to response immediately. This patch proposes to create a communication channel, based on datagram unix socket, for above requirements. Each process will block on a unix socket waiting for messages from the peers. Three new APIs are added: 1. rte_eal_mp_action_register() is used to register an action, indexed by a string, when a component at receiver side would like to response the messages from the peer processe. 2. rte_eal_mp_action_unregister() is used to unregister the action if the calling component does not want to response the messages. 3. rte_eal_mp_sendmsg() is used to send a message, and returns immediately. If there are n secondary processes, the primary process will send n messages. Suggested-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com> Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2018-01-30 06:58:08 +00:00
/**
* Create the unix channel for primary/secondary communication.
*
* @return
* 0 on success;
* (<0) on failure.
*/
int rte_mp_channel_init(void);
/**
* Primary/secondary communication cleanup.
*/
void rte_mp_channel_cleanup(void);
/**
* @internal
* Parse a device string and store its information in an
* rte_devargs structure.
*
* A device description is split by layers of abstraction of the device:
* bus, class and driver. Each layer will offer a set of properties that
* can be applied either to configure or recognize a device.
*
* This function will parse those properties and prepare the rte_devargs
* to be given to each layer for processing.
*
* Note: if the "data" field of the devargs points to devstr,
* then no dynamic allocation is performed and the rte_devargs
* can be safely discarded.
*
* Otherwise ``data`` will hold a workable copy of devstr, that will be
* used by layer descriptors within rte_devargs. In this case,
* any rte_devargs should be cleaned-up before being freed.
*
* @param devargs
* rte_devargs structure to fill.
*
* @param devstr
* Device string.
*
* @return
* 0 on success.
* Negative errno values on error (rte_errno is set).
*/
int
rte_devargs_layers_parse(struct rte_devargs *devargs,
const char *devstr);
eal: enable hotplug on multi-process We are going to introduce the solution to handle hotplug in multi-process, it includes the below scenario: 1. Attach a device from the primary 2. Detach a device from the primary 3. Attach a device from a secondary 4. Detach a device from a secondary In the primary-secondary process model, we assume devices are shared by default. that means attaches or detaches a device on any process will broadcast to all other processes through mp channel then device information will be synchronized on all processes. Any failure during attaching/detaching process will cause inconsistent status between processes, so proper rollback action should be considered. This patch covers the implementation of case 1,2. Case 3,4 will be implemented on a separate patch. IPC scenario for Case 1, 2: attach a device a) primary attach the new device if failed goto h). b) primary send attach sync request to all secondary. c) secondary receive request and attach the device and send a reply. d) primary check the reply if all success goes to i). e) primary send attach rollback sync request to all secondary. f) secondary receive the request and detach the device and send a reply. g) primary receive the reply and detach device as rollback action. h) attach fail i) attach success detach a device a) primary send detach sync request to all secondary b) secondary detach the device and send reply c) primary check the reply if all success goes to f). d) primary send detach rollback sync request to all secondary. e) secondary receive the request and attach back device. goto g) f) primary detach the device if success goto g), else goto d) g) detach fail. h) detach success. Signed-off-by: Qi Zhang <qi.z.zhang@intel.com> Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
2018-10-16 08:16:28 +08:00
/**
* Probe a device at local process.
*
* @param devargs
* Device arguments including bus, class and driver properties.
* @param new_dev
* Output parameter: the newly probed device.
* @return
* 0 on success, negative on error.
*/
int local_dev_probe(const char *devargs, struct rte_device **new_dev);
/**
* Hotplug remove a given device from a specific bus at local process.
*
* @param dev
* Data structure of the device to remove.
* @return
* 0 on success, negative on error.
*/
int local_dev_remove(struct rte_device *dev);
/**
* Iterate over all buses to find the corresponding bus to handle the sigbus
* error.
*
* @param failure_addr
* Pointer to the fault address of the sigbus error.
*
* @return
* 0 if the sigbus was handled successfully.
* -1 if handling the sigbus failed.
* 1 if no bus can handle the sigbus.
*/
int rte_bus_sigbus_handler(const void *failure_addr);
/**
* @internal
* Register the sigbus handler.
*
* @return
* - On success, zero.
* - On failure, a negative value.
*/
int
dev_sigbus_handler_register(void);
/**
* @internal
* Unregister the sigbus handler.
*
* @return
* - On success, zero.
* - On failure, a negative value.
*/
int
dev_sigbus_handler_unregister(void);
/**
* Get OS-specific EAL mapping base address.
*/
uint64_t
eal_get_baseaddr(void);
/**
* Allocate memory.
*
* NOTE(review): going by the name, this is an allocation path that does not
* emit trace events -- confirm against the implementation.
*
* @param type
* String tag for the allocation.
* NOTE(review): presumably same meaning as the rte_malloc() type -- confirm.
* @param size
* Requested size.
* NOTE(review): presumably in bytes -- confirm.
* @param align
* Requested alignment.
* NOTE(review): semantics of 0 are not visible here -- confirm.
*
* @return
* Pointer to the allocated memory.
* NOTE(review): presumably NULL on failure -- confirm.
*/
void *
eal_malloc_no_trace(const char *type, size_t size, unsigned int align);
/**
* Free memory.
*
* NOTE(review): going by the name, this is a free path that does not emit
* trace events, presumably the counterpart of eal_malloc_no_trace() --
* confirm against the implementation.
*
* @param addr
* Address of the memory to free.
*/
void eal_free_no_trace(void *addr);
#endif /* _EAL_PRIVATE_H_ */