vdpa/mlx5: add multi-thread management for configuration

The LM process involves many object creations and
destructions on both the source and the destination servers.
The longer the LM takes, the more packets the VM drops.
To shorten the LM time, the mlx5 FW configurations need to be
parallelized, so add internal multi-thread management in the
driver for this.

A new devarg defines the number of threads and their CPU core.
The thread management is shared between all the devices of the driver.
Since event_core also affects the datapath event thread,
reduce the priority of the datapath event thread to
allow fast configuration of the devices doing the LM.

Signed-off-by: Li Zhang <lizh@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Authored by Li Zhang on 2022-06-18 12:02:51 +03:00; committed by Maxime Coquelin
parent 057f7d2084
commit 67b070936d
7 changed files with 224 additions and 5 deletions

doc/guides/vdpadevs/mlx5.rst

@ -78,6 +78,18 @@ for an additional list of options shared with other mlx5 drivers.
CPU core number to set polling thread affinity to, default to control plane
cpu.
- ``max_conf_threads`` parameter [int]
Allow the driver to use internal threads to speed up configuration.
All the threads are pinned to the same core as the event completion queue scheduling thread.
- 0, default, don't use internal threads for configuration.
- 1 - 256, number of internal threads in addition to the caller thread (8 is suggested).
This value, if not 0, should be the same for all the devices;
the first device probed applies it, together with ``event_core``,
to all the multi-thread configurations in the driver.
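For example (hypothetical PCI address, assuming the usual mlx5 devargs syntax), a device probed with eight configuration threads sharing core 2 with the event thread could be allowed as:

  -a 0000:3b:00.2,class=vdpa,event_core=2,max_conf_threads=8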
- ``hw_latency_mode`` parameter [int]
The completion queue moderation mode:

drivers/vdpa/mlx5/meson.build

@ -15,6 +15,7 @@ sources = files(
'mlx5_vdpa_virtq.c',
'mlx5_vdpa_steer.c',
'mlx5_vdpa_lm.c',
'mlx5_vdpa_cthread.c',
)
cflags_options = [
'-std=c11',

drivers/vdpa/mlx5/mlx5_vdpa.c

@ -51,6 +51,8 @@ TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
struct mlx5_vdpa_conf_thread_mng conf_thread_mng;
static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
static struct mlx5_vdpa_priv *
@ -494,6 +496,29 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
DRV_LOG(WARNING, "Invalid event_core %s.", val);
else
priv->event_core = tmp;
} else if (strcmp(key, "max_conf_threads") == 0) {
if (tmp) {
priv->use_c_thread = true;
if (!conf_thread_mng.initializer_priv) {
conf_thread_mng.initializer_priv = priv;
if (tmp > MLX5_VDPA_MAX_C_THRD) {
DRV_LOG(WARNING,
"Invalid max_conf_threads %s "
"and set max_conf_threads to %d",
val, MLX5_VDPA_MAX_C_THRD);
tmp = MLX5_VDPA_MAX_C_THRD;
}
conf_thread_mng.max_thrds = tmp;
} else if (tmp != conf_thread_mng.max_thrds) {
DRV_LOG(WARNING,
"max_conf_threads is PMD argument and not per device, "
"only the first device configuration set it, current value is %d "
"and will not be changed to %d.",
conf_thread_mng.max_thrds, (int)tmp);
}
} else {
priv->use_c_thread = false;
}
} else if (strcmp(key, "hw_latency_mode") == 0) {
priv->hw_latency_mode = (uint32_t)tmp;
} else if (strcmp(key, "hw_max_latency_us") == 0) {
@ -522,6 +547,9 @@ mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
"hw_max_latency_us",
"hw_max_pending_comp",
"no_traffic_time",
"queue_size",
"queues",
"max_conf_threads",
NULL,
};
@ -729,6 +757,13 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
pthread_mutex_init(&priv->steer_update_lock, NULL);
priv->cdev = cdev;
mlx5_vdpa_config_get(mkvlist, priv);
if (priv->use_c_thread) {
if (conf_thread_mng.initializer_priv == priv)
if (mlx5_vdpa_mult_threads_create(priv->event_core))
goto error;
__atomic_fetch_add(&conf_thread_mng.refcnt, 1,
__ATOMIC_RELAXED);
}
if (mlx5_vdpa_create_dev_resources(priv))
goto error;
priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
@ -743,6 +778,8 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
pthread_mutex_unlock(&priv_list_lock);
return 0;
error:
if (conf_thread_mng.initializer_priv == priv)
mlx5_vdpa_mult_threads_destroy(false);
if (priv)
mlx5_vdpa_dev_release(priv);
return -rte_errno;
@ -812,6 +849,10 @@ mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
mlx5_vdpa_release_dev_resources(priv);
if (priv->vdev)
rte_vdpa_unregister_device(priv->vdev);
if (priv->use_c_thread)
if (__atomic_fetch_sub(&conf_thread_mng.refcnt,
1, __ATOMIC_RELAXED) == 1)
mlx5_vdpa_mult_threads_destroy(true);
rte_free(priv);
}

drivers/vdpa/mlx5/mlx5_vdpa.h

@ -73,6 +73,22 @@ enum {
MLX5_VDPA_NOTIFIER_STATE_ERR
};
#define MLX5_VDPA_MAX_C_THRD 256
/* Generic mlx5_vdpa_c_thread information. */
struct mlx5_vdpa_c_thread {
pthread_t tid;
};
struct mlx5_vdpa_conf_thread_mng {
void *initializer_priv;
uint32_t refcnt;
uint32_t max_thrds;
pthread_mutex_t cthrd_lock;
struct mlx5_vdpa_c_thread cthrd[MLX5_VDPA_MAX_C_THRD];
};
extern struct mlx5_vdpa_conf_thread_mng conf_thread_mng;
struct mlx5_vdpa_virtq {
SLIST_ENTRY(mlx5_vdpa_virtq) next;
uint8_t enable;
@ -126,6 +142,7 @@ enum mlx5_dev_state {
struct mlx5_vdpa_priv {
TAILQ_ENTRY(mlx5_vdpa_priv) next;
bool connected;
bool use_c_thread;
enum mlx5_dev_state state;
rte_spinlock_t db_lock;
pthread_mutex_t steer_update_lock;
@ -496,4 +513,23 @@ mlx5_vdpa_drain_cq(struct mlx5_vdpa_priv *priv);
bool
mlx5_vdpa_is_modify_virtq_supported(struct mlx5_vdpa_priv *priv);
/**
* Create configuration multi-threads resource
*
* @param[in] cpu_core
* CPU core number to set configuration threads affinity to.
*
* @return
* 0 on success, a negative value otherwise.
*/
int
mlx5_vdpa_mult_threads_create(int cpu_core);
/**
* Destroy configuration multi-threads resource
*
*/
void
mlx5_vdpa_mult_threads_destroy(bool need_unlock);
#endif /* RTE_PMD_MLX5_VDPA_H_ */
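A rough usage sketch of these two helpers, mirroring the probe/release flow in mlx5_vdpa.c above; the example_* wrappers are hypothetical and only illustrate the reference-counted lifetime of the shared thread pool:

#include "mlx5_vdpa.h"

static int
example_probe(struct mlx5_vdpa_priv *priv)
{
	if (priv->use_c_thread) {
		/* Only the first ("initializer") device creates the pool. */
		if (conf_thread_mng.initializer_priv == priv &&
		    mlx5_vdpa_mult_threads_create(priv->event_core))
			return -1;
		/* Every device using the pool holds one reference. */
		__atomic_fetch_add(&conf_thread_mng.refcnt, 1,
				__ATOMIC_RELAXED);
	}
	return 0;
}

static void
example_release(struct mlx5_vdpa_priv *priv)
{
	/* Dropping the last reference tears the pool down. */
	if (priv->use_c_thread &&
	    __atomic_fetch_sub(&conf_thread_mng.refcnt, 1,
			__ATOMIC_RELAXED) == 1)
		mlx5_vdpa_mult_threads_destroy(true);
}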

drivers/vdpa/mlx5/mlx5_vdpa_cthread.c (new file)

@ -0,0 +1,129 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2022 NVIDIA Corporation & Affiliates
*/
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_io.h>
#include <rte_alarm.h>
#include <rte_tailq.h>
#include <rte_ring_elem.h>
#include <mlx5_common.h>
#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"
static void *
mlx5_vdpa_c_thread_handle(void *arg)
{
/* To be added later. */
return arg;
}
static void
mlx5_vdpa_c_thread_destroy(uint32_t thrd_idx, bool need_unlock)
{
if (conf_thread_mng.cthrd[thrd_idx].tid) {
pthread_cancel(conf_thread_mng.cthrd[thrd_idx].tid);
pthread_join(conf_thread_mng.cthrd[thrd_idx].tid, NULL);
conf_thread_mng.cthrd[thrd_idx].tid = 0;
if (need_unlock)
pthread_mutex_init(&conf_thread_mng.cthrd_lock, NULL);
}
}
static int
mlx5_vdpa_c_thread_create(int cpu_core)
{
const struct sched_param sp = {
.sched_priority = sched_get_priority_max(SCHED_RR),
};
rte_cpuset_t cpuset;
pthread_attr_t attr;
uint32_t thrd_idx;
char name[32];
int ret;
pthread_mutex_lock(&conf_thread_mng.cthrd_lock);
pthread_attr_init(&attr);
ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
if (ret) {
DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
goto c_thread_err;
}
ret = pthread_attr_setschedparam(&attr, &sp);
if (ret) {
DRV_LOG(ERR, "Failed to set thread priority.");
goto c_thread_err;
}
for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds;
thrd_idx++) {
ret = pthread_create(&conf_thread_mng.cthrd[thrd_idx].tid,
&attr, mlx5_vdpa_c_thread_handle,
(void *)&conf_thread_mng);
if (ret) {
DRV_LOG(ERR, "Failed to create vdpa multi-threads %d.",
thrd_idx);
goto c_thread_err;
}
CPU_ZERO(&cpuset);
if (cpu_core != -1)
CPU_SET(cpu_core, &cpuset);
else
cpuset = rte_lcore_cpuset(rte_get_main_lcore());
ret = pthread_setaffinity_np(
conf_thread_mng.cthrd[thrd_idx].tid,
sizeof(cpuset), &cpuset);
if (ret) {
DRV_LOG(ERR, "Failed to set thread affinity for "
"vdpa multi-threads %d.", thrd_idx);
goto c_thread_err;
}
snprintf(name, sizeof(name), "vDPA-mthread-%d", thrd_idx);
ret = pthread_setname_np(
conf_thread_mng.cthrd[thrd_idx].tid, name);
if (ret)
DRV_LOG(ERR, "Failed to set vdpa multi-threads name %s.",
name);
else
DRV_LOG(DEBUG, "Thread name: %s.", name);
}
pthread_mutex_unlock(&conf_thread_mng.cthrd_lock);
return 0;
c_thread_err:
for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds;
thrd_idx++)
mlx5_vdpa_c_thread_destroy(thrd_idx, false);
pthread_mutex_unlock(&conf_thread_mng.cthrd_lock);
return -1;
}
int
mlx5_vdpa_mult_threads_create(int cpu_core)
{
pthread_mutex_init(&conf_thread_mng.cthrd_lock, NULL);
if (mlx5_vdpa_c_thread_create(cpu_core)) {
DRV_LOG(ERR, "Cannot create vDPA configuration threads.");
mlx5_vdpa_mult_threads_destroy(false);
return -1;
}
return 0;
}
void
mlx5_vdpa_mult_threads_destroy(bool need_unlock)
{
uint32_t thrd_idx;
if (!conf_thread_mng.initializer_priv)
return;
for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds;
thrd_idx++)
mlx5_vdpa_c_thread_destroy(thrd_idx, need_unlock);
pthread_mutex_destroy(&conf_thread_mng.cthrd_lock);
memset(&conf_thread_mng, 0, sizeof(struct mlx5_vdpa_conf_thread_mng));
}

drivers/vdpa/mlx5/mlx5_vdpa_event.c

@ -507,7 +507,7 @@ mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
pthread_attr_t attr;
char name[16];
const struct sched_param sp = {
-.sched_priority = sched_get_priority_max(SCHED_RR),
+.sched_priority = sched_get_priority_max(SCHED_RR) - 1,
};
if (!priv->eventc)
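With this change the datapath event thread runs one real-time priority level below the configuration threads, so when both are pinned to the same event_core the LM configuration work is scheduled first. As an illustrative aside (not part of the patch), the SCHED_RR priority range used by both threads can be queried as follows:

#include <sched.h>
#include <stdio.h>

int
main(void)
{
	/* On Linux the SCHED_RR priority range is typically 1..99. */
	int lo = sched_get_priority_min(SCHED_RR);
	int hi = sched_get_priority_max(SCHED_RR);

	printf("SCHED_RR priorities: %d..%d\n", lo, hi);
	printf("configuration threads: %d, event thread: %d\n", hi, hi - 1);
	return 0;
}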

drivers/vdpa/mlx5/mlx5_vdpa_virtq.c

@ -43,7 +43,7 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
errno == EWOULDBLOCK ||
errno == EAGAIN)
continue;
-DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s",
+DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s.",
virtq->index, strerror(errno));
}
break;
@ -57,7 +57,7 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg)
rte_spinlock_unlock(&priv->db_lock);
pthread_mutex_unlock(&virtq->virtq_lock);
if (priv->state != MLX5_VDPA_STATE_CONFIGURED && !virtq->enable) {
-DRV_LOG(ERR, "device %d queue %d down, skip kick handling",
+DRV_LOG(ERR, "device %d queue %d down, skip kick handling.",
priv->vid, virtq->index);
return;
}
@ -218,7 +218,7 @@ mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index)
return -1;
}
if (attr.state == MLX5_VIRTQ_STATE_ERROR)
-DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu",
+DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu.",
priv->vid, index, attr.error_type);
return 0;
}
@ -380,7 +380,7 @@ mlx5_vdpa_virtq_sub_objs_prepare(struct mlx5_vdpa_priv *priv,
if (ret) {
last_avail_idx = 0;
last_used_idx = 0;
-DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0");
+DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0.");
} else {
DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d for "
"virtq %d.", priv->vid, last_avail_idx,