service: avoid false sharing on core state

For a valid service, the core mask of the service is checked against the current core, and the corresponding entry in the active_on_lcore array is set or reset. Up to 8 cores share the same cache line for their service active_on_lcore array entries since each entry is a uint8_t. Some number of these entries also share the cache line with the internal_flags member of struct rte_service_spec_impl, hence this false sharing also makes the service_valid() check expensive. Eliminate false sharing by moving the active_on_lcore array to a per-core data structure. The array is now indexed by service id. Signed-off-by: Nikhil Rao <nikhil.rao@intel.com> Acked-by: Gage Eads <gage.eads@intel.com> Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
2019-09-16 15:31:02 +05:30 · 2019-09-16 15:31:02 +05:30 · e484ccddbe
commit e484ccddbe
parent c1077933d4
1 changed files with 7 additions and 9 deletions
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@ -51,7 +51,6 @@ struct rte_service_spec_impl {
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
-	uint8_t active_on_lcore[RTE_MAX_LCORE];
 } __rte_cache_aligned;

 /* the internal values of a service core */
@ -60,7 +59,7 @@ struct core_state {
 	uint64_t service_mask;
 	uint8_t runstate; /* running or stopped */
 	uint8_t is_service_core; /* set if core is currently a service core */
-
+	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
 	uint64_t loops;
 	uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
 } __rte_cache_aligned;
@ -344,7 +343,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,


 static inline int32_t
-service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
 {
 	if (!service_valid(i))
 		return -EINVAL;
@ -352,11 +351,11 @@ service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
 	if (s->comp_runstate != RUNSTATE_RUNNING ||
 			s->app_runstate != RUNSTATE_RUNNING ||
 			!(service_mask & (UINT64_C(1) << i))) {
-		s->active_on_lcore[lcore] = 0;
+		cs->service_active_on_lcore[i] = 0;
 		return -ENOEXEC;
 	}

-	s->active_on_lcore[lcore] = 1;
+	cs->service_active_on_lcore[i] = 1;

 	/* check do we need cmpset, if MT safe or <= 1 core
 	 * mapped, atomic ops are not required.
@ -379,7 +378,6 @@ int32_t
 rte_service_may_be_active(uint32_t id)
 {
 	uint32_t ids[RTE_MAX_LCORE] = {0};
-	struct rte_service_spec_impl *s = &rte_services[id];
 	int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
 	int i;

@ -387,7 +385,7 @@ rte_service_may_be_active(uint32_t id)
 		return -EINVAL;

 	for (i = 0; i < lcore_count; i++) {
-		if (s->active_on_lcore[ids[i]])
+		if (lcore_states[i].service_active_on_lcore[id])
 			return 1;
 	}

@ -418,7 +416,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 		return -EBUSY;
 	}

-	int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
+	int ret = service_run(id, cs, UINT64_MAX);

 	if (serialize_mt_unsafe)
 		rte_atomic32_dec(&s->num_mapped_cores);
@ -439,7 +437,7 @@ rte_service_runner_func(void *arg)

 		for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
 			/* return value ignored as no change to code flow */
-			service_run(i, lcore, cs, service_mask);
+			service_run(i, cs, service_mask);
 		}

 		cs->loops++;