service: reduce average case service core overhead
Optimize the service loop so that the starting point is the lowest-indexed service mapped to the lcore in question, and terminate the loop at the highest-indexed service. While the worst case latency remains the same, this patch significantly reduces the service framework overhead for the average case. In particular, scenarios where an lcore only runs a single service, or multiple services whose id values are close (e.g., three services with ids 17, 18 and 22), show significant improvements. The worst case is where the lcore has two services mapped to it; one with service id 0 and the other with id 63. On a service lcore serving a single service, the service loop overhead is reduced from ~190 core clock cycles to ~46, on an Intel Cascade Lake generation Xeon. On weakly ordered CPUs, the gain is larger, since the loop included load-acquire atomic operations. Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com> Acked-by: Morten Brørup <mb@smartsharesystems.com> Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
This commit is contained in:
parent
b54ade8f24
commit
074b4db247
@ -78,6 +78,11 @@ static uint32_t rte_service_library_initialized;
|
||||
int32_t
|
||||
rte_service_init(void)
|
||||
{
|
||||
/* Hard limit due to the use of an uint64_t-based bitmask (and the
|
||||
* clzl intrinsic).
|
||||
*/
|
||||
RTE_BUILD_BUG_ON(RTE_SERVICE_NUM_MAX > 64);
|
||||
|
||||
if (rte_service_library_initialized) {
|
||||
RTE_LOG(NOTICE, EAL,
|
||||
"service library init() called, init flag %d\n",
|
||||
@ -470,7 +475,6 @@ static int32_t
|
||||
service_runner_func(void *arg)
|
||||
{
|
||||
RTE_SET_USED(arg);
|
||||
uint32_t i;
|
||||
const int lcore = rte_lcore_id();
|
||||
struct core_state *cs = &lcore_states[lcore];
|
||||
|
||||
@ -484,10 +488,17 @@ service_runner_func(void *arg)
|
||||
RUNSTATE_RUNNING) {
|
||||
|
||||
const uint64_t service_mask = cs->service_mask;
|
||||
uint8_t start_id;
|
||||
uint8_t end_id;
|
||||
uint8_t i;
|
||||
|
||||
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
|
||||
if (!service_registered(i))
|
||||
continue;
|
||||
if (service_mask == 0)
|
||||
continue;
|
||||
|
||||
start_id = __builtin_ctzl(service_mask);
|
||||
end_id = 64 - __builtin_clzl(service_mask);
|
||||
|
||||
for (i = start_id; i < end_id; i++) {
|
||||
/* return value ignored as no change to code flow */
|
||||
service_run(i, cs, service_mask, service_get(i), 1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user