Micro-optimize cpu_search(), allowing the compiler to use a more efficient inline
ffsl() implementation, when it is available, instead of homegrown iteration.

On a dual-E5645 amd64 system (2x6x2 cores) under heavy I/O load, this reduces
the time spent inside cpu_search() from 19% to 13%, while IOPS increase by 5%.
This commit is contained in:
Alexander Motin 2013-09-07 15:16:30 +00:00
parent a40c2646a4
commit 58909b74b9

View File

@@ -667,10 +667,14 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
}
/* Iterate through the child CPU groups and then remaining CPUs. */
for (i = cg->cg_children, cpu = mp_maxid; i >= 0; ) {
for (i = cg->cg_children, cpu = mp_maxid; ; ) {
if (i == 0) {
#ifdef HAVE_INLINE_FFSL
cpu = CPU_FFS(&cpumask) - 1;
#else
while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask))
cpu--;
#endif
if (cpu < 0)
break;
child = NULL;
@@ -695,6 +699,7 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
break;
}
} else { /* Handle child CPU. */
CPU_CLR(cpu, &cpumask);
tdq = TDQ_CPU(cpu);
load = tdq->tdq_load * 256;
rndptr = DPCPU_PTR(randomval);
@@ -742,8 +747,11 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
i--;
if (i == 0 && CPU_EMPTY(&cpumask))
break;
} else
}
#ifndef HAVE_INLINE_FFSL
else
cpu--;
#endif
}
return (total);
}