Micro-optimize cpu_search(), allowing the compiler to use a more efficient inline
ffsl() implementation, when one is available, instead of the homegrown iteration. On a dual-E5645 amd64 system (2x6x2 cores) under heavy I/O load, this reduces the time spent inside cpu_search() from 19% to 13%, while IOPS increase by 5%.
This commit is contained in:
parent
a40c2646a4
commit
58909b74b9
@ -667,10 +667,14 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
|
||||
}
|
||||
|
||||
/* Iterate through the child CPU groups and then remaining CPUs. */
|
||||
for (i = cg->cg_children, cpu = mp_maxid; i >= 0; ) {
|
||||
for (i = cg->cg_children, cpu = mp_maxid; ; ) {
|
||||
if (i == 0) {
|
||||
#ifdef HAVE_INLINE_FFSL
|
||||
cpu = CPU_FFS(&cpumask) - 1;
|
||||
#else
|
||||
while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask))
|
||||
cpu--;
|
||||
#endif
|
||||
if (cpu < 0)
|
||||
break;
|
||||
child = NULL;
|
||||
@ -695,6 +699,7 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
|
||||
break;
|
||||
}
|
||||
} else { /* Handle child CPU. */
|
||||
CPU_CLR(cpu, &cpumask);
|
||||
tdq = TDQ_CPU(cpu);
|
||||
load = tdq->tdq_load * 256;
|
||||
rndptr = DPCPU_PTR(randomval);
|
||||
@ -742,8 +747,11 @@ cpu_search(const struct cpu_group *cg, struct cpu_search *low,
|
||||
i--;
|
||||
if (i == 0 && CPU_EMPTY(&cpumask))
|
||||
break;
|
||||
} else
|
||||
}
|
||||
#ifndef HAVE_INLINE_FFSL
|
||||
else
|
||||
cpu--;
|
||||
#endif
|
||||
}
|
||||
return (total);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user