Micro-manage clang to get the expected inlining for cpu_search().

Mark cpu_search_lowest(), cpu_search_highest() and cpu_search_both() as
noinline, while cpu_search() gets always_inline.  With the attributes set,
cpu_search() is inlined into each wrapper, and the if()s whose conditions
are compile-time constants are optimized away.

In some tests on a many-core machine, the samples reported by hwpmc for
cpu_search*() dropped from 25% of the total to 9%.

Submitted by:	"Rang, Anton" <anton.rang@isilon.com>
MFC after:	1 week
This commit is contained in:
Konstantin Belousov 2014-07-03 11:06:27 +00:00
parent 2110950be8
commit 2499a5ccef
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=268211

View File

@@ -622,12 +622,14 @@ struct cpu_search {
for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \
if (CPU_ISSET(cpu, &mask))
static __inline int cpu_search(const struct cpu_group *cg, struct cpu_search *low,
struct cpu_search *high, const int match);
int cpu_search_lowest(const struct cpu_group *cg, struct cpu_search *low);
int cpu_search_highest(const struct cpu_group *cg, struct cpu_search *high);
int cpu_search_both(const struct cpu_group *cg, struct cpu_search *low,
static __always_inline int cpu_search(const struct cpu_group *cg,
struct cpu_search *low, struct cpu_search *high, const int match);
int __noinline cpu_search_lowest(const struct cpu_group *cg,
struct cpu_search *low);
int __noinline cpu_search_highest(const struct cpu_group *cg,
struct cpu_search *high);
int __noinline cpu_search_both(const struct cpu_group *cg,
struct cpu_search *low, struct cpu_search *high);
/*
* Search the tree of cpu_groups for the lowest or highest loaded cpu
@@ -640,7 +642,7 @@ int cpu_search_both(const struct cpu_group *cg, struct cpu_search *low,
* match argument. It is reduced to the minimum set for each case. It is
* also recursive to the depth of the tree.
*/
static __inline int
static __always_inline int
cpu_search(const struct cpu_group *cg, struct cpu_search *low,
struct cpu_search *high, const int match)
{