Optimize tls_get_addr_common(). The change provides a speedup of around 30%
for a TLS microbenchmark using the global-dynamic TLS model on amd64 (which
is the default for PIC dynamic shared objects).

Split the slow path into tls_get_addr_slow(), for which inlining is
disabled. This prevents register spills on entry to
tls_get_addr_common().

Provide a static branch hint to the compiler, indicating that the slow path
is not likely to be taken.

While there, do some minimal style adjustments.

Reported and tested by:	davidxu
MFC after:	1 week
This commit is contained in:
kib 2012-03-10 08:49:44 +00:00
parent b186de9a24
commit fb48ba5d90

View File

@ -3507,17 +3507,17 @@ unref_dag(Obj_Entry *root)
/*
* Common code for MD __tls_get_addr().
*/
void *
tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset)
static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline;
static void *
tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset)
{
Elf_Addr* dtv = *dtvp;
Elf_Addr *newdtv, *dtv;
RtldLockState lockstate;
int to_copy;
dtv = *dtvp;
/* Check dtv generation in case new modules have arrived */
if (dtv[0] != tls_dtv_generation) {
Elf_Addr* newdtv;
int to_copy;
wlock_acquire(rtld_bind_lock, &lockstate);
newdtv = calloc(1, (tls_max_index + 2) * sizeof(Elf_Addr));
to_copy = dtv[1];
@ -3532,14 +3532,27 @@ tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset)
}
/* Dynamically allocate module TLS if necessary */
if (!dtv[index + 1]) {
if (dtv[index + 1] == 0) {
/* Signal safe, wlock will block out signals. */
wlock_acquire(rtld_bind_lock, &lockstate);
wlock_acquire(rtld_bind_lock, &lockstate);
if (!dtv[index + 1])
dtv[index + 1] = (Elf_Addr)allocate_module_tls(index);
lock_release(rtld_bind_lock, &lockstate);
}
return (void*) (dtv[index + 1] + offset);
return ((void *)(dtv[index + 1] + offset));
}
/*
 * Fast path for the TLS address lookup.
 *
 * Reads the dtv pointer through dtvp.  When dtv[0] equals
 * tls_dtv_generation and the slot dtv[index + 1] is non-zero, returns that
 * slot's value plus offset.  In every other case the call is forwarded,
 * with the same arguments, to tls_get_addr_slow().
 */
void *
tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset)
{
Elf_Addr *dtv;
dtv = *dtvp;
/* Check dtv generation in case new modules have arrived */
/* Both tests sit under one __predict_true() hint: the common case is expected to pass. */
if (__predict_true(dtv[0] == tls_dtv_generation &&
dtv[index + 1] != 0))
return ((void *)(dtv[index + 1] + offset));
/* Generation mismatch or empty slot: hand off to the non-inlined slow path. */
return (tls_get_addr_slow(dtvp, index, offset));
}
#if defined(__arm__) || defined(__ia64__) || defined(__mips__) || defined(__powerpc__)