Optimize tls_get_addr_common(). The change provides around 30% speedup
for TLS microbenchmark using global-dynamic TLS model on amd64 (which is default for PIC dso objects). Split the slow path into tls_get_addr_slow(), for which inlining is disabled. This prevents the registers spill on tls_get_addr_common() entry. Provide static branch hint to the compiler, indicating that slow path is not likely to be taken. While there, do some minimal style adjustments. Reported and tested by: davidxu MFC after: 1 week
This commit is contained in:
parent
b186de9a24
commit
fb48ba5d90
@ -3507,17 +3507,17 @@ unref_dag(Obj_Entry *root)
|
||||
/*
|
||||
* Common code for MD __tls_get_addr().
|
||||
*/
|
||||
void *
|
||||
tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset)
|
||||
static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline;
|
||||
static void *
|
||||
tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset)
|
||||
{
|
||||
Elf_Addr* dtv = *dtvp;
|
||||
Elf_Addr *newdtv, *dtv;
|
||||
RtldLockState lockstate;
|
||||
int to_copy;
|
||||
|
||||
dtv = *dtvp;
|
||||
/* Check dtv generation in case new modules have arrived */
|
||||
if (dtv[0] != tls_dtv_generation) {
|
||||
Elf_Addr* newdtv;
|
||||
int to_copy;
|
||||
|
||||
wlock_acquire(rtld_bind_lock, &lockstate);
|
||||
newdtv = calloc(1, (tls_max_index + 2) * sizeof(Elf_Addr));
|
||||
to_copy = dtv[1];
|
||||
@ -3532,14 +3532,27 @@ tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset)
|
||||
}
|
||||
|
||||
/* Dynamically allocate module TLS if necessary */
|
||||
if (!dtv[index + 1]) {
|
||||
if (dtv[index + 1] == 0) {
|
||||
/* Signal safe, wlock will block out signals. */
|
||||
wlock_acquire(rtld_bind_lock, &lockstate);
|
||||
wlock_acquire(rtld_bind_lock, &lockstate);
|
||||
if (!dtv[index + 1])
|
||||
dtv[index + 1] = (Elf_Addr)allocate_module_tls(index);
|
||||
lock_release(rtld_bind_lock, &lockstate);
|
||||
}
|
||||
return (void*) (dtv[index + 1] + offset);
|
||||
return ((void *)(dtv[index + 1] + offset));
|
||||
}
|
||||
|
||||
void *
|
||||
tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset)
|
||||
{
|
||||
Elf_Addr *dtv;
|
||||
|
||||
dtv = *dtvp;
|
||||
/* Check dtv generation in case new modules have arrived */
|
||||
if (__predict_true(dtv[0] == tls_dtv_generation &&
|
||||
dtv[index + 1] != 0))
|
||||
return ((void *)(dtv[index + 1] + offset));
|
||||
return (tls_get_addr_slow(dtvp, index, offset));
|
||||
}
|
||||
|
||||
#if defined(__arm__) || defined(__ia64__) || defined(__mips__) || defined(__powerpc__)
|
||||
|
Loading…
Reference in New Issue
Block a user