TLS: Use <machine/tls.h> for libc and rtld.

- Include <machine/tls.h> in MD rtld_machdep.h headers.

- Remove local definitions of TLS_* constants from rtld_machdep.h
  headers and libc, using the values from <machine/tls.h> instead.

- Use _tcb_set() instead of inlined versions in MD
  allocate_initial_tls() routines in rtld.  The one exception is amd64,
  whose _tcb_set() invokes the amd64_set_fsbase ifunc.  rtld cannot
  use ifuncs, so amd64 inlines the logic to optionally write to fsbase
  directly.

- Use _tcb_set() instead of _set_tp() in libc.

- Use '&_tcb_get()->tcb_dtv' instead of _get_tp() in both rtld and libc.
  This permits removing _get_tp.c from rtld.

- Use TLS_TCB_SIZE and TLS_TCB_ALIGN with allocate_tls() in MD
  allocate_initial_tls() routines in rtld.

Reviewed by:	kib, jrtc27 (earlier version)
Differential Revision:	https://reviews.freebsd.org/D33353
This commit is contained in:
John Baldwin 2021-12-09 13:17:54 -08:00
parent 75395023ff
commit 8bcdb144eb
20 changed files with 53 additions and 116 deletions

View File

@ -72,18 +72,6 @@ void _rtld_free_tls(void *tls, size_t tcbsize, size_t tcbalign);
void *__libc_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign);
void __libc_free_tls(void *tls, size_t tcbsize, size_t tcbalign);
#if defined(__amd64__) || defined(__aarch64__) || defined(__riscv)
#define TLS_TCB_ALIGN 16
#elif defined(__arm__) || defined(__mips__)
#define TLS_TCB_ALIGN 8
#elif defined(__powerpc__)
#define TLS_TCB_ALIGN TLS_TCB_SIZE
#elif defined(__i386__)
#define TLS_TCB_ALIGN 4
#else
#error TLS_TCB_ALIGN undefined for target architecture
#endif
#ifndef PIC
static size_t libc_tls_static_space;
@ -95,11 +83,10 @@ static void *libc_tls_init;
void *
__libc_tls_get_addr(void *vti)
{
Elf_Addr **dtvp, *dtv;
uintptr_t *dtv;
tls_index *ti;
dtvp = _get_tp();
dtv = *dtvp;
dtv = _tcb_get()->tcb_dtv;
ti = vti;
return ((char *)(dtv[ti->ti_module + 1] + ti->ti_offset) +
TLS_DTV_OFFSET);
@ -165,7 +152,7 @@ libc_free_aligned(void *ptr)
* described in [3] where TP points (with bias) to TLS and TCB immediately
* precedes TLS without any alignment gap[4]. Only TLS should be aligned.
* The TCB[0] points to DTV vector and DTV values are biased by constant
* value (0x8000) from real addresses[5].
* value (TLS_DTV_OFFSET) from real addresses[5].
*
* [1] Ulrich Drepper: ELF Handling for Thread-Local Storage
* www.akkadia.org/drepper/tls.pdf
@ -178,7 +165,7 @@ libc_free_aligned(void *ptr)
* https://members.openpowerfoundation.org/document/dl/576
*
* [4] It's unclear if "without any alignment gap" is a hard ABI requirement,
* but we must follow this rule due to suboptimal _set_tp()
* but we must follow this rule due to suboptimal _tcb_set()
* (aka <ARCH>_SET_TP) implementation. This function doesn't expect TP but
* TCB as argument.
*
@ -310,8 +297,6 @@ __libc_allocate_tls(void *oldtcb, size_t tcbsize, size_t tcbalign)
#ifdef TLS_VARIANT_II
#define TLS_TCB_SIZE (3 * sizeof(Elf_Addr))
/*
* Free Static TLS using the Variant II method.
*/
@ -465,6 +450,6 @@ _init_tls(void)
}
tls = _rtld_allocate_tls(NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN);
_set_tp(tls);
_tcb_set(tls);
#endif
}

View File

@ -516,7 +516,6 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags,
void
allocate_initial_tls(Obj_Entry *objs)
{
Elf_Addr **tp;
/*
* Fix the size of the static TLS block by using the maximum
@ -526,16 +525,14 @@ allocate_initial_tls(Obj_Entry *objs)
tls_static_space = tls_last_offset + tls_last_size +
RTLD_STATIC_TLS_EXTRA;
tp = (Elf_Addr **) allocate_tls(objs, NULL, TLS_TCB_SIZE, 16);
asm volatile("msr tpidr_el0, %0" : : "r"(tp));
_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
}
void *
__tls_get_addr(tls_index* ti)
{
Elf_Addr **dtvp;
uintptr_t **dtvp;
dtvp = _get_tp();
dtvp = &_tcb_get()->tcb_dtv;
return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
}

View File

@ -35,6 +35,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -79,7 +80,6 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
#define calculate_tls_post_size(align) \
round(TLS_TCB_SIZE, align) - TLS_TCB_SIZE
#define TLS_TCB_SIZE 16
typedef struct {
unsigned long ti_module;
unsigned long ti_offset;
@ -92,7 +92,4 @@ extern void *__tls_get_addr(tls_index *ti);
#define md_abi_variant_hook(x)
#define TLS_VARIANT_I 1
#define TLS_DTV_OFFSET 0
#endif

View File

@ -530,7 +530,12 @@ allocate_initial_tls(Obj_Entry *objs)
*/
tls_static_space = tls_last_offset + RTLD_STATIC_TLS_EXTRA;
addr = allocate_tls(objs, 0, 3 * sizeof(Elf_Addr), 16);
addr = allocate_tls(objs, 0, TLS_TCB_SIZE, TLS_TCB_ALIGN);
/*
* This does not use _tcb_set(), because _tcb_set() calls
* amd64_set_fsbase(), which is an ifunc, and rtld must not use ifuncs.
*/
if (__getosreldate() >= P_OSREL_WRFSBASE &&
(cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0)
wrfsbase((uintptr_t)addr);
@ -541,9 +546,9 @@ allocate_initial_tls(Obj_Entry *objs)
void *
__tls_get_addr(tls_index *ti)
{
Elf_Addr **dtvp;
uintptr_t **dtvp;
dtvp = _get_tp();
dtvp = &_tcb_get()->tcb_dtv;
return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
}

View File

@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -73,9 +74,6 @@ void *__tls_get_addr(tls_index *ti) __exported;
#define md_abi_variant_hook(x)
#define TLS_VARIANT_II 1
#define TLS_DTV_OFFSET 0
size_t calculate_first_tls_offset(size_t size, size_t align, size_t offset);
size_t calculate_tls_offset(size_t prev_offset, size_t prev_size, size_t size,
size_t align, size_t offset);

View File

@ -499,17 +499,14 @@ allocate_initial_tls(Obj_Entry *objs)
tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
sysarch(ARM_SET_TP, allocate_tls(objs, NULL, TLS_TCB_SIZE, 8));
_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
}
void *
__tls_get_addr(tls_index* ti)
{
char *p;
void *_tp;
__asm __volatile("mrc p15, 0, %0, c13, c0, 3" \
: "=r" (_tp));
p = tls_get_addr_common((Elf_Addr **)(_tp), ti->ti_module, ti->ti_offset);
uintptr_t **dtvp;
return (p);
dtvp = &_tcb_get()->tcb_dtv;
return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
}

View File

@ -34,6 +34,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/acle-compat.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -56,7 +57,6 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
#define call_ifunc_resolver(ptr) \
(((Elf_Addr (*)(void))ptr)())
#define TLS_TCB_SIZE 8
typedef struct {
unsigned long ti_module;
unsigned long ti_offset;
@ -84,7 +84,4 @@ extern void arm_abi_variant_hook(Elf_Auxinfo **);
#define md_abi_variant_hook(x)
#endif
#define TLS_VARIANT_I 1
#define TLS_DTV_OFFSET 0
#endif

View File

@ -513,8 +513,8 @@ allocate_initial_tls(Obj_Entry *objs)
* use.
*/
tls_static_space = tls_last_offset + RTLD_STATIC_TLS_EXTRA;
tls = allocate_tls(objs, NULL, 3*sizeof(Elf_Addr), sizeof(Elf_Addr));
i386_set_gsbase(tls);
tls = allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN);
_tcb_set(tls);
}
/* GNU ABI */
@ -522,9 +522,9 @@ __attribute__((__regparm__(1)))
void *
___tls_get_addr(tls_index *ti)
{
Elf_Addr **dtvp;
uintptr_t **dtvp;
dtvp = _get_tp();
dtvp = &_tcb_get()->tcb_dtv;
return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
}
@ -532,9 +532,9 @@ ___tls_get_addr(tls_index *ti)
void *
__tls_get_addr(tls_index *ti)
{
Elf_Addr **dtvp;
uintptr_t **dtvp;
dtvp = _get_tp();
dtvp = &_tcb_get()->tcb_dtv;
return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
}

View File

@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -74,9 +75,6 @@ void *__tls_get_addr(tls_index *ti) __exported;
#define md_abi_variant_hook(x)
#define TLS_VARIANT_II 1
#define TLS_DTV_OFFSET 0
size_t calculate_first_tls_offset(size_t size, size_t align, size_t offset);
size_t calculate_tls_offset(size_t prev_offset, size_t prev_size, size_t size,
size_t align, size_t offset);

View File

@ -762,7 +762,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
void
allocate_initial_tls(Obj_Entry *objs)
{
char *tls;
/*
* Fix the size of the static TLS block by using the maximum
@ -771,19 +770,17 @@ allocate_initial_tls(Obj_Entry *objs)
*/
tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
tls = (char *) allocate_tls(objs, NULL, TLS_TCB_SIZE, 8);
sysarch(MIPS_SET_TLS, tls);
_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
}
void *
__tls_get_addr(tls_index* ti)
{
Elf_Addr **tls;
uintptr_t **dtvp;
char *p;
tls = _get_tp();
p = tls_get_addr_common(tls, ti->ti_module, ti->ti_offset);
dtvp = &_tcb_get()->tcb_dtv;
p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
return (p + TLS_DTV_OFFSET);
}

View File

@ -77,6 +77,4 @@ extern void *__tls_get_addr(tls_index *ti);
#define md_abi_variant_hook(x)
#define TLS_VARIANT_I 1
#endif

View File

@ -812,7 +812,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
void
allocate_initial_tls(Obj_Entry *list)
{
Elf_Addr **tp;
/*
* Fix the size of the static TLS block by using the maximum
@ -822,25 +821,17 @@ allocate_initial_tls(Obj_Entry *list)
tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
tp = (Elf_Addr **)((char *) allocate_tls(list, NULL, TLS_TCB_SIZE, 8)
+ TLS_TP_OFFSET + TLS_TCB_SIZE);
/*
* XXX gcc seems to ignore 'tp = _tp;'
*/
__asm __volatile("mr 2,%0" :: "r"(tp));
_tcb_set(allocate_tls(list, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
}
void*
__tls_get_addr(tls_index* ti)
{
register Elf_Addr **tp;
uintptr_t **dtvp;
char *p;
__asm __volatile("mr %0,2" : "=r"(tp));
p = tls_get_addr_common((Elf_Addr**)((Elf_Addr)tp - TLS_TP_OFFSET
- TLS_TCB_SIZE), ti->ti_module, ti->ti_offset);
dtvp = &_tcb_get()->tcb_dtv;
p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
return (p + TLS_DTV_OFFSET);
}

View File

@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -73,11 +74,6 @@ void _rtld_powerpc_pltcall(void);
* TLS
*/
#define TLS_VARIANT_I 1
#define TLS_TP_OFFSET 0x7000
#define TLS_DTV_OFFSET 0x8000
#define TLS_TCB_SIZE 8
#define round(size, align) \
(((size) + (align) - 1) & ~((align) - 1))
#define calculate_first_tls_offset(size, align, offset) \

View File

@ -709,7 +709,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
void
allocate_initial_tls(Obj_Entry *list)
{
Elf_Addr **tp;
/*
* Fix the size of the static TLS block by using the maximum
@ -719,21 +718,17 @@ allocate_initial_tls(Obj_Entry *list)
tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
tp = (Elf_Addr **)((char *)allocate_tls(list, NULL, TLS_TCB_SIZE, 16)
+ TLS_TP_OFFSET + TLS_TCB_SIZE);
__asm __volatile("mr 13,%0" :: "r"(tp));
_tcb_set(allocate_tls(list, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
}
void*
__tls_get_addr(tls_index* ti)
{
Elf_Addr **tp;
uintptr_t **dtvp;
char *p;
__asm __volatile("mr %0,13" : "=r"(tp));
p = tls_get_addr_common((Elf_Addr**)((Elf_Addr)tp - TLS_TP_OFFSET
- TLS_TCB_SIZE), ti->ti_module, ti->ti_offset);
dtvp = &_tcb_get()->tcb_dtv;
p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
return (p + TLS_DTV_OFFSET);
}

View File

@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -65,11 +66,6 @@ extern u_long cpu_features2; /* r4 */
* TLS
*/
#define TLS_VARIANT_I 1
#define TLS_TP_OFFSET 0x7000
#define TLS_DTV_OFFSET 0x8000
#define TLS_TCB_SIZE 16
#define round(size, align) \
(((size) + (align) - 1) & ~((align) - 1))
#define calculate_first_tls_offset(size, align, offset) \

View File

@ -387,7 +387,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
void
allocate_initial_tls(Obj_Entry *objs)
{
Elf_Addr **tp;
/*
* Fix the size of the static TLS block by using the maximum
@ -397,19 +396,16 @@ allocate_initial_tls(Obj_Entry *objs)
tls_static_space = tls_last_offset + tls_last_size +
RTLD_STATIC_TLS_EXTRA;
tp = (Elf_Addr **)((char *)allocate_tls(objs, NULL, TLS_TCB_SIZE, 16)
+ TLS_TP_OFFSET + TLS_TCB_SIZE);
__asm __volatile("mv tp, %0" :: "r"(tp));
_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
}
void *
__tls_get_addr(tls_index* ti)
{
Elf_Addr **dtvp;
uintptr_t **dtvp;
void *p;
dtvp = _get_tp();
dtvp = &_tcb_get()->tcb_dtv;
p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
return ((char*)p + TLS_DTV_OFFSET);

View File

@ -40,6 +40,7 @@
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/tls.h>
struct Struct_Obj_Entry;
@ -82,10 +83,6 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
/*
* TLS
*/
#define TLS_VARIANT_I 1
#define TLS_TP_OFFSET 0x0
#define TLS_DTV_OFFSET 0x800
#define TLS_TCB_SIZE 16
#define round(size, align) \
(((size) + (align) - 1) & ~((align) - 1))

View File

@ -67,8 +67,6 @@ _libc_other_objects+=syncicache abs
_libc_other_objects+=syncicache
.endif
_libc_other_objects+=_get_tp
# Extract all the .o files from libc_nossp_pic.a. This ensures that
# we don't accidentally pull in the interposing table or similar by linking
# directly against libc_nossp_pic.a

View File

@ -4171,14 +4171,14 @@ dlinfo(void *handle, int request, void *p)
static void
rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info)
{
Elf_Addr **dtvp;
uintptr_t **dtvp;
phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase;
phdr_info->dlpi_name = obj->path;
phdr_info->dlpi_phdr = obj->phdr;
phdr_info->dlpi_phnum = obj->phsize / sizeof(obj->phdr[0]);
phdr_info->dlpi_tls_modid = obj->tlsindex;
dtvp = _get_tp();
dtvp = &_tcb_get()->tcb_dtv;
phdr_info->dlpi_tls_data = (char *)tls_get_addr_slow(dtvp,
obj->tlsindex, 0, true) + TLS_DTV_OFFSET;
phdr_info->dlpi_adds = obj_loads;
@ -5175,9 +5175,9 @@ tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset, bool locked)
}
void *
tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset)
tls_get_addr_common(uintptr_t **dtvp, int index, size_t offset)
{
Elf_Addr *dtv;
uintptr_t *dtv;
dtv = *dtvp;
/* Check dtv generation in case new modules have arrived */

View File

@ -396,7 +396,7 @@ void _rtld_bind_start(void);
void *rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def);
void symlook_init(SymLook *, const char *);
int symlook_obj(SymLook *, const Obj_Entry *);
void *tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset);
void *tls_get_addr_common(uintptr_t **dtvp, int index, size_t offset);
void *allocate_tls(Obj_Entry *, void *, size_t, size_t);
void free_tls(void *, size_t, size_t);
void *allocate_module_tls(int index);
@ -404,7 +404,6 @@ bool allocate_tls_offset(Obj_Entry *obj);
void free_tls_offset(Obj_Entry *obj);
const Ver_Entry *fetch_ventry(const Obj_Entry *obj, unsigned long);
int convert_prot(int elfflags);
void *_get_tp(void); /* libc implementation */
bool check_elf_headers(const Elf_Ehdr *hdr, const char *path);
/*