[PowerPC] powerpc32 rtld IFUNC handling code

As PowerPC is moving to clang, we can finally start taking advantage of
IFUNC.

Implement the MD parts of IFUNC handling for rtld.

Currently, rtld has to look for R_PPC_IRELATIVE in the PLT relocations in
addition to RELA. Emitting it there is an ABI violation, but LLD9 has some
.iplt bugs that do so, so scanning the PLT is required as a workaround.

Reviewed by:	jhibbits
Differential Revision:	https://reviews.freebsd.org/D22855
This commit is contained in:
Brandon Bergren 2019-12-24 16:13:15 +00:00
parent 41b4ec8ab0
commit 57462f8f81
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=356056
2 changed files with 239 additions and 101 deletions

View File

@ -166,82 +166,84 @@ static int
reloc_nonplt_object(Obj_Entry *obj_rtld __unused, Obj_Entry *obj,
const Elf_Rela *rela, SymCache *cache, int flags, RtldLockState *lockstate)
{
Elf_Addr *where = (Elf_Addr *)(obj->relocbase + rela->r_offset);
const Elf_Sym *def;
const Obj_Entry *defobj;
Elf_Addr tmp;
const Elf_Sym *def = NULL;
const Obj_Entry *defobj;
Elf_Addr *where, symval = 0;
/*
* First, resolve symbol for relocations which
* reference symbols.
*/
switch (ELF_R_TYPE(rela->r_info)) {
case R_PPC_NONE:
break;
case R_PPC_ADDR32: /* word32 S + A */
case R_PPC_GLOB_DAT: /* word32 S + A */
case R_PPC_UADDR32: /* word32 S + A */
case R_PPC_ADDR32:
case R_PPC_GLOB_DAT: /* word32 S + A */
case R_PPC_DTPMOD32:
case R_PPC_TPREL32:
case R_PPC_DTPREL32:
def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
flags, cache, lockstate);
if (def == NULL) {
return (-1);
}
tmp = (Elf_Addr)(defobj->relocbase + def->st_value +
rela->r_addend);
/*
* If symbol is IFUNC, only perform relocation
* when caller allowed it by passing
* SYMLOOK_IFUNC flag. Skip the relocations
* otherwise.
*
* Also error out in case IFUNC relocations
* are specified for TLS, which cannot be
* usefully interpreted.
*/
if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) {
switch (ELF_R_TYPE(rela->r_info)) {
case R_PPC_UADDR32:
case R_PPC_ADDR32:
case R_PPC_GLOB_DAT:
if ((flags & SYMLOOK_IFUNC) == 0) {
dbg("Non-PLT reference to IFUNC found!");
obj->non_plt_gnu_ifunc = true;
return (0);
}
symval = (Elf_Addr)rtld_resolve_ifunc(
defobj, def);
break;
default:
_rtld_error("%s: IFUNC for TLS reloc",
obj->path);
return (-1);
}
} else {
if ((flags & SYMLOOK_IFUNC) != 0)
return (0);
symval = (Elf_Addr)defobj->relocbase +
def->st_value;
}
break;
default:
if ((flags & SYMLOOK_IFUNC) != 0)
return (0);
}
where = (Elf_Addr *)(obj->relocbase + rela->r_offset);
switch (ELF_R_TYPE(rela->r_info)) {
case R_PPC_NONE:
break;
case R_PPC_UADDR32:
case R_PPC_ADDR32:
case R_PPC_GLOB_DAT:
/* Don't issue write if unnecessary; avoid COW page fault */
if (*where != tmp) {
*where = tmp;
}
break;
case R_PPC_RELATIVE: /* word32 B + A */
tmp = (Elf_Addr)(obj->relocbase + rela->r_addend);
/* As above, don't issue write unnecessarily */
if (*where != tmp) {
*where = tmp;
if (*where != symval + rela->r_addend) {
*where = symval + rela->r_addend;
}
break;
case R_PPC_COPY:
/*
* These are deferred until all other relocations
* have been done. All we do here is make sure
* that the COPY relocation is not in a shared
* library. They are allowed only in executable
* files.
*/
if (!obj->mainprog) {
_rtld_error("%s: Unexpected R_COPY "
" relocation in shared library",
obj->path);
return (-1);
}
break;
case R_PPC_JMP_SLOT:
/*
* These will be handled by the plt/jmpslot routines
*/
break;
case R_PPC_DTPMOD32:
def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
flags, cache, lockstate);
if (def == NULL)
return (-1);
*where = (Elf_Addr) defobj->tlsindex;
break;
case R_PPC_TPREL32:
def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
flags, cache, lockstate);
if (def == NULL)
return (-1);
/*
* We lazily allocate offsets for static TLS as we
* see the first relocation that references the
@ -262,27 +264,52 @@ reloc_nonplt_object(Obj_Entry *obj_rtld __unused, Obj_Entry *obj,
*(Elf_Addr **)where = *where * sizeof(Elf_Addr)
+ (Elf_Addr *)(def->st_value + rela->r_addend
+ defobj->tlsoffset - TLS_TP_OFFSET - TLS_TCB_SIZE);
break;
case R_PPC_DTPREL32:
def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
flags, cache, lockstate);
if (def == NULL)
return (-1);
*where += (Elf_Addr)(def->st_value + rela->r_addend
- TLS_DTV_OFFSET);
break;
case R_PPC_RELATIVE: /* word32 B + A */
symval = (Elf_Addr)(obj->relocbase + rela->r_addend);
/* As above, don't issue write unnecessarily */
if (*where != symval) {
*where = symval;
}
break;
case R_PPC_COPY:
/*
* These are deferred until all other relocations
* have been done. All we do here is make sure
* that the COPY relocation is not in a shared
* library. They are allowed only in executable
* files.
*/
if (!obj->mainprog) {
_rtld_error("%s: Unexpected R_COPY "
" relocation in shared library",
obj->path);
return (-1);
}
break;
case R_PPC_IRELATIVE:
/*
* These will be handled by reloc_iresolve().
*/
obj->irelative = true;
break;
case R_PPC_JMP_SLOT:
/*
* These will be handled by the plt/jmpslot routines
*/
break;
default:
_rtld_error("%s: Unsupported relocation type %d"
" in non-PLT relocations\n", obj->path,
ELF_R_TYPE(rela->r_info));
return (-1);
}
}
return (0);
}
@ -300,10 +327,6 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags,
SymCache *cache;
int r = -1;
if ((flags & SYMLOOK_IFUNC) != 0)
/* XXX not implemented */
return (0);
/*
* The dynamic loader may be called from a thread, we have
* limited amounts of stack available so we cannot use alloca().
@ -404,7 +427,6 @@ reloc_plt_object(Obj_Entry *obj, const Elf_Rela *rela)
return (0);
}
/*
* Process the PLT relocations.
*/
@ -420,6 +442,17 @@ reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
relalim = (const Elf_Rela *)((const char *)obj->pltrela +
obj->pltrelasize);
for (rela = obj->pltrela; rela < relalim; rela++) {
if (ELF_R_TYPE(rela->r_info) == R_PPC_IRELATIVE) {
dbg("ABI violation - found IRELATIVE in the PLT.");
obj->irelative = true;
continue;
}
/*
* PowerPC(64) .rela.plt is composed of an array of
* R_PPC_JMP_SLOT relocations. Unlike other platforms,
* this is the ONLY relocation type that is valid here.
*/
assert(ELF_R_TYPE(rela->r_info) == R_PPC_JMP_SLOT);
if (reloc_plt_object(obj, rela) < 0) {
@ -438,7 +471,6 @@ reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
return (0);
}
/*
* LD_BIND_NOW was set - force relocation for all jump slots
*/
@ -455,6 +487,9 @@ reloc_jmpslots(Obj_Entry *obj, int flags, RtldLockState *lockstate)
relalim = (const Elf_Rela *)((const char *)obj->pltrela +
obj->pltrelasize);
for (rela = obj->pltrela; rela < relalim; rela++) {
/* This isn't actually a jump slot, ignore it. */
if (ELF_R_TYPE(rela->r_info) == R_PPC_IRELATIVE)
continue;
assert(ELF_R_TYPE(rela->r_info) == R_PPC_JMP_SLOT);
where = (Elf_Addr *)(obj->relocbase + rela->r_offset);
def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
@ -466,15 +501,18 @@ reloc_jmpslots(Obj_Entry *obj, int flags, RtldLockState *lockstate)
target = (Elf_Addr)(defobj->relocbase + def->st_value);
#if 0
/* PG XXX */
dbg("\"%s\" in \"%s\" --> %p in \"%s\"",
defobj->strtab + def->st_name, basename(obj->path),
(void *)target, basename(defobj->path));
#endif
reloc_jmpslot(where, target, defobj, obj,
(const Elf_Rel *) rela);
if (def == &sym_zero) {
/* Zero undefined weak symbols */
*where = 0;
} else {
if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) {
/* LD_BIND_NOW, ifunc in shared lib.*/
obj->gnu_ifunc = true;
continue;
}
reloc_jmpslot(where, target, defobj, obj,
(const Elf_Rel *) rela);
}
}
obj->jmpslots_done = true;
@ -484,9 +522,7 @@ reloc_jmpslots(Obj_Entry *obj, int flags, RtldLockState *lockstate)
/*
* Update the value of a PLT jump slot. Branch directly to the target if
* it is within +/- 32Mb, otherwise go indirectly via the pltcall
* trampoline call and jump table.
* Update the value of a PLT jump slot.
*/
Elf_Addr
reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target,
@ -501,22 +537,32 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target,
if (ld_bind_not)
goto out;
/*
* At the PLT entry pointed at by `wherep', construct
* a direct transfer to the now fully resolved function
* address.
*/
offset = target - (Elf_Addr)wherep;
/*
* Process Secure-PLT.
*/
if (obj->gotptr != NULL) {
assert(wherep >= (Elf_Word *)obj->pltgot);
assert(wherep <
(Elf_Word *)obj->pltgot + obj->pltrelasize);
*wherep = target;
if (*wherep != target)
*wherep = target;
goto out;
}
/*
* BSS-PLT optimization:
* Branch directly to the target if it is within +/- 32Mb,
* otherwise go indirectly via the pltcall trampoline call and
* jump table.
*/
offset = target - (Elf_Addr)wherep;
if (abs((int)offset) < 32*1024*1024) { /* inside 32MB? */
/*
* At the PLT entry pointed at by `wherep', construct
* a direct transfer to the now fully resolved function
* address.
*/
/* b value # branch directly */
*wherep = 0x48000000 | (offset & 0x03fffffc);
__syncicache(wherep, 4);
@ -557,11 +603,52 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target,
}
int
reloc_iresolve(Obj_Entry *obj __unused,
struct Struct_RtldLockState *lockstate __unused)
reloc_iresolve(Obj_Entry *obj,
    struct Struct_RtldLockState *lockstate)
{
	/*
	 * Resolve every R_PPC_IRELATIVE relocation of `obj'.
	 *
	 * For each such entry the resolver located at relocbase + r_addend
	 * is called and its result is stored at relocbase + r_offset.
	 * Always returns 0.
	 */
	const Elf_Rela *cur, *end;
	Elf_Addr *resolver, *slot, resolved;

	/* Nothing to do when the object is not flagged as having IRELATIVE. */
	if (!obj->irelative)
		return (0);

	/*
	 * PLT slots on PowerPC are supposed to be R_PPC_JMP_SLOT only, so
	 * R_PPC_IRELATIVE entries are expected in the RELA table.
	 */
	end = (const Elf_Rela *)((const char *)obj->rela + obj->relasize);
	for (cur = obj->rela; cur < end; cur++) {
		if (ELF_R_TYPE(cur->r_info) != R_PPC_IRELATIVE)
			continue;
		resolver = (Elf_Addr *)(obj->relocbase + cur->r_addend);
		slot = (Elf_Addr *)(obj->relocbase + cur->r_offset);
		/* The bind lock is released around the resolver call. */
		lock_release(rtld_bind_lock, lockstate);
		resolved = call_ifunc_resolver(resolver);
		wlock_acquire(rtld_bind_lock, lockstate);
		*slot = resolved;
	}

	/*
	 * XXX Remove me when lld is fixed!
	 * LLD currently makes illegal relocations in the PLT, so the PLT
	 * relocation table gets the same treatment as RELA above.
	 */
	end = (const Elf_Rela *)((const char *)obj->pltrela +
	    obj->pltrelasize);
	for (cur = obj->pltrela; cur < end; cur++) {
		if (ELF_R_TYPE(cur->r_info) != R_PPC_IRELATIVE)
			continue;
		resolver = (Elf_Addr *)(obj->relocbase + cur->r_addend);
		slot = (Elf_Addr *)(obj->relocbase + cur->r_offset);
		/* The bind lock is released around the resolver call. */
		lock_release(rtld_bind_lock, lockstate);
		resolved = call_ifunc_resolver(resolver);
		wlock_acquire(rtld_bind_lock, lockstate);
		*slot = resolved;
	}

	/* Both tables have been processed; clear the pending flag. */
	obj->irelative = false;
	return (0);
}
@ -569,8 +656,32 @@ int
reloc_gnu_ifunc(Obj_Entry *obj, int flags,
    struct Struct_RtldLockState *lockstate)
{
	/*
	 * Bind the PLT jump slots of `obj' whose symbol is a GNU IFUNC.
	 *
	 * obj       - object whose PLT relocations are scanned; its
	 *             gnu_ifunc flag is cleared once the scan completes.
	 * flags     - symbol lookup flags, passed to find_symdef() together
	 *             with SYMLOOK_IN_PLT.
	 * lockstate - bind lock state, released around the resolver call.
	 *
	 * Returns 0 on success (or when obj->gnu_ifunc is not set) and -1
	 * when a symbol lookup fails.
	 */
	const Elf_Rela *relalim;
	const Elf_Rela *rela;
	Elf_Addr *where, target;
	const Elf_Sym *def;
	const Obj_Entry *defobj;

	if (!obj->gnu_ifunc)
		return (0);

	relalim = (const Elf_Rela *)((const char *)obj->pltrela +
	    obj->pltrelasize);
	for (rela = obj->pltrela; rela < relalim; rela++) {
		/* Only jump slots are of interest here. */
		if (ELF_R_TYPE(rela->r_info) != R_PPC_JMP_SLOT)
			continue;

		where = (Elf_Addr *)(obj->relocbase + rela->r_offset);
		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    SYMLOOK_IN_PLT | flags, NULL, lockstate);
		if (def == NULL)
			return (-1);

		/* Slots for ordinary symbols are not touched by this pass. */
		if (ELF_ST_TYPE(def->st_info) != STT_GNU_IFUNC)
			continue;

		/* The bind lock is released around the resolver call. */
		lock_release(rtld_bind_lock, lockstate);
		target = (Elf_Addr)rtld_resolve_ifunc(defobj, def);
		wlock_acquire(rtld_bind_lock, lockstate);
		reloc_jmpslot(where, target, defobj, obj,
		    (const Elf_Rel *)rela);
	}

	obj->gnu_ifunc = false;
	return (0);
}
@ -664,6 +775,27 @@ init_pltgot(Obj_Entry *obj)
*/
}
/*
* 32 bit cpu feature flag fields.
*/
u_long cpu_features;
u_long cpu_features2;
/*
 * Initialize cpu_features / cpu_features2 from the auxiliary vector.
 *
 * aux_info is indexed by AT_* tag.  When the AT_HWCAP or AT_HWCAP2
 * entry is NULL, the corresponding variable is set to -1UL instead.
 */
void
powerpc_abi_variant_hook(Elf_Auxinfo** aux_info)
{
	const Elf_Auxinfo *hwcap = aux_info[AT_HWCAP];
	const Elf_Auxinfo *hwcap2 = aux_info[AT_HWCAP2];

	if (hwcap != NULL)
		cpu_features = hwcap->a_un.a_val;
	else
		cpu_features = -1UL;

	if (hwcap2 != NULL)
		cpu_features2 = hwcap2->a_un.a_val;
	else
		cpu_features2 = -1UL;
}
void
ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
{

View File

@ -53,8 +53,13 @@ void reloc_non_plt_self(Elf_Dyn *dynp, Elf_Addr relocbase);
#define call_init_pointer(obj, target) \
(((InitArrFunc)(target))(main_argc, main_argv, environ))
extern u_long cpu_features; /* r3 */
extern u_long cpu_features2; /* r4 */
/* r5-10: ifunc resolver parameters reserved for future assignment. */
#define call_ifunc_resolver(ptr) \
(((Elf_Addr (*)(void))ptr)())
(((Elf_Addr (*)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, \
uint32_t, uint32_t, uint32_t))ptr)((uint32_t)cpu_features, \
(uint32_t)cpu_features2, 0, 0, 0, 0, 0, 0))
/*
* PLT functions. Not really correct prototypes, but the
@ -91,6 +96,7 @@ extern void *__tls_get_addr(tls_index* ti);
#define RTLD_DEFAULT_STACK_PF_EXEC PF_X
#define RTLD_DEFAULT_STACK_EXEC PROT_EXEC
#define md_abi_variant_hook(x)
extern void powerpc_abi_variant_hook(Elf_Auxinfo **);
#define md_abi_variant_hook(x) powerpc_abi_variant_hook(x)
#endif