Fix the handling of high PLT entries (> 32764) on sparc64. This requires
additional arguments to reloc_jmpslot(), which is why the machine-independent
(MI) code and the machine-dependent (MD) code of other platforms had to be
changed as well.

Reviewed by:	jake
Approved by:	re
This commit is contained in:
tmm 2002-11-18 22:08:50 +00:00
parent d5b4eeceb3
commit 8b744a2d3f
12 changed files with 243 additions and 214 deletions

View File

@ -252,7 +252,7 @@ reloc_jmpslots(Obj_Entry *obj)
return -1;
reloc_jmpslot(where,
(Elf_Addr)(defobj->relocbase + def->st_value),
defobj);
defobj, obj, rel);
}
} else {
const Elf_Rela *relalim;
@ -272,7 +272,7 @@ reloc_jmpslots(Obj_Entry *obj)
return -1;
reloc_jmpslot(where,
(Elf_Addr)(defobj->relocbase + def->st_value),
defobj);
defobj, obj, (Elf_Rel *)rela);
}
}
obj->jmpslots_done = true;
@ -281,7 +281,8 @@ reloc_jmpslots(Obj_Entry *obj)
/* Fixup the jump slot at "where" to transfer control to "target". */
Elf_Addr
reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj)
reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj,
const Obj_Entry *refobj, const Elf_Rel *rel)
{
Elf_Addr stubaddr;

View File

@ -35,7 +35,9 @@ struct Struct_Obj_Entry;
#define rtld_dynamic(obj) (&_DYNAMIC)
Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr,
const struct Struct_Obj_Entry *obj);
const struct Struct_Obj_Entry *,
const struct Struct_Obj_Entry *,
const Elf_Rel *);
#define make_function_pointer(def, defobj) \
((defobj)->relocbase + (def)->st_value)

View File

@ -259,7 +259,7 @@ reloc_jmpslots(Obj_Entry *obj)
if (def == NULL)
return -1;
target = (Elf_Addr)(defobj->relocbase + def->st_value);
reloc_jmpslot(where, target, defobj);
reloc_jmpslot(where, target, defobj, obj, rel);
}
obj->jmpslots_done = true;
return 0;

View File

@ -38,7 +38,8 @@ struct Struct_Obj_Entry;
/* Fixup the jump slot at "where" to transfer control to "target". */
static inline Elf_Addr
reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
const struct Struct_Obj_Entry *obj)
const struct Struct_Obj_Entry *obj,
const struct Struct_Obj_Entry *refobj, const Elf_Rel *rel)
{
dbg("reloc_jmpslot: *%p = %p", (void *)(where),
(void *)(target));

View File

@ -259,7 +259,7 @@ reloc_jmpslots(Obj_Entry *obj)
if (def == NULL)
return -1;
target = (Elf_Addr)(defobj->relocbase + def->st_value);
reloc_jmpslot(where, target, defobj);
reloc_jmpslot(where, target, defobj, obj, rel);
}
obj->jmpslots_done = true;
return 0;

View File

@ -38,7 +38,8 @@ struct Struct_Obj_Entry;
/* Fixup the jump slot at "where" to transfer control to "target". */
static inline Elf_Addr
reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
const struct Struct_Obj_Entry *obj)
const struct Struct_Obj_Entry *obj,
const struct Struct_Obj_Entry *refobj, const Elf_Rel *rel)
{
dbg("reloc_jmpslot: *%p = %p", (void *)(where),
(void *)(target));

View File

@ -364,7 +364,7 @@ reloc_jmpslots(Obj_Entry *obj)
reloc_jmpslot(where,
(Elf_Addr)(defobj->relocbase
+ def->st_value),
defobj);
defobj, obj, rel);
}
} else {
const Elf_Rela *relalim;
@ -385,7 +385,7 @@ reloc_jmpslots(Obj_Entry *obj)
reloc_jmpslot(where,
(Elf_Addr)(defobj->relocbase
+ def->st_value),
defobj);
defobj, obj, (Elf_Rel *)rela);
}
}
obj->jmpslots_done = true;
@ -394,7 +394,8 @@ reloc_jmpslots(Obj_Entry *obj)
/* Fixup the jump slot at "where" to transfer control to "target". */
Elf_Addr
reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj)
reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj,
const Obj_Entry *refobj, const Elf_Rel *rel)
{
Elf_Addr stubaddr;

View File

@ -45,7 +45,8 @@ struct fptr {
struct Struct_Obj_Entry;
Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, const struct Struct_Obj_Entry *);
Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, const struct Struct_Obj_Entry *,
const struct Struct_Obj_Entry *, const Elf_Rel *);
void *make_function_pointer(const Elf_Sym *, const struct Struct_Obj_Entry *);
void call_initfini_pointer(const struct Struct_Obj_Entry *, Elf_Addr);

View File

@ -445,7 +445,7 @@ _rtld_bind(Obj_Entry *obj, Elf_Word reloff)
* address. The value returned from reloc_jmpslot() is the value
* that the trampoline needs.
*/
target = reloc_jmpslot(where, target, defobj);
target = reloc_jmpslot(where, target, defobj, obj, rel);
rlock_release();
return target;
}

View File

@ -458,54 +458,30 @@ reloc_jmpslots(Obj_Entry *obj)
if (def == NULL)
return -1;
target = (Elf_Addr)(defobj->relocbase + def->st_value);
reloc_jmpslot(where, target, defobj);
reloc_jmpslot(where, target, defobj, obj, (Elf_Rel *)rela);
}
obj->jmpslots_done = true;
return (0);
}
Elf_Addr
reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj)
reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj,
const Obj_Entry *refobj, const Elf_Rel *rel)
{
const Elf_Rela *rela = (const Elf_Rela *)rel;
Elf_Addr offset;
Elf_Half *where;
/*
* At the PLT entry pointed at by `where', we now construct
* a direct transfer to the now fully resolved function
* address.
*
* A PLT entry is supposed to start by looking like this:
*
* sethi %hi(. - .PLT0), %g1
* ba,a %xcc, .PLT1
* nop
* nop
* nop
* nop
* nop
* nop
*
* When we replace these entries we start from the second
* entry and do it in reverse order so the last thing we
* do is replace the branch. That allows us to change this
* atomically.
*
* We now need to find out how far we need to jump. We
* have a choice of several different relocation techniques
* which are increasingly expensive.
*/
where = (Elf_Half *)wherep;
offset = ((Elf_Addr)where) - target;
if (offset <= (1L<<20) && offset >= -(1L<<20)) {
/*
* We're within 1MB -- we can use a direct branch insn.
if (rela - refobj->pltrela < 32764) {
/*
* At the PLT entry pointed at by `where', we now construct
* a direct transfer to the now fully resolved function
* address.
*
* We can generate this pattern:
* A PLT entry is supposed to start by looking like this:
*
* sethi %hi(. - .PLT0), %g1
* ba,a %xcc, addr
* sethi (. - .PLT0), %g1
* ba,a %xcc, .PLT1
* nop
* nop
* nop
@ -513,149 +489,186 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj)
* nop
* nop
*
* When we replace these entries we start from the second
* entry and do it in reverse order so the last thing we
* do is replace the branch. That allows us to change this
* atomically.
*
* We now need to find out how far we need to jump. We
* have a choice of several different relocation techniques
* which are increasingly expensive.
*/
where[1] = BAA | ((offset >> 2) &0x3fffff);
flush(where, 4);
} else if (target >= 0 && target < (1L<<32)) {
/*
* We're within 32-bits of address zero.
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %hi(addr), %g1
* jmp %g1+%lo(addr)
* nop
* nop
* nop
* nop
* nop
*
*/
where[2] = JMP | LOVAL(target);
flush(where, 8);
where[1] = SETHI | HIVAL(target, 10);
flush(where, 4);
} else if (target <= 0 && target > -(1L<<32)) {
/*
* We're within 32-bits of address -1.
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %hix(addr), %g1
* xor %g1, %lox(addr), %g1
* jmp %g1
* nop
* nop
* nop
* nop
*
*/
where[3] = JMP;
flush(where, 12);
where[2] = XOR | ((~target) & 0x00001fff);
flush(where, 8);
where[1] = SETHI | HIVAL(~target, 10);
flush(where, 4);
} else if (offset <= (1L<<32) && offset >= -((1L<<32) - 4)) {
/*
* We're within 32-bits -- we can use a direct call insn
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* mov %o7, %g1
* call (.+offset)
* mov %g1, %o7
* nop
* nop
* nop
* nop
*
*/
where[3] = MOV17;
flush(where, 12);
where[2] = CALL | ((offset >> 4) & 0x3fffffff);
flush(where, 8);
where[1] = MOV71;
flush(where, 4);
} else if (offset >= 0 && offset < (1L<<44)) {
/*
* We're within 44 bits. We can generate this pattern:
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %h44(addr), %g1
* or %g1, %m44(addr), %g1
* sllx %g1, 12, %g1
* jmp %g1+%l44(addr)
* nop
* nop
* nop
*
*/
where[4] = JMP | LOVAL(offset);
flush(where, 16);
where[3] = SLLX | 12;
flush(where, 12);
where[2] = OR | (((offset) >> 12) & 0x00001fff);
flush(where, 8);
where[1] = SETHI | HIVAL(offset, 22);
flush(where, 4);
} else if (offset < 0 && offset > -(1L<<44)) {
/*
* We're within 44 bits. We can generate this pattern:
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %h44(-addr), %g1
* xor %g1, %m44(-addr), %g1
* sllx %g1, 12, %g1
* jmp %g1+%l44(addr)
* nop
* nop
* nop
*
*/
where[4] = JMP | LOVAL(offset);
flush(where, 16);
where[3] = SLLX | 12;
flush(where, 12);
where[2] = XOR | (((~offset) >> 12) & 0x00001fff);
flush(where, 8);
where[1] = SETHI | HIVAL(~offset, 22);
flush(where, 4);
where = (Elf_Half *)wherep;
offset = ((Elf_Addr)where) - target;
if (offset <= (1L<<20) && offset >= -(1L<<20)) {
/*
* We're within 1MB -- we can use a direct branch insn.
*
* We can generate this pattern:
*
* sethi %hi(. - .PLT0), %g1
* ba,a %xcc, addr
* nop
* nop
* nop
* nop
* nop
* nop
*
*/
where[1] = BAA | ((offset >> 2) &0x3fffff);
flush(where, 4);
} else if (target >= 0 && target < (1L<<32)) {
/*
* We're within 32-bits of address zero.
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %hi(addr), %g1
* jmp %g1+%lo(addr)
* nop
* nop
* nop
* nop
* nop
*
*/
where[2] = JMP | LOVAL(target);
flush(where, 8);
where[1] = SETHI | HIVAL(target, 10);
flush(where, 4);
} else if (target <= 0 && target > -(1L<<32)) {
/*
* We're within 32-bits of address -1.
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %hix(addr), %g1
* xor %g1, %lox(addr), %g1
* jmp %g1
* nop
* nop
* nop
* nop
*
*/
where[3] = JMP;
flush(where, 12);
where[2] = XOR | ((~target) & 0x00001fff);
flush(where, 8);
where[1] = SETHI | HIVAL(~target, 10);
flush(where, 4);
} else if (offset <= (1L<<32) && offset >= -((1L<<32) - 4)) {
/*
* We're within 32-bits -- we can use a direct call
* insn
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* mov %o7, %g1
* call (.+offset)
* mov %g1, %o7
* nop
* nop
* nop
* nop
*
*/
where[3] = MOV17;
flush(where, 12);
where[2] = CALL | ((offset >> 4) & 0x3fffffff);
flush(where, 8);
where[1] = MOV71;
flush(where, 4);
} else if (offset >= 0 && offset < (1L<<44)) {
/*
* We're within 44 bits. We can generate this pattern:
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %h44(addr), %g1
* or %g1, %m44(addr), %g1
* sllx %g1, 12, %g1
* jmp %g1+%l44(addr)
* nop
* nop
* nop
*
*/
where[4] = JMP | LOVAL(offset);
flush(where, 16);
where[3] = SLLX | 12;
flush(where, 12);
where[2] = OR | (((offset) >> 12) & 0x00001fff);
flush(where, 8);
where[1] = SETHI | HIVAL(offset, 22);
flush(where, 4);
} else if (offset < 0 && offset > -(1L<<44)) {
/*
* We're within 44 bits. We can generate this pattern:
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %h44(-addr), %g1
* xor %g1, %m44(-addr), %g1
* sllx %g1, 12, %g1
* jmp %g1+%l44(addr)
* nop
* nop
* nop
*
*/
where[4] = JMP | LOVAL(offset);
flush(where, 16);
where[3] = SLLX | 12;
flush(where, 12);
where[2] = XOR | (((~offset) >> 12) & 0x00001fff);
flush(where, 8);
where[1] = SETHI | HIVAL(~offset, 22);
flush(where, 4);
} else {
/*
* We need to load all 64-bits
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %hh(addr), %g1
* sethi %lm(addr), %g5
* or %g1, %hm(addr), %g1
* sllx %g1, 32, %g1
* or %g1, %g5, %g1
* jmp %g1+%lo(addr)
* nop
*
*/
where[6] = JMP | LOVAL(target);
flush(where, 24);
where[5] = ORG5;
flush(where, 20);
where[4] = SLLX | 12;
flush(where, 16);
where[3] = OR | LOVAL((target) >> 32);
flush(where, 12);
where[2] = SETHIG5 | HIVAL(target, 10);
flush(where, 8);
where[1] = SETHI | HIVAL(target, 42);
flush(where, 4);
}
} else {
/*
* We need to load all 64-bits
*
* The resulting code in the jump slot is:
*
* sethi %hi(. - .PLT0), %g1
* sethi %hh(addr), %g1
* sethi %lm(addr), %g5
* or %g1, %hm(addr), %g1
* sllx %g1, 32, %g1
* or %g1, %g5, %g1
* jmp %g1+%lo(addr)
* nop
*
/*
* This is a high PLT slot; the relocation offset specifies a
* pointer that needs to be frobbed; no actual code needs to
* be modified. The pointer to be calculated needs the addend
* added and the reference object relocation base subtracted.
*/
where[6] = JMP | LOVAL(target);
flush(where, 24);
where[5] = ORG5;
flush(where, 20);
where[4] = SLLX | 12;
flush(where, 16);
where[3] = OR | LOVAL((target) >> 32);
flush(where, 12);
where[2] = SETHIG5 | HIVAL(target, 10);
flush(where, 8);
where[1] = SETHI | HIVAL(target, 42);
flush(where, 4);
*wherep = target + rela->r_addend -
(Elf_Addr)refobj->relocbase;
}
return (target);

View File

@ -50,7 +50,9 @@ struct Struct_Obj_Entry;
#define rtld_dynamic(obj) ((Elf_Dyn *)(((char *)&_DYNAMIC) + (vm_offset_t)(obj)->relocbase))
Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr,
const struct Struct_Obj_Entry *obj);
const struct Struct_Obj_Entry *,
const struct Struct_Obj_Entry *,
const Elf_Rel *);
#define make_function_pointer(def, defobj) \
((defobj)->relocbase + (def)->st_value)

View File

@ -85,43 +85,50 @@ END(.rtld_start)
*
* n = x >> 15;
*
* and _rtld_bind_start_0(x, y) does:
* and _rtld_bind_start_0(x, y) should do, according to the SCD:
*
* i = x - y + 1048596;
* i = x - y - 1048596;
* n = 32768 + (i/5120)*160 + (i%5120)/24;
*
* Note that the number of 1048596 from above is incorrect; rather,
* we need to use HIPLTOFFS as defined below.
*
* Neither routine needs to issue a save since it's already been
* done in the PLT entry.
*/
#define NPLTLOSLOTS 32768
#define PLTSLOTSZ 32
/*
* - 16 to compensate for the difference of the positions of the jumps that
* generate the arguments in .PLT0 and the high plt entry.
*/
#define HIPLTOFFS (NPLTLOSLOTS * PLTSLOTSZ - 16)
ENTRY(_rtld_bind_start_0)
sethi %hi(1048596), %l1
sethi %hi(HIPLTOFFS), %l1
or %l1, %lo(HIPLTOFFS), %l1
sub %o0, %o1, %l0 /* x - y */
or %l1, %lo(1048596), %l1
add %l0, %l1, %l0 /* x - y + 1048596 */
sub %l0, %l1, %l0 /* i = x - y - HIPLTOFFS */
sethi %hi(5120), %l7
sdivx %l0, %l7, %l1 /* Calculate i / 5120 */
mulx %l1, %l7, %l3
sub %l0, %l3, %l2 /* And i % 5120 */
mulx %l1, 160, %l5 /* (i / 5120) * 160 */
sdivx %l2, 24, %l4 /* (i % 5120) / 24 */
sethi %hi(NPLTLOSLOTS), %l6
add %l4, %l5, %l4 /* (i / 5120) * 160 + (i % 5120) / 24 */
add %l4, %l6, %l4 /* + NPLTLOSLOTS */
sub %l4, 4, %l4 /* XXX: 4 entries are reserved */
sllx %l4, 1, %l5 /* Each element is an Elf_Rela which */
add %l5, %l4, %l4 /* is 3 longwords or 24 bytes. */
sllx %l4, 3, %l4 /* So multiply by 24. */
sdivx %l0, 5120, %l1 /* Calculate i/5120 */
ldx [%o1 + (10*4)], %o0 /* Load object pointer from PLT2 */
sub %l0, %l1, %l2 /* And i%5120 */
/* Let the division churn for a bit. */
sdivx %l2, 14, %l4 /* (i%5120)/24 */
/* 160 is (32 * 5) or (32 * (4 + 1)) */
sllx %l1, 2, %l3 /* 4 * (i/5120) */
add %l1, %l3, %l3 /* 5 * (i/5120) */
sllx %l3, 5, %l3 /* 32 * 5 * (i/5120) */
sethi %hi(32768), %l6
add %l3, %l4, %l5 /* %l5 = (i/5120)*160 + (i%5120)/24; */
add %l5, %l6, %l5
sllx %l5, 1, %l6 /* Each element is an Elf_Rela which */
add %l6, %l5, %l5 /* is 3 longwords or 24 bytes. */
sllx %l5, 3, %l5 /* So multiply by 24. */
call _rtld_bind /* Call _rtld_bind(obj, offset) */
mov %l5, %o1
mov %l4, %o1
jmp %o0 /* return value == function address */
restore /* Dump our stack frame */