/*- * Copyright (c) 2012,2013 Kai Wang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

#include "ld.h"
#include "ld_arch.h"
#include "ld_dynamic.h"
#include "ld_input.h"
#include "ld_layout.h"
#include "ld_output.h"
#include "ld_reloc.h"
#include "ld_symbols.h"
#include "ld_utils.h"
#include "amd64.h"

ELFTC_VCSID("$Id: amd64.c 3419 2016-02-19 20:07:15Z emaste $");

/*
 * AMD64 (x86-64) back-end for the linker: relocation scanning and
 * processing, GOT/PLT construction and TLS model relaxation.
 */

static void _create_plt_reloc(struct ld *ld, struct ld_symbol *lsb,
    uint64_t offset);
static void _create_got_reloc(struct ld *ld, struct ld_symbol *lsb,
    uint64_t type, uint64_t offset);
static void _create_copy_reloc(struct ld *ld, struct ld_symbol *lsb);
static void _create_dynamic_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_symbol *lsb, uint64_t type, uint64_t offset, int64_t addend);
static void _scan_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_reloc_entry *lre);
static struct ld_input_section *_find_and_create_got_section(struct ld *ld,
    int create);
static struct ld_input_section *_find_and_create_gotplt_section(struct ld *ld,
    int create);
static struct ld_input_section *_find_and_create_plt_section(struct ld *ld,
    int create);
static void _finalize_got_and_plt(struct ld *ld);
static uint64_t _get_max_page_size(struct ld *ld);
static uint64_t _get_common_page_size(struct ld *ld);
static void _adjust_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_reloc_entry *lre, struct ld_symbol *lsb, uint8_t *buf);
static void _process_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_reloc_entry *lre, struct ld_symbol *lsb, uint8_t *buf);
static void _reserve_got_entry(struct ld *ld, struct ld_symbol *lsb, int num);
static void _reserve_gotplt_entry(struct ld *ld, struct ld_symbol *lsb);
static void _reserve_plt_entry(struct ld *ld, struct ld_symbol *lsb);
static int _is_absolute_reloc(uint64_t r);
static void _warn_pic(struct ld *ld, struct ld_reloc_entry *lre);
static void _create_tls_gd_reloc(struct ld *ld, struct ld_symbol *lsb);
static void _create_tls_ld_reloc(struct ld *ld, struct ld_symbol *lsb);
static void _create_tls_ie_reloc(struct ld *ld, struct ld_symbol *lsb);
static enum ld_tls_relax _tls_check_relax(struct ld *ld,
    struct ld_reloc_entry *lre);
static uint64_t _got_offset(struct ld *ld, struct ld_symbol *lsb);
static int _tls_verify_gd(uint8_t *buf, uint64_t off);
static int _tls_verify_ld(uint8_t *buf, uint64_t off);
static void _tls_relax_gd_to_ie(struct ld *ld, struct ld_state *ls,
    struct ld_output *lo, struct ld_reloc_entry *lre, uint64_t p, uint64_t g,
    uint8_t *buf);
static void _tls_relax_gd_to_le(struct ld *ld, struct ld_state *ls,
    struct ld_output *lo, struct ld_reloc_entry *lre, struct ld_symbol *lsb,
    uint8_t *buf);
static void _tls_relax_ld_to_le(struct ld *ld, struct ld_state *ls,
    struct ld_reloc_entry *lre, uint8_t *buf);
static void _tls_relax_ie_to_le(struct ld *ld, struct ld_output *lo,
    struct ld_reloc_entry *lre, struct ld_symbol *lsb, uint8_t *buf);
static int32_t _tls_dtpoff(struct ld_output *lo, struct ld_symbol *lsb);
static int32_t _tls_tpoff(struct ld_output *lo, struct ld_symbol *lsb);

/* Maximum page size used for segment alignment: 2MB on amd64. */
static uint64_t
_get_max_page_size(struct ld *ld)
{

	(void) ld;
	return (0x200000);
}

/* Common (default) page size: 4KB on amd64. */
static uint64_t
_get_common_page_size(struct ld *ld)
{

	(void) ld;
	return (0x1000);
}

/*
 * Return non-zero if `r' is an absolute-address relocation type
 * (i.e. the relocated field holds a plain symbol address).
 */
static int
_is_absolute_reloc(uint64_t r)
{

	if (r == R_X86_64_64 || r == R_X86_64_32 || r == R_X86_64_32S ||
	    r == R_X86_64_16 || r == R_X86_64_8)
		return (1);

	return (0);
}

/* Return non-zero if `r' is the base-relative relocation type. */
static int
_is_relative_reloc(uint64_t r)
{

	if (r == R_X86_64_RELATIVE)
		return (1);

	return (0);
}

/*
 * Warn that a relocation cannot be resolved by the runtime linker and
 * that the object should be rebuilt as position-independent code.
 */
static void
_warn_pic(struct ld *ld, struct ld_reloc_entry *lre)
{
	struct ld_symbol *lsb;

	lsb = lre->lre_sym;

	if (lsb->lsb_bind != STB_LOCAL)
		ld_warn(ld, "relocation %s against `%s' can not be used"
		    " by runtime linker; recompile with -fPIC",
		    elftc_reloc_type_str(EM_X86_64, lre->lre_type),
		    lsb->lsb_name);
	else
		ld_warn(ld, "relocation %s can not be used by runtime linker;"
		    " recompile with -fPIC",
		    elftc_reloc_type_str(EM_X86_64, lre->lre_type));
}

/*
 * Look up the internal .got section; when `create' is set, create and
 * initialize it on first use (8-byte entries, writable, allocated).
 */
static struct ld_input_section *
_find_and_create_got_section(struct ld *ld, int create)
{
	struct ld_input_section *is;

	/* Check if the GOT section is already created. */
	is = ld_input_find_internal_section(ld, ".got");
	if (is != NULL)
		return (is);

	if (create) {
		is = ld_input_add_internal_section(ld, ".got");
		is->is_entsize = 8;
		is->is_align = 8;
		is->is_type = SHT_PROGBITS;
		is->is_flags = SHF_ALLOC | SHF_WRITE;
	}

	return (is);
}

/*
 * Look up the internal .got.plt section; when `create' is set, create
 * it, reserve the three reserved header entries and define the
 * _GLOBAL_OFFSET_TABLE_ symbol at its start.
 */
static struct ld_input_section *
_find_and_create_gotplt_section(struct ld *ld, int create)
{
	struct ld_input_section *is;

	/* Check if the GOT (for PLT) section is already created. */
	is = ld_input_find_internal_section(ld, ".got.plt");
	if (is != NULL)
		return (is);

	if (create) {
		is = ld_input_add_internal_section(ld, ".got.plt");
		is->is_entsize = 8;
		is->is_align = 8;
		is->is_type = SHT_PROGBITS;
		is->is_flags = SHF_ALLOC | SHF_WRITE;

		/* Reserve space for the initial entries. */
		(void) ld_input_reserve_ibuf(is, 3);

		/* Create _GLOBAL_OFFSET_TABLE_ symbol. */
		ld_symbols_add_internal(ld, "_GLOBAL_OFFSET_TABLE_", 0, 0,
		    is->is_index, STB_LOCAL, STT_OBJECT, STV_HIDDEN, is, NULL);
	}

	return (is);
}

/*
 * Look up the internal .plt section; when `create' is set, create it
 * (16-byte entries, executable) and reserve the initial PLT0 entry.
 */
static struct ld_input_section *
_find_and_create_plt_section(struct ld *ld, int create)
{
	struct ld_input_section *is;

	/* Check if the PLT section is already created. */
	is = ld_input_find_internal_section(ld, ".plt");
	if (is != NULL)
		return (is);

	if (create) {
		is = ld_input_add_internal_section(ld, ".plt");
		is->is_entsize = 16;
		is->is_align = 4;
		is->is_type = SHT_PROGBITS;
		is->is_flags = SHF_ALLOC | SHF_EXECINSTR;

		/* Reserve space for the initial entry. */
		(void) ld_input_reserve_ibuf(is, 1);
	}

	return (is);
}

/*
 * Reserve `num' consecutive GOT entries for symbol `lsb' and record
 * the symbol's offset into the GOT. No-op if the symbol already has
 * a GOT entry.
 */
static void
_reserve_got_entry(struct ld *ld, struct ld_symbol *lsb, int num)
{
	struct ld_input_section *is;

	is = _find_and_create_got_section(ld, 1);

	/* Check if the entry already has a GOT entry. */
	if (lsb->lsb_got)
		return;

	/* Reserve GOT entries. */
	lsb->lsb_got_off = ld_input_reserve_ibuf(is, num);
	lsb->lsb_got = 1;
}

/*
 * Reserve a .got.plt slot for `lsb' and record the matching
 * R_X86_64_JUMP_SLOT relocation.
 */
static void
_reserve_gotplt_entry(struct ld *ld, struct ld_symbol *lsb)
{
	struct ld_input_section *is;

	is = _find_and_create_gotplt_section(ld, 1);

	/* Reserve a GOT entry for PLT. */
	(void) ld_input_reserve_ibuf(is, 1);

	/*
	 * Record a R_X86_64_JUMP_SLOT entry for this symbol. Note that
	 * we don't need to record the offset (relative to the GOT section)
	 * here, since the PLT relocations will be sorted later and we
	 * will generate GOT section according to the new order.
	 */
	_create_plt_reloc(ld, lsb, 0);
}

/* Reserve one PLT entry for `lsb' and mark the symbol as having one. */
static void
_reserve_plt_entry(struct ld *ld, struct ld_symbol *lsb)
{
	struct ld_input_section *is;

	is = _find_and_create_plt_section(ld, 1);

	(void) ld_input_reserve_ibuf(is, 1);
	lsb->lsb_plt = 1;
}

/* Record a R_X86_64_JUMP_SLOT relocation in .rela.plt for `lsb'. */
static void
_create_plt_reloc(struct ld *ld, struct ld_symbol *lsb, uint64_t offset)
{

	ld_reloc_create_entry(ld, ".rela.plt", NULL, R_X86_64_JUMP_SLOT,
	    lsb, offset, 0);

	lsb->lsb_dynrel = 1;
}

/*
 * Record a relocation of type `type' in .rela.got against `lsb'. The
 * GOT section must already exist. R_X86_64_RELATIVE entries do not
 * reference a dynamic symbol, so lsb_dynrel is only set otherwise.
 */
static void
_create_got_reloc(struct ld *ld, struct ld_symbol *lsb, uint64_t type,
    uint64_t offset)
{
	struct ld_input_section *tis;

	tis = _find_and_create_got_section(ld, 0);
	assert(tis != NULL);

	ld_reloc_create_entry(ld, ".rela.got", tis, type, lsb, offset, 0);

	if (type != R_X86_64_RELATIVE)
		lsb->lsb_dynrel = 1;
}

/*
 * Reserve space for `lsb' in .dynbss and record a R_X86_64_COPY
 * relocation so the runtime linker copies the object from the DSO.
 */
static void
_create_copy_reloc(struct ld *ld, struct ld_symbol *lsb)
{
	struct ld_input_section *tis;

	ld_dynamic_reserve_dynbss_entry(ld, lsb);

	tis = ld_input_find_internal_section(ld, ".dynbss");
	assert(tis != NULL);

	ld_reloc_create_entry(ld, ".rela.bss", tis, R_X86_64_COPY, lsb,
	    lsb->lsb_value, 0);

	lsb->lsb_dynrel = 1;
}

/*
 * Record a dynamic relocation for `lsb', choosing the output
 * relocation section by symbol binding and by whether the target
 * section is writable (.rel.ro variants for read-only data).
 */
static void
_create_dynamic_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_symbol *lsb, uint64_t type, uint64_t offset, int64_t addend)
{

	if (lsb->lsb_bind == STB_LOCAL) {
		if (is->is_flags & SHF_WRITE)
			ld_reloc_create_entry(ld, ".rela.data.rel.local",
			    is, type, lsb, offset, addend);
		else
			ld_reloc_create_entry(ld, ".rela.data.rel.ro.local",
			    is, type, lsb, offset, addend);
	} else {
		if (is->is_flags & SHF_WRITE)
			ld_reloc_create_entry(ld, ".rela.data.rel",
			    is, type, lsb, offset, addend);
		else
			ld_reloc_create_entry(ld, ".rela.data.rel.ro",
			    is, type, lsb, offset, addend);
	}

	if (type != R_X86_64_RELATIVE)
		lsb->lsb_dynrel = 1;
}

/*
 * Final per-relocation fixups before the dynamic relocation is
 * emitted: fold the symbol value into R_X86_64_RELATIVE addends and
 * detach symbols from relocations that must not reference one.
 */
static void
_finalize_reloc(struct ld *ld, struct ld_input_section *tis,
    struct ld_reloc_entry *lre)
{
	struct ld_symbol *lsb;

	(void) ld;
	(void) tis;

	lsb = ld_symbols_ref(lre->lre_sym);

	switch (lre->lre_type) {
	case R_X86_64_RELATIVE:
		/*
		 * Update the addend stored in the original relocation to
		 * point to the new location, by adding the updated symbol
		 * value.
		 */
		lre->lre_addend += lsb->lsb_value;

		/* R_X86_64_RELATIVE should not associate with a symbol. */
		lre->lre_sym = NULL;
		break;

	case R_X86_64_DTPMOD64:
		/*
		 * Relocation R_X86_64_DTPMOD64 generated for local dynamic
		 * TLS model should not associate with a symbol.
		 */
		/*
		 * NOTE(review): the inner lre_type check is redundant --
		 * we are already in the R_X86_64_DTPMOD64 case.
		 */
		if (lre->lre_type == R_X86_64_DTPMOD64 && lsb->lsb_tls_ld)
			lre->lre_sym = NULL;
		break;

	default:
		break;
	}
}

/*
 * Fill in the final contents of the GOT, .got.plt and PLT sections:
 * resolved symbol values for relaxed GOT entries, the _DYNAMIC and
 * reserved runtime-linker slots, the PLT0 stub and one 16-byte PLT
 * entry per sorted .rela.plt relocation.
 */
static void
_finalize_got_and_plt(struct ld *ld)
{
	struct ld_output *lo;
	struct ld_input_section *got_is, *rela_got_is, *plt_is, *rela_plt_is;
	struct ld_output_section *got_os, *plt_os, *rela_plt_os;
	struct ld_reloc_entry *lre;
	struct ld_symbol *lsb;
	char dynamic_symbol[] = "_DYNAMIC";
	uint8_t *got, *plt;
	uint64_t u64;
	int32_t s32, pltgot, gotpcrel;
	int i, j;

	lo = ld->ld_output;
	assert(lo != NULL);

	/*
	 * Initialize all .got section entries to zero.
	 */
	got_is = _find_and_create_got_section(ld, 0);
	if (got_is != NULL)
		memset(got_is->is_ibuf, 0, got_is->is_size);

	/*
	 * Search for GOT relocations that requires filling in symbol
	 * value.
	 */
	rela_got_is = ld_input_find_internal_section(ld, ".rela.got");
	if (rela_got_is != NULL && rela_got_is->is_reloc != NULL) {
		STAILQ_FOREACH(lre, rela_got_is->is_reloc, lre_next) {
			if (lre->lre_type == R_X86_64_RELATIVE) {
				lsb = lre->lre_sym;
				got = (uint8_t *) got_is->is_ibuf +
				    lsb->lsb_got_off;
				WRITE_64(got, lsb->lsb_value);
			}
		}
	}

	/*
	 * Find the .plt section. The buffers should have been allocated
	 * at this point.
	 */
	plt_is = _find_and_create_plt_section(ld, 0);
	if (plt_is == NULL)
		return;
	plt_os = plt_is->is_output;
	plt = plt_is->is_ibuf;
	assert(plt != NULL);

	/*
	 * Find the .got.plt and .rela.plt section. If the .plt section
	 * exists, the .got.plt and .rela.plt section should exist too.
	 */
	got_is = _find_and_create_gotplt_section(ld, 0);
	assert(got_is != NULL);
	got_os = got_is->is_output;
	lo->lo_gotplt = got_os;
	got = got_is->is_ibuf;
	assert(got != NULL);
	rela_plt_is = ld_input_find_internal_section(ld, ".rela.plt");
	assert(rela_plt_is != NULL);
	rela_plt_os = rela_plt_is->is_output;
	lo->lo_rel_plt = rela_plt_os;

	/* Point sh_info field of the .rela.plt to .plt section. */
	rela_plt_os->os_info = plt_os;

	/* Fill in the value of symbol _DYNAMIC in the first GOT entry. */
	ld_symbols_get_value(ld, dynamic_symbol, &u64);
	WRITE_64(got, u64);
	got += 8;

	/* Reserve the second and the third entry for the dynamic linker. */
	memset(got, 0, 16);
	got += 16;

	/*
	 * Write the initial PLT entry.
	 */

	/* Calculate the relative offset from PLT to GOT. */
	pltgot = got_os->os_addr - plt_os->os_addr;

	/*
	 * Push the second GOT entry to the stack for the dynamic
	 * linker. (PUSH reg/memXX [RIP+disp32]) (6 bytes for push)
	 */
	WRITE_8(plt, 0xff);
	WRITE_8(plt + 1, 0x35);
	s32 = pltgot - 6 + 8;
	WRITE_32(plt + 2, s32);
	plt += 6;

	/*
	 * Jump to the address in the third GOT entry (call into
	 * the dynamic linker). (JMP reg/memXX [RIP+disp32])
	 * (6 bytes for jmp)
	 */
	WRITE_8(plt, 0xff);
	WRITE_8(plt + 1, 0x25);
	s32 = pltgot - 12 + 16;
	WRITE_32(plt + 2, s32);
	plt += 6;

	/* Padding: 4-byte nop. (NOP [rAx+disp8]) */
	WRITE_8(plt, 0x0f);
	WRITE_8(plt + 1, 0x1f);
	WRITE_8(plt + 2, 0x40);
	WRITE_8(plt + 3, 0x0);
	plt += 4;

	/*
	 * Walk through the sorted PLT relocations in the output section
	 * and fill in each GOT and PLT entries.
	 */
	i = 3;			/* .got.plt index: entries 0-2 are reserved */
	j = 0;			/* .rela.plt relocation index */
	STAILQ_FOREACH(lre, rela_plt_is->is_reloc, lre_next) {
		lsb = ld_symbols_ref(lre->lre_sym);

		/*
		 * Set symbol's PLT offset to the address of this PLT entry.
		 * The PLT offset is used in relocation processing later.
		 */
		lsb->lsb_plt_off = plt_os->os_addr + (i - 2) * 16;

		/*
		 * Update the offset for the R_X86_64_JUMP_SLOT relocation
		 * entry, pointing to the corresponding GOT entry.
		 */
		lre->lre_offset = got_os->os_addr + i * 8;

		/*
		 * Calculate the IP-relative offset to the GOT entry for
		 * this function. (6 bytes for jmp)
		 */
		gotpcrel = pltgot + i * 8 - (i - 2) * 16 - 6;

		/*
		 * PLT: Jump to the address in the GOT entry for this
		 * function. (JMP reg/memXX [RIP+disp32])
		 */
		WRITE_8(plt, 0xff);
		WRITE_8(plt + 1, 0x25);
		WRITE_32(plt + 2, gotpcrel);
		plt += 6;

		/*
		 * PLT: Symbol is not resolved, push the relocation index to
		 * the stack. (PUSH imm32)
		 */
		WRITE_8(plt, 0x68);
		WRITE_32(plt + 1, j);
		plt += 5;

		/*
		 * PLT: Jump to the first PLT entry, eventually call the
		 * dynamic linker. (JMP rel32off)
		 */
		WRITE_8(plt, 0xe9);
		s32 = - (i - 1) * 16;
		WRITE_32(plt + 1, s32);
		plt += 5;

		/*
		 * GOT: Write the GOT entry for this function, pointing to
		 * the push op.
		 */
		u64 = plt_os->os_addr + (i - 2) * 16 + 6;
		WRITE_64(got, u64);

		/* Increase relocation entry index. */
		j++;

		/* Move to next GOT entry. */
		got += 8;
		i++;
	}

	assert(got == (uint8_t *) got_is->is_ibuf + got_is->is_size);
	assert(plt == (uint8_t *) plt_is->is_ibuf + plt_is->is_size);
}

/*
 * First-pass relocation scan: decide, per relocation, which GOT/PLT
 * entries, dynamic relocations and TLS relaxations will be needed.
 */
static void
_scan_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_reloc_entry *lre)
{
	struct ld_symbol *lsb;
	enum ld_tls_relax tr;

	lsb = ld_symbols_ref(lre->lre_sym);

	/*
	 * TODO: We do not yet support "Large Models" and relevant
	 * relocation types R_X86_64_GOT64, R_X86_64_GOTPCREL64,
	 * R_X86_64_GOTPC64, R_X86_64_GOTPLT64 and R_X86_64_PLTOFF64.
	 * Refer to AMD64 ELF ABI for details.
	 */

	switch (lre->lre_type) {
	case R_X86_64_NONE:
		break;

	case R_X86_64_64:
	case R_X86_64_32:
	case R_X86_64_32S:
	case R_X86_64_16:
	case R_X86_64_8:

		/*
		 * For a local symbol, if the linker output a PIE or DSO,
		 * we should generate a R_X86_64_RELATIVE reloc for
		 * R_X86_64_64. We don't know how to generate dynamic reloc
		 * for other reloc types since R_X86_64_RELATIVE is 64 bits.
		 * We can not use them directly either because FreeBSD rtld(1)
		 * (and probably glibc) doesn't accept absolute address
		 * relocation other than R_X86_64_64.
		 */
		if (lsb->lsb_bind == STB_LOCAL) {
			if (ld->ld_pie || ld->ld_dso) {
				if (lre->lre_type == R_X86_64_64)
					_create_dynamic_reloc(ld, is, lsb,
					    R_X86_64_RELATIVE, lre->lre_offset,
					    lre->lre_addend);
				else
					_warn_pic(ld, lre);
			}
			break;
		}

		/*
		 * For a global symbol, we probably need to generate PLT entry
		 * and/or a dynamic relocation.
		 *
		 * Note here, normally the compiler will generate a PC-relative
		 * relocation for function calls. However, if the code retrieve
		 * the address of a function and call it indirectly, assembler
		 * will generate absolute relocation instead. That's why we
		 * should check if we need to create a PLT entry here. Also, if
		 * we're going to create the PLT entry, we should also set the
		 * symbol value to the address of PLT entry just in case the
		 * function address is used to compare with other function
		 * addresses. (If PLT address is used, function will have
		 * unified address in the main executable and DSOs)
		 */
		if (ld_reloc_require_plt(ld, lre)) {
			if (!lsb->lsb_plt) {
				_reserve_gotplt_entry(ld, lsb);
				_reserve_plt_entry(ld, lsb);
			}
			/*
			 * Note here even if we have generated PLT for this
			 * function before, we still need to set this flag.
			 * It's possible that we first see the relative
			 * relocation then this absolute relocation, in
			 * other words, the same function can be called in
			 * different ways.
			 */
			lsb->lsb_func_addr = 1;
		}

		if (ld_reloc_require_copy_reloc(ld, lre) &&
		    !lsb->lsb_copy_reloc)
			_create_copy_reloc(ld, lsb);
		else if (ld_reloc_require_dynamic_reloc(ld, lre)) {
			/* We only support R_X86_64_64. (See above) */
			if (lre->lre_type != R_X86_64_64) {
				_warn_pic(ld, lre);
				break;
			}
			/*
			 * Check if we can relax R_X86_64_64 to
			 * R_X86_64_RELATIVE instead.
			 */
			if (ld_reloc_relative_relax(ld, lre))
				_create_dynamic_reloc(ld, is, lsb,
				    R_X86_64_RELATIVE, lre->lre_offset,
				    lre->lre_addend);
			else
				_create_dynamic_reloc(ld, is, lsb,
				    R_X86_64_64, lre->lre_offset,
				    lre->lre_addend);
		}

		break;

	case R_X86_64_PLT32:
		/*
		 * In some cases we don't really need to generate a PLT
		 * entry, then a R_X86_64_PLT32 relocation can be relaxed
		 * to a R_X86_64_PC32 relocation.
		 */
		if (lsb->lsb_bind == STB_LOCAL ||
		    !ld_reloc_require_plt(ld, lre)) {
			lre->lre_type = R_X86_64_PC32;
			break;
		}

		/*
		 * If linker outputs a normal executable and the symbol is
		 * defined but is not defined inside a DSO, we can generate
		 * a R_X86_64_PC32 relocation instead.
		 */
		if (ld->ld_exec && lsb->lsb_shndx != SHN_UNDEF &&
		    (lsb->lsb_input == NULL ||
		    lsb->lsb_input->li_type != LIT_DSO)) {
			lre->lre_type = R_X86_64_PC32;
			break;
		}

		/* Create an PLT entry otherwise. */
		if (!lsb->lsb_plt) {
			_reserve_gotplt_entry(ld, lsb);
			_reserve_plt_entry(ld, lsb);
		}
		break;

	case R_X86_64_PC64:
	case R_X86_64_PC32:
	case R_X86_64_PC16:
	case R_X86_64_PC8:

		/*
		 * When these relocations apply to a global symbol, we should
		 * check if we need to generate PLT entry and/or a dynamic
		 * relocation.
		 */
		if (lsb->lsb_bind != STB_LOCAL) {
			if (ld_reloc_require_plt(ld, lre) && !lsb->lsb_plt) {
				_reserve_gotplt_entry(ld, lsb);
				_reserve_plt_entry(ld, lsb);
			}

			if (ld_reloc_require_copy_reloc(ld, lre) &&
			    !lsb->lsb_copy_reloc)
				_create_copy_reloc(ld, lsb);
			else if (ld_reloc_require_dynamic_reloc(ld, lre)) {
				/*
				 * We can not generate dynamic relocation for
				 * these PC-relative relocation since they
				 * are probably not supported by the runtime
				 * linkers.
				 *
				 * Note: FreeBSD rtld(1) does support
				 * R_X86_64_PC32.
				 */
				_warn_pic(ld, lre);
			}
		}
		break;

	case R_X86_64_GOTOFF64:
	case R_X86_64_GOTPC32:
		/*
		 * These relocation types use GOT address as a base address
		 * and instruct the linker to build a GOT.
		 */
		(void) _find_and_create_got_section(ld, 1);
		break;

	case R_X86_64_GOT32:
	case R_X86_64_GOTPCREL:
		/*
		 * These relocation types instruct the linker to build a
		 * GOT and generate a GOT entry.
		 */
		if (!lsb->lsb_got) {
			_reserve_got_entry(ld, lsb, 1);
			/*
			 * TODO: For now we always create a R_X86_64_GLOB_DAT
			 * relocation for a GOT entry. There are cases that
			 * the symbol's address is known at link time and
			 * the GOT entry value can be filled in by the program
			 * linker instead.
			 */
			if (ld_reloc_require_glob_dat(ld, lre))
				_create_got_reloc(ld, lsb, R_X86_64_GLOB_DAT,
				    lsb->lsb_got_off);
			else
				_create_got_reloc(ld, lsb, R_X86_64_RELATIVE,
				    lsb->lsb_got_off);
		}
		break;

	case R_X86_64_TLSGD:	/* Global Dynamic */
		tr = _tls_check_relax(ld, lre);
		switch (tr) {
		case TLS_RELAX_NONE:
			_create_tls_gd_reloc(ld, lsb);
			break;
		case TLS_RELAX_INIT_EXEC:
			_create_tls_ie_reloc(ld, lsb);
			break;
		case TLS_RELAX_LOCAL_EXEC:
			break;
		default:
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
			break;
		}
		break;

	case R_X86_64_TLSLD:	/* Local Dynamic */
		tr = _tls_check_relax(ld, lre);
		if (tr == TLS_RELAX_NONE)
			_create_tls_ld_reloc(ld, lsb);
		else if (tr != TLS_RELAX_LOCAL_EXEC)
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
		break;

	case R_X86_64_DTPOFF32:
		/* Handled by R_X86_64_TLSLD case. */
		break;

	case R_X86_64_GOTTPOFF:	/* Initial Exec */
		tr = _tls_check_relax(ld, lre);
		if (tr == TLS_RELAX_NONE)
			_create_tls_ie_reloc(ld, lsb);
		else if (tr != TLS_RELAX_LOCAL_EXEC)
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
		break;

	case R_X86_64_TPOFF32:	/* Local Exec */
		/* No further relaxation possible. */
		break;

	case R_X86_64_GOTPC32_TLSDESC:
	case R_X86_64_TLSDESC_CALL:
		/* TODO. */
		break;

	default:
		/*
		 * NOTE(review): `%ju' expects uintmax_t but lre_type is
		 * uint64_t -- the argument should probably be cast to
		 * (uintmax_t); confirm against ld_warn's format handling.
		 */
		ld_warn(ld, "can not handle relocation %ju",
		    lre->lre_type);
		break;
	}
}

/*
 * Return the virtual address of the GOT entry reserved for `lsb'.
 * Caches the .got input section pointer in ld->ld_got on first use.
 */
static uint64_t
_got_offset(struct ld *ld, struct ld_symbol *lsb)
{
	struct ld_output_section *os;

	assert(lsb->lsb_got);

	if (ld->ld_got == NULL) {
		ld->ld_got = _find_and_create_got_section(ld, 0);
		assert(ld->ld_got != NULL);
	}

	os = ld->ld_got->is_output;

	return (os->os_addr + ld->ld_got->is_reloff + lsb->lsb_got_off);
}

/*
 * Second-pass relocation processing: apply each relocation to the
 * section data in `buf', performing TLS relaxation rewrites decided
 * by _tls_check_relax(). `s' is the symbol value, `p' the place being
 * relocated, `l' the PLT address and `g' the GOT entry address.
 */
static void
_process_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_reloc_entry *lre, struct ld_symbol *lsb, uint8_t *buf)
{
	struct ld_state *ls;
	struct ld_output *lo;
	uint64_t u64, s, l, p, g;
	int64_t s64;
	uint32_t u32;
	int32_t s32;
	enum ld_tls_relax tr;

	ls = &ld->ld_state;

	lo = ld->ld_output;
	assert(lo != NULL);

	l = lsb->lsb_plt_off;
	p = lre->lre_offset + is->is_output->os_addr + is->is_reloff;
	s = lsb->lsb_value;

	switch (lre->lre_type) {
	case R_X86_64_NONE:
		break;

	case R_X86_64_64:
		WRITE_64(buf + lre->lre_offset, s + lre->lre_addend);
		break;

	case R_X86_64_PC32:
		if (lsb->lsb_plt)
			s32 = l + lre->lre_addend - p;
		else
			s32 = s + lre->lre_addend - p;
		WRITE_32(buf + lre->lre_offset, s32);
		break;

	case R_X86_64_PLT32:
		/*
		 * ls_ignore_next_plt is set by the TLS GD/LD relaxation
		 * code to skip the relocation for the removed call to
		 * _tls_get_addr.
		 */
		if (!ls->ls_ignore_next_plt) {
			s32 = l + lre->lre_addend - p;
			WRITE_32(buf + lre->lre_offset, s32);
		} else
			ls->ls_ignore_next_plt = 0;
		break;

	case R_X86_64_GOTPCREL:
		g = _got_offset(ld, lsb);
		s32 = g + lre->lre_addend - p;
		WRITE_32(buf + lre->lre_offset, s32);
		break;

	case R_X86_64_32:
		/* Value must fit in 32 bits zero-extended. */
		u64 = s + lre->lre_addend;
		u32 = u64 & 0xffffffff;
		if (u64 != u32)
			ld_fatal(ld, "R_X86_64_32 relocation failed");
		WRITE_32(buf + lre->lre_offset, u32);
		break;

	case R_X86_64_32S:
		/* Value must fit in 32 bits sign-extended. */
		s64 = s + lre->lre_addend;
		s32 = s64 & 0xffffffff;
		if (s64 != s32)
			ld_fatal(ld, "R_X86_64_32S relocation failed");
		WRITE_32(buf + lre->lre_offset, s32);
		break;

	case R_X86_64_TLSGD:	/* Global Dynamic */
		tr = _tls_check_relax(ld, lre);
		switch (tr) {
		case TLS_RELAX_NONE:
			g = _got_offset(ld, lsb);
			s32 = g + lre->lre_addend - p;
			WRITE_32(buf + lre->lre_offset, s32);
			break;
		case TLS_RELAX_INIT_EXEC:
			g = _got_offset(ld, lsb);
			_tls_relax_gd_to_ie(ld, ls, lo, lre, p, g, buf);
			break;
		case TLS_RELAX_LOCAL_EXEC:
			_tls_relax_gd_to_le(ld, ls, lo, lre, lsb, buf);
			break;
		default:
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
			break;
		}
		break;

	case R_X86_64_TLSLD:	/* Local Dynamic */
		tr = _tls_check_relax(ld, lre);
		switch (tr) {
		case TLS_RELAX_NONE:
			g = _got_offset(ld, lsb);
			s32 = g + lre->lre_addend - p;
			WRITE_32(buf + lre->lre_offset, s32);
			break;
		case TLS_RELAX_LOCAL_EXEC:
			_tls_relax_ld_to_le(ld, ls, lre, buf);
			break;
		default:
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
			break;
		}
		break;

	case R_X86_64_DTPOFF32:	/* Local Dynamic (offset) */
		tr = _tls_check_relax(ld, lre);
		switch (tr) {
		case TLS_RELAX_NONE:
			s32 = _tls_dtpoff(lo, lsb);
			WRITE_32(buf + lre->lre_offset, s32);
			break;
		case TLS_RELAX_LOCAL_EXEC:
			s32 = _tls_tpoff(lo, lsb);
			WRITE_32(buf + lre->lre_offset, s32);
			break;
		default:
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
			break;
		}
		break;

	case R_X86_64_GOTTPOFF:	/* Initial Exec */
		tr = _tls_check_relax(ld, lre);
		switch (tr) {
		case TLS_RELAX_NONE:
			g = _got_offset(ld, lsb);
			s32 = g + lre->lre_addend - p;
			WRITE_32(buf + lre->lre_offset, s32);
			break;
		case TLS_RELAX_LOCAL_EXEC:
			_tls_relax_ie_to_le(ld, lo, lre, lsb, buf);
			break;
		default:
			ld_fatal(ld, "Internal: invalid TLS relaxation %d",
			    tr);
			break;
		}
		break;

	case R_X86_64_TPOFF32:	/* Local Exec */
		s32 = _tls_tpoff(lo, lsb);
		WRITE_32(buf + lre->lre_offset, s32);
		break;

	default:
		ld_warn(ld, "Relocation %s not supported",
		    elftc_reloc_type_str(EM_X86_64, lre->lre_type));
		break;
	}
}

/*
 * Adjust relocations against section symbols so the addend refers to
 * the section's new location inside the merged output section.
 */
static void
_adjust_reloc(struct ld *ld, struct ld_input_section *is,
    struct ld_reloc_entry *lre, struct ld_symbol *lsb, uint8_t *buf)
{
	struct ld_input_section *_is;

	(void) ld;
	(void) is;
	(void) buf;

	/* Only need to adjust relocation against section symbols. */
	if (lsb->lsb_type != STT_SECTION)
		return;

	if ((_is = lsb->lsb_is) == NULL || _is->is_output == NULL)
		return;

	/*
	 * Update the relocation addend to point to the new location
	 * in the output object.
	 */
	lre->lre_addend += _is->is_reloff;
}

/*
 * Decide which TLS relaxation (if any) applies to this relocation,
 * based on the output type (-static / DSO / executable) and on
 * whether the symbol is defined in a regular object.
 */
static enum ld_tls_relax
_tls_check_relax(struct ld *ld, struct ld_reloc_entry *lre)
{
	struct ld_symbol *lsb;

	lsb = ld_symbols_ref(lre->lre_sym);

	/*
	 * If the linker is performing -static linking, we should always
	 * use the Local Exec model.
	 */
	if (!ld->ld_dynamic_link)
		return (TLS_RELAX_LOCAL_EXEC);

	/*
	 * If the linker is creating a DSO, we can not perform any TLS
	 * relaxation.
	 */
	if (ld->ld_dso)
		return (TLS_RELAX_NONE);

	/*
	 * The linker is creating an executable, if the symbol is
	 * defined in a regular object, we can use the Local Exec model.
	 */
	if (lsb->lsb_shndx != SHN_UNDEF && ld_symbols_in_regular(lsb))
		return (TLS_RELAX_LOCAL_EXEC);

	/*
	 * If the TLS model is Global Dynamic, we can relax it to Initial
	 * Exec model since the linker is creating an executable.
	 */
	if (lre->lre_type == R_X86_64_TLSGD)
		return (TLS_RELAX_INIT_EXEC);

	/* For all the other cases, no relaxation can be done. */
	return (TLS_RELAX_NONE);
}

/*
 * Compute the thread-pointer-relative (TP) offset of `lsb': negative
 * offset from the end of the (aligned) TLS block.
 */
static int32_t
_tls_tpoff(struct ld_output *lo, struct ld_symbol *lsb)
{
	int32_t tls_off;

	tls_off = -roundup(lo->lo_tls_size, lo->lo_tls_align);

	return (tls_off + (lsb->lsb_value - lo->lo_tls_addr));
}

/* Compute the offset of `lsb' within its TLS block (DTP offset). */
static int32_t
_tls_dtpoff(struct ld_output *lo, struct ld_symbol *lsb)
{

	return (lsb->lsb_value - lo->lo_tls_addr);
}

/*
 * Verify that the bytes at `off' match the canonical General Dynamic
 * TLS code sequence (relocatable fields are zero in the template).
 */
static int
_tls_verify_gd(uint8_t *buf, uint64_t off)
{
	/*
	 * Global Dynamic model:
	 *
	 * 0x00 .byte 0x66
	 * 0x01 leaq x@tlsgd(%rip), %rdi
	 * 0x08 .word 0x6666
	 * 0x0a rex64
	 * 0x0b call _tls_get_addr@plt
	 */
	uint8_t gd[] = "\x66\x48\x8d\x3d\x00\x00\x00\x00"
	    "\x66\x66\x48\xe8\x00\x00\x00\x00";

	if (memcmp(buf + off, gd, sizeof(gd) - 1) == 0)
		return (1);

	return (0);
}

/*
 * Verify that the bytes at `off' match the canonical Local Dynamic
 * TLS code sequence.
 */
static int
_tls_verify_ld(uint8_t *buf, uint64_t off)
{
	/*
	 * Local Dynamic model:
	 *
	 * 0x00 leaq x@tlsld(%rip), %rdi
	 * 0x07 call _tls_get_addr@plt
	 */
	uint8_t ld[] = "\x48\x8d\x3d\x00\x00\x00\x00"
	    "\xe8\x00\x00\x00\x00";

	if (memcmp(buf + off, ld, sizeof(ld) - 1) == 0)
		return (1);

	return (0);
}

/*
 * Rewrite a General Dynamic TLS sequence in place to the Initial Exec
 * model and apply the resulting GOT-relative offset.
 */
static void
_tls_relax_gd_to_ie(struct ld *ld, struct ld_state *ls,
    struct ld_output *lo, struct ld_reloc_entry *lre, uint64_t p, uint64_t g,
    uint8_t *buf)
{
	/*
	 * Initial Exec model:
	 *
	 * 0x00 movq %fs:0, %rax
	 * 0x09 addq x@gottpoff(%rip), %rax
	 */
	uint8_t ie[] = "\x64\x48\x8b\x04\x25\x00\x00\x00\x00"
	    "\x48\x03\x05\x00\x00\x00\x00";
	int32_t s32;

	assert(lre->lre_type == R_X86_64_TLSGD);

	if (!_tls_verify_gd(buf, lre->lre_offset - 4))
		ld_warn(ld, "unrecognized TLS global dynamic model code");

	/* Rewrite Global Dynamic to Initial Exec model. */
	memcpy((uint8_t *) buf + lre->lre_offset - 4, ie, sizeof(ie) - 1);

	/*
	 * R_X86_64_TLSGD relocation is applied at gd[4]. After it's relaxed
	 * to Initial Exec model, the resulting R_X86_64_GOTTPOFF relocation
	 * should be applied at ie[12]. The addend should remain the same
	 * since instruction "leaq x@tlsgd(%rip), %rdi" and
	 * "addq x@gottpoff(%rip), %rax" has the same length. `p' is moved
	 * 8 bytes forward.
	 */
	s32 = g + lre->lre_addend - (p + 8);
	WRITE_32(buf + lre->lre_offset + 8, s32);

	/* Ignore the next R_X86_64_PLT32 relocation for _tls_get_addr. */
	ls->ls_ignore_next_plt = 1;
}

/*
 * Rewrite a General Dynamic TLS sequence in place to the Local Exec
 * model and apply the thread-pointer offset directly.
 */
static void
_tls_relax_gd_to_le(struct ld *ld, struct ld_state *ls,
    struct ld_output *lo, struct ld_reloc_entry *lre, struct ld_symbol *lsb,
    uint8_t *buf)
{
	/*
	 * Local Exec model:
	 *
	 * 0x00 movq %fs:0, %rax
	 * 0x09 leaq x@tpoff(%rax), %rax
	 */
	uint8_t le[] = "\x64\x48\x8b\x04\x25\x00\x00\x00\x00"
	    "\x48\x8d\x80\x00\x00\x00\x00";
	int32_t s32;

	if (!_tls_verify_gd(buf, lre->lre_offset - 4))
		ld_warn(ld, "unrecognized TLS global dynamic model code");

	/* Rewrite Global Dynamic to Local Exec model. */
	memcpy((uint8_t *) buf + lre->lre_offset - 4, le, sizeof(le) - 1);

	/*
	 * R_X86_64_TLSGD relocation is applied at gd[4]. After it's relaxed
	 * to Local Exec model, the resulting R_X86_64_TPOFF32 should be
	 * applied at le[12].
	 */
	s32 = _tls_tpoff(lo, lsb);
	WRITE_32(buf + lre->lre_offset + 8, s32);

	/* Ignore the next R_X86_64_PLT32 relocation for _tls_get_addr. */
	ls->ls_ignore_next_plt = 1;
}

/*
 * Rewrite a Local Dynamic TLS sequence in place to the Local Exec
 * model. The replacement is padded with prefix bytes so the overall
 * length is unchanged; no relocation value needs to be written here.
 */
static void
_tls_relax_ld_to_le(struct ld *ld, struct ld_state *ls,
    struct ld_reloc_entry *lre, uint8_t *buf)
{
	/*
	 * Local Exec model: (with padding)
	 *
	 * 0x00 .word 0x6666
	 * 0x02 .byte 0x66
	 * 0x03 movq %fs:0, %rax
	 */
	uint8_t le_p[] = "\x66\x66\x66\x64\x48\x8b\x04\x25\x00\x00\x00\x00";

	assert(lre->lre_type == R_X86_64_TLSLD);

	if (!_tls_verify_ld(buf, lre->lre_offset - 3))
		ld_warn(ld, "unrecognized TLS local dynamic model code");

	/* Rewrite Local Dynamic to Local Exec model. */
	memcpy(buf + lre->lre_offset - 3, le_p, sizeof(le_p) - 1);

	/* Ignore the next R_X86_64_PLT32 relocation for _tls_get_addr. */
	ls->ls_ignore_next_plt = 1;
}

/*
 * Rewrite an Initial Exec TLS access in place to the Local Exec model
 * by patching the opcode/ModRM (and REX prefix) bytes preceding the
 * relocated field, then write the thread-pointer offset.
 */
static void
_tls_relax_ie_to_le(struct ld *ld, struct ld_output *lo,
    struct ld_reloc_entry *lre, struct ld_symbol *lsb, uint8_t *buf)
{
	int32_t s32;
	uint8_t reg;

	(void) ld;

	assert(lre->lre_type == R_X86_64_GOTTPOFF);

	/*
	 * Rewrite Initial Exec to Local Exec model: rewrite
	 * "movq 0x0(%rip),%reg" to "movq 0x0,%reg". or,
	 * "addq 0x0(%rip),%rsp" to "addq 0x0,%rsp". or,
	 * "addq 0x0(%rip),%reg" to "leaq 0x0(%reg),%reg"
	 */
	reg = buf[lre->lre_offset - 1] >> 3;
	if (buf[lre->lre_offset - 2] == 0x8b) {
		/* movq 0x0(%rip),%reg -> movq 0x0,%reg. */
		buf[lre->lre_offset - 2] = 0xc7;
		buf[lre->lre_offset - 1] = 0xc0 | reg; /* Set r/m to `reg' */
		/*
		 * Set REX.B (high bit for r/m) if REX.R (high bit for reg)
		 * is set.
		 */
		if (buf[lre->lre_offset - 3] == 0x4c)
			buf[lre->lre_offset - 3] = 0x49;
	} else if (reg == 4) {
		/* addq 0x0(%rip),%rsp -> addq 0x0,%rsp */
		buf[lre->lre_offset - 2] = 0x81;
		buf[lre->lre_offset - 1] = 0xc0 | reg; /* Set r/m to `reg' */
		/*
		 * Set REX.B (high bit for r/m) if REX.R (high bit for reg)
		 * is set.
		 */
		if (buf[lre->lre_offset - 3] == 0x4c)
			buf[lre->lre_offset - 3] = 0x49;
	} else {
		/* addq 0x0(%rip),%reg -> leaq 0x0(%reg),%reg */
		buf[lre->lre_offset - 2] = 0x8d;
		/* Both reg and r/m in ModRM should be set to `reg' */
		buf[lre->lre_offset - 1] = 0x80 | reg | (reg << 3);
		/* Set both REX.B and REX.R if REX.R is set */
		if (buf[lre->lre_offset - 3] == 0x4c)
			buf[lre->lre_offset - 3] = 0x4d;
	}

	/*
	 * R_X86_64_GOTTPOFF relocation is applied at ie[12]. After it's
	 * relaxed to Local Exec model, the resulting R_X86_64_TPOFF32
	 * should be applied at le[12]. Thus the offset remains the same.
	 */
	s32 = _tls_tpoff(lo, lsb);
	WRITE_32(buf + lre->lre_offset, s32);
}

/*
 * Reserve the two-slot GOT entry pair used by the General Dynamic
 * model (module ID + offset).
 */
static void
_create_tls_gd_reloc(struct ld *ld, struct ld_symbol *lsb)
{

	/*
	 * Reserve 2 GOT entries and generate R_X86_64_DTPMOD64 and
	 * R_X86_64_DTPOFF64 relocations.
	 */
	if (!lsb->lsb_got) {
		_reserve_got_entry(ld, lsb, 2);
		_create_got_reloc(ld, lsb, R_X86_64_DTPMOD64,
		    lsb->lsb_got_off);
		_create_got_reloc(ld, lsb, R_X86_64_DTPOFF64,
		    lsb->lsb_got_off + 8);
	}
}

/*
 * Reserve the GOT entries used by the Local Dynamic model and mark
 * the symbol so _finalize_reloc() can drop the symbol reference.
 */
static void
_create_tls_ld_reloc(struct ld *ld, struct ld_symbol *lsb)
{

	/* Reserve 2 GOT entries and generate R_X86_64_DTPMOD64 relocation. */
	if (!lsb->lsb_got) {
		_reserve_got_entry(ld, lsb, 2);
		_create_got_reloc(ld, lsb, R_X86_64_DTPMOD64,
		    lsb->lsb_got_off);
		lsb->lsb_tls_ld = 1;
	}
}

/* Reserve the single GOT entry used by the Initial Exec model. */
static void
_create_tls_ie_reloc(struct ld *ld, struct ld_symbol *lsb)
{

	/* Reserve 1 GOT entry and generate R_X86_64_TPOFF64 relocation. */
	if (!lsb->lsb_got) {
		_reserve_got_entry(ld, lsb, 1);
		_create_got_reloc(ld, lsb, R_X86_64_TPOFF64,
		    lsb->lsb_got_off);
	}
}

/*
 * Register this back-end with the linker under the names "amd64" and
 * (as an alias) "x86-64", wiring up all arch callbacks.
 */
void
amd64_register(struct ld *ld)
{
	struct ld_arch *amd64, *amd64_alt;

	if ((amd64 = calloc(1, sizeof(*amd64))) == NULL)
		ld_fatal_std(ld, "calloc");

	snprintf(amd64->name, sizeof(amd64->name), "%s", "amd64");

	amd64->script = amd64_script;
	amd64->interp = "/libexec/ld-elf.so.1";
	amd64->get_max_page_size = _get_max_page_size;
	amd64->get_common_page_size = _get_common_page_size;
	amd64->scan_reloc = _scan_reloc;
	amd64->process_reloc = _process_reloc;
	amd64->adjust_reloc = _adjust_reloc;
	amd64->is_absolute_reloc = _is_absolute_reloc;
	amd64->is_relative_reloc = _is_relative_reloc;
	amd64->finalize_reloc = _finalize_reloc;
	amd64->finalize_got_and_plt = _finalize_got_and_plt;
	amd64->reloc_is_64bit = 1;
	amd64->reloc_is_rela = 1;
	amd64->reloc_entsize = sizeof(Elf64_Rela);

	HASH_ADD_STR(ld->ld_arch_list, name, amd64);

	/* Register the "x86-64" alias for the same back-end. */
	if ((amd64_alt = calloc(1, sizeof(*amd64_alt))) == NULL)
		ld_fatal_std(ld, "calloc");
	memcpy(amd64_alt, amd64, sizeof(struct ld_arch));
	amd64_alt->alias = amd64;
	snprintf(amd64_alt->name, sizeof(amd64_alt->name), "%s", "x86-64");

	HASH_ADD_STR(ld->ld_arch_list, name, amd64_alt);
}