Bring lld (release_39 branch, r279477) to contrib

This commit is contained in:
Ed Maste 2016-08-30 16:15:19 +00:00
commit 97c3811c96
154 changed files with 45405 additions and 0 deletions

View File

@ -0,0 +1,4 @@
{
"project_id" : "lld",
"conduit_uri" : "https://reviews.llvm.org/"
}

View File

@ -0,0 +1 @@
BasedOnStyle: LLVM

24
contrib/llvm/tools/lld/.gitignore vendored Normal file
View File

@ -0,0 +1,24 @@
#==============================================================================#
# This file specifies intentionally untracked files that git should ignore.
# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html
#==============================================================================#
#==============================================================================#
# File extensions to be ignored anywhere in the tree.
#==============================================================================#
# Temp files created by most text editors.
*~
# Merge files created by git.
*.orig
# Byte compiled python modules.
*.pyc
# vim swap files
.*.swp
# Mac OS X Finder layout info
.DS_Store
#==============================================================================#
# Directories to be ignored.
#==============================================================================#
# Sphinx build files.
docs/_build

View File

@ -0,0 +1,106 @@
# Root locations of the LLD source tree, its public headers and its build tree.
set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(LLD_INCLUDE_DIR ${LLD_SOURCE_DIR}/include )
set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})

# Compute the LLD version from the LLVM version.
# PACKAGE_VERSION is presumably provided by the enclosing LLVM build (verify).
# Every input below is quoted: an empty, unquoted expansion would leave
# string(REGEX ...) with too few arguments and abort configuration.
string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" LLD_VERSION
  "${PACKAGE_VERSION}")
message(STATUS "LLD version: ${LLD_VERSION}")

# Keep only the first (major) and second (minor) numeric components.
string(REGEX REPLACE "([0-9]+)\\.[0-9]+(\\.[0-9]+)?" "\\1" LLD_VERSION_MAJOR
  "${LLD_VERSION}")
string(REGEX REPLACE "[0-9]+\\.([0-9]+)(\\.[0-9]+)?" "\\1" LLD_VERSION_MINOR
  "${LLD_VERSION}")
# Determine LLD revision and repository.
# TODO: Figure out a way to get the revision and the repository on windows.
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
  # Run the helper scripts from the top-level source tree and capture their
  # standard output.
  execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetSourceVersion ${LLD_SOURCE_DIR}
                  OUTPUT_VARIABLE LLD_REVISION)
  execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetRepositoryPath ${LLD_SOURCE_DIR}
                  OUTPUT_VARIABLE LLD_REPOSITORY)

  # The inputs are quoted in every string() call below. Unquoted, a value that
  # becomes empty after stripping (e.g. the script printed only a newline)
  # would expand to no argument at all and make string(REGEX REPLACE) fail, and
  # any ';' in the output would be silently dropped.
  if(LLD_REPOSITORY)
    # Replace newline characters with spaces
    string(REGEX REPLACE "(\r?\n)+" " " LLD_REPOSITORY "${LLD_REPOSITORY}")
    # Remove leading spaces
    string(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REPOSITORY "${LLD_REPOSITORY}")
    # Remove trailing spaces
    string(REGEX REPLACE "(\ )+$" "" LLD_REPOSITORY "${LLD_REPOSITORY}")
  endif()

  if(LLD_REVISION)
    # Replace newline characters with spaces
    string(REGEX REPLACE "(\r?\n)+" " " LLD_REVISION "${LLD_REVISION}")
    # Remove leading spaces
    string(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REVISION "${LLD_REVISION}")
    # Remove trailing spaces
    string(REGEX REPLACE "(\ )+$" "" LLD_REVISION "${LLD_REVISION}")
  endif()
endif()
# Generate Version.inc in the build tree from its template in the source tree.
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Config/Version.inc.in
  ${CMAKE_CURRENT_BINARY_DIR}/include/lld/Config/Version.inc)

# Refuse to configure when the build directory is the source directory.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
  message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite "
"the makefiles distributed with LLVM. Please create a directory and run cmake "
"from there, passing the path to this source directory as the last argument. "
"This process created the file `CMakeCache.txt' and the directory "
"`CMakeFiles'. Please delete them.")
endif()

# Make LLD's own CMake modules findable by include() and find_package().
list(APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules")

# Optional VTune integration; off unless the user asks for it.
option(LLD_USE_VTUNE
       "Enable VTune user task tracking."
       OFF)
if(LLD_USE_VTUNE)
  find_package(VTune)
  if(VTUNE_FOUND)
    include_directories(${VTune_INCLUDE_DIRS})
    list(APPEND LLVM_COMMON_LIBS ${VTune_LIBRARIES})
    add_definitions(-DLLD_HAS_VTUNE)
  endif()
endif()

# Silence two MSVC warnings for everything configured below this point.
if(MSVC)
  add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.'
  add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header.
endif()

# Generated headers (build tree) are listed ahead of the checked-in ones.
include_directories(BEFORE
  ${CMAKE_CURRENT_BINARY_DIR}/include
  ${CMAKE_CURRENT_SOURCE_DIR}/include
  )

# Install the public headers unless only the toolchain is being installed.
if(NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
  install(DIRECTORY include/
    DESTINATION include
    FILES_MATCHING
    PATTERN "*.h"
    PATTERN ".svn" EXCLUDE
    )
endif()
# add_lld_library(<name> <args>...)
#
# Forwards all arguments to add_llvm_library() and then files the resulting
# target under the "lld libraries" folder.
macro(add_lld_library name)
  add_llvm_library(${name} ${ARGN})
  set_property(TARGET ${name} PROPERTY FOLDER "lld libraries")
endmacro()
# Core libraries and the lld driver executable.
add_subdirectory(lib)
add_subdirectory(tools/lld)

# Tests are configured only when the enclosing build enables them.
if(LLVM_INCLUDE_TESTS)
  add_subdirectory(test)
  add_subdirectory(unittests)
endif()

# Documentation, followed by the COFF and ELF subdirectories.
add_subdirectory(docs)
add_subdirectory(COFF)
add_subdirectory(ELF)

View File

@ -0,0 +1,19 @@
This file is a list of the people responsible for ensuring that patches for a
particular part of LLD are reviewed, either by themselves or by someone else.
They are also the gatekeepers for their part of LLD, with the final word on
what goes in or not.
The list is sorted by surname and formatted to allow easy grepping and
beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S). Each entry should contain at least the (N), (E) and (D) fields.
N: Rui Ueyama
E: ruiu@google.com
D: COFF, ELF backends (COFF/* ELF/*)
N: Lang Hames, Nick Kledzik
E: lhames@gmail.com, kledzik@apple.com
D: Mach-O backend

View File

@ -0,0 +1,36 @@
# Generate Options.inc from Options.td and expose the step as a target that
# the library can depend on.
set(LLVM_TARGET_DEFINITIONS Options.td)
tablegen(LLVM Options.inc -gen-opt-parser-defs)
add_public_tablegen_target(COFFOptionsTableGen)

# The COFF library: its sources, the LLVM components it links against, and
# the extra libraries in PTHREAD_LIB.
add_lld_library(lldCOFF
  Chunks.cpp
  DLL.cpp
  Driver.cpp
  DriverUtils.cpp
  Error.cpp
  ICF.cpp
  InputFiles.cpp
  Librarian.cpp
  MarkLive.cpp
  ModuleDef.cpp
  PDB.cpp
  SymbolTable.cpp
  Symbols.cpp
  Writer.cpp

  LINK_COMPONENTS
  ${LLVM_TARGETS_TO_BUILD}
  Core
  LTO
  LibDriver
  Object
  MC
  MCDisassembler
  Target
  Option
  Support

  LINK_LIBS ${PTHREAD_LIB}
  )

# Options.inc must be generated before any library source is compiled.
add_dependencies(lldCOFF COFFOptionsTableGen)

View File

@ -0,0 +1,337 @@
//===- Chunks.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::COFF;
using llvm::support::ulittle32_t;
namespace lld {
namespace coff {
// Creates the chunk for section header H of object file F. The relocation
// range and its length are captured up front; the chunk starts as its own
// replacement (Repl == this).
SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H)
    : Chunk(SectionKind), Repl(this), File(F), Header(H),
      Relocs(File->getCOFFObj()->getRelocations(Header)),
      NumRelocs(std::distance(Relocs.begin(), Relocs.end())) {
  // Initialize SectionName. The reader's return value is not inspected.
  File->getCOFFObj()->getSectionName(Header, SectionName);

  Align = Header->getAlignment();

  // Only COMDAT sections are subject to dead-stripping, so every other
  // section starts out live.
  Live = !isCOMDAT();
}
// Read-modify-write helpers for little-endian fields at P: add16/add32/add64
// add V to the stored value, or16 ORs V into it. The result is written back
// with the same width it was read with.
static void add16(uint8_t *P, int16_t V) {
  uint16_t Old = read16le(P);
  write16le(P, Old + V);
}

static void add32(uint8_t *P, int32_t V) {
  uint32_t Old = read32le(P);
  write32le(P, Old + V);
}

static void add64(uint8_t *P, int64_t V) {
  uint64_t Old = read64le(P);
  write64le(P, Old + V);
}

static void or16(uint8_t *P, uint16_t V) {
  uint16_t Old = read16le(P);
  write16le(P, Old | V);
}
// Applies one AMD64 relocation of kind Type to the bytes at Off.
//   Sym - the symbol the relocation refers to.
//   P   - RVA of the location being patched.
// Relocation kinds not listed below are a fatal error.
void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym,
                               uint64_t P) const {
  const uint64_t SymRVA = Sym->getRVA();

  switch (Type) {
  // Absolute addresses: symbol RVA plus the image base.
  case IMAGE_REL_AMD64_ADDR32:
    add32(Off, SymRVA + Config->ImageBase);
    break;
  case IMAGE_REL_AMD64_ADDR64:
    add64(Off, SymRVA + Config->ImageBase);
    break;
  // Bare RVA, without the image base.
  case IMAGE_REL_AMD64_ADDR32NB:
    add32(Off, SymRVA);
    break;
  // Relative forms: distance from P, minus 4 through 9 bytes.
  case IMAGE_REL_AMD64_REL32:
    add32(Off, SymRVA - P - 4);
    break;
  case IMAGE_REL_AMD64_REL32_1:
    add32(Off, SymRVA - P - 5);
    break;
  case IMAGE_REL_AMD64_REL32_2:
    add32(Off, SymRVA - P - 6);
    break;
  case IMAGE_REL_AMD64_REL32_3:
    add32(Off, SymRVA - P - 7);
    break;
  case IMAGE_REL_AMD64_REL32_4:
    add32(Off, SymRVA - P - 8);
    break;
  case IMAGE_REL_AMD64_REL32_5:
    add32(Off, SymRVA - P - 9);
    break;
  // Section index and section-relative offset of the symbol.
  case IMAGE_REL_AMD64_SECTION:
    add16(Off, Sym->getSectionIndex());
    break;
  case IMAGE_REL_AMD64_SECREL:
    add32(Off, Sym->getSecrel());
    break;
  default:
    fatal("unsupported relocation type");
  }
}
// Applies one i386 relocation of kind Type to the bytes at Off.
//   Sym - the symbol the relocation refers to.
//   P   - RVA of the location being patched.
// IMAGE_REL_I386_ABSOLUTE is a no-op; kinds not listed are a fatal error.
void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym,
                               uint64_t P) const {
  const uint64_t SymRVA = Sym->getRVA();

  switch (Type) {
  case IMAGE_REL_I386_ABSOLUTE:
    break;
  case IMAGE_REL_I386_DIR32:
    add32(Off, SymRVA + Config->ImageBase);
    break;
  case IMAGE_REL_I386_DIR32NB:
    add32(Off, SymRVA);
    break;
  case IMAGE_REL_I386_REL32:
    add32(Off, SymRVA - P - 4);
    break;
  case IMAGE_REL_I386_SECTION:
    add16(Off, Sym->getSectionIndex());
    break;
  case IMAGE_REL_I386_SECREL:
    add32(Off, Sym->getSecrel());
    break;
  default:
    fatal("unsupported relocation type");
  }
}
// Scatters the 16-bit value V over the two 16-bit words at Off and Off + 2 by
// OR-ing the pieces into what is already stored there:
//   word 0: V bit 11 -> bit 10, V bits 15..12 -> bits 3..0
//   word 1: V bits 10..8 -> bits 14..12, V bits 7..0 -> bits 7..0
static void applyMOV(uint8_t *Off, uint16_t V) {
  uint16_t FirstWord = ((V & 0x800) >> 1) | ((V >> 12) & 0xf);
  uint16_t SecondWord = ((V & 0x700) << 4) | (V & 0xff);
  or16(Off, FirstWord);
  or16(Off + 2, SecondWord);
}
// Patches a MOVW/MOVT instruction pair with the 32-bit value V: the low half
// goes into the instruction at Off, the high half into the one at Off + 4.
static void applyMOV32T(uint8_t *Off, uint32_t V) {
  uint16_t LowHalf = static_cast<uint16_t>(V);
  uint16_t HighHalf = static_cast<uint16_t>(V >> 16);
  applyMOV(Off, LowHalf);      // set MOVW operand
  applyMOV(Off + 4, HighHalf); // set MOVT operand
}
// ORs the branch displacement V into the two 16-bit words at Off and Off + 2
// for a BRANCH20T relocation. The sign bit, bits 19 and 18, a 6-bit field
// (bits 17..12) and an 11-bit field (bits 11..1) of V are placed separately.
static void applyBranch20T(uint8_t *Off, int32_t V) {
  uint32_t Sign = V < 0 ? 1 : 0;
  uint32_t Bit19 = (V >> 19) & 1;
  uint32_t Bit18 = (V >> 18) & 1;
  uint16_t FirstWord = (Sign << 10) | ((V >> 12) & 0x3f);
  uint16_t SecondWord = (Bit19 << 13) | (Bit18 << 11) | ((V >> 1) & 0x7ff);
  or16(Off, FirstWord);
  or16(Off + 2, SecondWord);
}
// ORs the branch displacement V into the two 16-bit words at Off and Off + 2
// for the BRANCH24T / BLX23T relocations. Unlike applyBranch20T, the J1/J2
// bits are the inverted bits 23 and 22 of V, XOR-ed with the sign bit, and
// the first word carries a 10-bit field (bits 21..12).
static void applyBranch24T(uint8_t *Off, int32_t V) {
  uint32_t Sign = V < 0 ? 1 : 0;
  uint32_t J1 = ((~V >> 23) & 1) ^ Sign;
  uint32_t J2 = ((~V >> 22) & 1) ^ Sign;
  uint16_t FirstWord = (Sign << 10) | ((V >> 12) & 0x3ff);
  uint16_t SecondWord = (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff);
  or16(Off, FirstWord);
  or16(Off + 2, SecondWord);
}
// Applies one ARMNT relocation of kind Type to the bytes at Off.
//   Sym - the symbol the relocation refers to.
//   P   - RVA of the location being patched.
// Relocation kinds not listed below are a fatal error.
void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym,
                               uint64_t P) const {
  uint64_t Target = Sym->getRVA();

  // Pointer to thumb code must have the LSB set.
  if (Sym->isExecutable())
    Target |= 1;

  switch (Type) {
  case IMAGE_REL_ARM_ADDR32:
    add32(Off, Target + Config->ImageBase);
    break;
  case IMAGE_REL_ARM_ADDR32NB:
    add32(Off, Target);
    break;
  case IMAGE_REL_ARM_MOV32T:
    applyMOV32T(Off, Target + Config->ImageBase);
    break;
  case IMAGE_REL_ARM_BRANCH20T:
    applyBranch20T(Off, Target - P - 4);
    break;
  // BRANCH24T and BLX23T share the same encoding helper.
  case IMAGE_REL_ARM_BRANCH24T:
    applyBranch24T(Off, Target - P - 4);
    break;
  case IMAGE_REL_ARM_BLX23T:
    applyBranch24T(Off, Target - P - 4);
    break;
  default:
    fatal("unsupported relocation type");
  }
}
void SectionChunk::writeTo(uint8_t *Buf) const {
if (!hasData())
return;
// Copy section contents from source object file to output file.
ArrayRef<uint8_t> A = getContents();
memcpy(Buf + OutputSectionOff, A.data(), A.size());
// Apply relocations.
for (const coff_relocation &Rel : Relocs) {
uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress;
SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl();
Defined *Sym = cast<Defined>(Body);
uint64_t P = RVA + Rel.VirtualAddress;
switch (Config->Machine) {
case AMD64:
applyRelX64(Off, Rel.Type, Sym, P);
break;
case I386:
applyRelX86(Off, Rel.Type, Sym, P);
break;
case ARMNT:
applyRelARM(Off, Rel.Type, Sym, P);
break;
default:
llvm_unreachable("unknown machine type");
}
}
}
// Records Child as an associative child of this chunk by appending it to
// AssocChildren.
void SectionChunk::addAssociative(SectionChunk *Child) {
  AssocChildren.push_back(Child);
}
// Maps a COFF relocation to the base relocation type the loader needs for the
// configured machine. IMAGE_REL_BASED_ABSOLUTE is returned for every
// relocation kind that is not matched explicitly below.
static uint8_t getBaserelType(const coff_relocation &Rel) {
  switch (Config->Machine) {
  case AMD64:
    return Rel.Type == IMAGE_REL_AMD64_ADDR64 ? IMAGE_REL_BASED_DIR64
                                              : IMAGE_REL_BASED_ABSOLUTE;
  case I386:
    return Rel.Type == IMAGE_REL_I386_DIR32 ? IMAGE_REL_BASED_HIGHLOW
                                            : IMAGE_REL_BASED_ABSOLUTE;
  case ARMNT:
    if (Rel.Type == IMAGE_REL_ARM_ADDR32)
      return IMAGE_REL_BASED_HIGHLOW;
    return Rel.Type == IMAGE_REL_ARM_MOV32T ? IMAGE_REL_BASED_ARM_MOV32T
                                            : IMAGE_REL_BASED_ABSOLUTE;
  default:
    llvm_unreachable("unknown machine type");
  }
}
// Windows-specific.
// Appends to Res one base relocation for every location in this section that
// holds an absolute address, so the loader can fix it up if the image is not
// loaded at its preferred base.
// Only called when base relocation is enabled.
void SectionChunk::getBaserels(std::vector<Baserel> *Res) {
  for (const coff_relocation &Rel : Relocs) {
    uint8_t Ty = getBaserelType(Rel);
    if (Ty != IMAGE_REL_BASED_ABSOLUTE) {
      SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl();
      // Relocations against DefinedAbsolute symbols are skipped.
      if (!isa<DefinedAbsolute>(Body))
        Res->emplace_back(RVA + Rel.VirtualAddress, Ty);
    }
  }
}
// True unless the section header is flagged as uninitialized data.
bool SectionChunk::hasData() const {
  return (Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
}

// The section characteristics restricted to the bits selected by PermMask.
uint32_t SectionChunk::getPermissions() const {
  return Header->Characteristics & PermMask;
}

// True if the section header carries the COMDAT flag.
bool SectionChunk::isCOMDAT() const {
  return (Header->Characteristics & IMAGE_SCN_LNK_COMDAT) != 0;
}
// Prints "Discarded <name>" to stdout for a chunk that has a symbol and is
// still its own replacement.
void SectionChunk::printDiscardedMessage() const {
  // Removed by dead-stripping. If it's removed by ICF, ICF already
  // printed out the name, so don't repeat that here.
  if (!Sym || this != Repl)
    return;
  llvm::outs() << "Discarded " << Sym->getName() << "\n";
}
// Returns the name of the associated symbol, or an empty string when the
// chunk has no symbol.
StringRef SectionChunk::getDebugName() {
  return Sym ? Sym->getName() : StringRef("");
}
// Returns the section's raw bytes as reported by the object file reader.
// The reader's return value is not inspected; if it leaves the reference
// untouched, the returned ArrayRef is the default-constructed one.
ArrayRef<uint8_t> SectionChunk::getContents() const {
  ArrayRef<uint8_t> Contents;
  File->getCOFFObj()->getSectionContents(Header, Contents);
  return Contents;
}
// Folds Other into this chunk: Other is marked as not live and its
// replacement pointer is redirected to this chunk's replacement.
void SectionChunk::replace(SectionChunk *Other) {
  Other->Live = false;
  Other->Repl = Repl;
}
// Creates a chunk for common symbol S and derives its alignment from the
// symbol's value, capped at 32 bytes.
CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
  // Common symbols are aligned on natural boundaries up to 32 bytes.
  // This is what MSVC link.exe does.
  // NOTE(review): confirm NextPowerOf2's result for values that are already a
  // power of two.
  const uint64_t MaxAlign = 32;
  uint64_t Pow2 = NextPowerOf2(Sym.getValue());
  Align = std::min(MaxAlign, Pow2);
}
// Common chunks are uninitialized data that is both readable and writable.
uint32_t CommonChunk::getPermissions() const {
  return IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
         IMAGE_SCN_MEM_WRITE;
}
// Copies the characters of Str to this chunk's position in Buf. Only
// Str.size() bytes are written; no terminator byte is stored here.
void StringChunk::writeTo(uint8_t *Buf) const {
  uint8_t *Dest = Buf + OutputSectionOff;
  memcpy(Dest, Str.data(), Str.size());
}
// Creates a thunk that jumps through import symbol S.
ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) {
  // Intel Optimization Manual says that all branch targets
  // should be 16-byte aligned. MSVC linker does this too.
  Align = 16;
}

// Emits the thunk template and fills in its 32-bit operand with the distance
// from the end of the thunk to the import symbol.
void ImportThunkChunkX64::writeTo(uint8_t *Buf) const {
  uint8_t *Thunk = Buf + OutputSectionOff;
  memcpy(Thunk, ImportThunkX86, sizeof(ImportThunkX86));
  // The first two bytes is a JMP instruction. Fill its operand.
  uint64_t Disp = ImpSymbol->getRVA() - RVA - getSize();
  write32le(Thunk + 2, Disp);
}
// The operand written by writeTo() starts two bytes into the thunk and holds
// an absolute address, so it gets a base relocation of the default type.
void ImportThunkChunkX86::getBaserels(std::vector<Baserel> *Res) {
  Res->emplace_back(getRVA() + 2);
}

// Emits the thunk template and fills in its operand with the absolute
// address (RVA plus image base) of the import symbol.
void ImportThunkChunkX86::writeTo(uint8_t *Buf) const {
  uint8_t *Thunk = Buf + OutputSectionOff;
  memcpy(Thunk, ImportThunkX86, sizeof(ImportThunkX86));
  // The first two bytes is a JMP instruction. Fill its operand.
  write32le(Thunk + 2, ImpSymbol->getRVA() + Config->ImageBase);
}
// The MOV pair at the start of the thunk encodes an absolute address, so it
// gets an ARM_MOV32T base relocation.
void ImportThunkChunkARM::getBaserels(std::vector<Baserel> *Res) {
  Res->emplace_back(getRVA(), IMAGE_REL_BASED_ARM_MOV32T);
}

// Emits the thunk template and patches its MOV pair with the absolute
// address (RVA plus image base) of the import symbol.
void ImportThunkChunkARM::writeTo(uint8_t *Buf) const {
  uint8_t *Thunk = Buf + OutputSectionOff;
  memcpy(Thunk, ImportThunkARM, sizeof(ImportThunkARM));
  // Fix mov.w and mov.t operands.
  applyMOV32T(Thunk, ImpSymbol->getRVA() + Config->ImageBase);
}
// The chunk holds one absolute address, so it gets one base relocation of
// the default type at its own RVA.
void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) {
  Res->emplace_back(getRVA());
}

// 8 bytes when Config->is64() is true, 4 bytes otherwise.
size_t LocalImportChunk::getSize() const {
  if (Config->is64())
    return 8;
  return 4;
}

// Stores the absolute address (RVA plus image base) of Sym, using the same
// width that getSize() reports.
void LocalImportChunk::writeTo(uint8_t *Buf) const {
  uint8_t *Dest = Buf + OutputSectionOff;
  uint64_t Addr = Sym->getRVA() + Config->ImageBase;
  if (Config->is64())
    write64le(Dest, Addr);
  else
    write32le(Dest, Addr);
}
void SEHTableChunk::writeTo(uint8_t *Buf) const {
ulittle32_t *Begin = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff);
size_t Cnt = 0;
for (Defined *D : Syms)
Begin[Cnt++] = D->getRVA();
std::sort(Begin, Begin + Cnt);
}
// Windows-specific.
// Builds one block of the .reloc section for the page starting at RVA Page
// from the base relocations in [Begin, End).
BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) {
  // Block header consists of 4 byte page RVA and 4 byte block size.
  // Each entry is 2 byte. Last entry may be padding.
  const size_t HeaderSize = 8;
  Data.resize(alignTo((End - Begin) * 2 + HeaderSize, 4));

  uint8_t *Header = Data.data();
  write32le(Header, Page);
  write32le(Header + 4, Data.size());

  // Each entry packs the type into the top 4 bits and the offset of the
  // location within the page into the remaining 12.
  uint8_t *Entry = Header + HeaderSize;
  for (Baserel *I = Begin; I != End; ++I, Entry += 2)
    write16le(Entry, (I->Type << 12) | (I->RVA - Page));
}
// Copies the block prepared by the constructor to this chunk's position in
// Buf.
void BaserelChunk::writeTo(uint8_t *Buf) const {
  uint8_t *Dest = Buf + OutputSectionOff;
  memcpy(Dest, Data.data(), Data.size());
}
uint8_t Baserel::getDefaultType() {
switch (Config->Machine) {
case AMD64:
return IMAGE_REL_BASED_DIR64;
case I386:
return IMAGE_REL_BASED_HIGHLOW;
default:
llvm_unreachable("unknown machine type");
}
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,332 @@
//===- Chunks.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_CHUNKS_H
#define LLD_COFF_CHUNKS_H
#include "Config.h"
#include "InputFiles.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Object/COFF.h"
#include <atomic>
#include <utility>
#include <vector>
namespace lld {
namespace coff {
using llvm::COFF::ImportDirectoryTableEntry;
using llvm::object::COFFSymbolRef;
using llvm::object::SectionRef;
using llvm::object::coff_relocation;
using llvm::object::coff_section;
using llvm::sys::fs::file_magic;
class Baserel;
class Defined;
class DefinedImportData;
class DefinedRegular;
class ObjectFile;
class OutputSection;
class SymbolBody;
// Mask for section types (code, data, bss, discardable, etc.)
// and permissions (writable, readable or executable).
const uint32_t PermMask = 0xFF0000F0;
// A Chunk is a piece of data that will occupy space in the output (if the
// resolver chose it). It may be backed by a section of an input file, be
// linker-created data, or have no actual data at all (common or bss).
class Chunk {
public:
  enum Kind { SectionKind, OtherKind };
  Kind kind() const { return ChunkKind; }
  virtual ~Chunk() = default;

  // Size of this chunk in bytes (also reported for common or BSS chunks).
  virtual size_t getSize() const = 0;

  // Writes this chunk to a mmap'ed file; Buf points to the beginning of the
  // file. Because this may use RVA values of other chunks for relocations,
  // those must be set properly before calling it. The default does nothing.
  virtual void writeTo(uint8_t *Buf) const {}

  // Addresses; the writer sets and uses them.
  uint64_t getRVA() const { return RVA; }
  uint32_t getAlign() const { return Align; }
  void setRVA(uint64_t V) { RVA = V; }
  void setOutputSectionOff(uint64_t V) { OutputSectionOff = V; }

  // True if this chunk has non-zero data. BSS chunks return false, in which
  // case the space occupied by the chunk will be filled with zeros.
  virtual bool hasData() const { return true; }

  // Readable/writable/executable bits.
  virtual uint32_t getPermissions() const { return 0; }

  // The section name, if this is a section chunk.
  // It is illegal to call this function on non-section chunks.
  virtual StringRef getSectionName() const {
    llvm_unreachable("unimplemented getSectionName");
  }

  // An output section points to the chunks it contains, and each chunk
  // points back to its output section.
  void setOutputSection(OutputSection *O) { Out = O; }
  OutputSection *getOutputSection() { return Out; }

  // Windows-specific.
  // Collects all locations that contain absolute addresses for base
  // relocations. The default adds nothing.
  virtual void getBaserels(std::vector<Baserel> *Res) {}

  // A human-readable name for this chunk. Chunks are unnamed runs of bytes,
  // so this is used only for logging or debugging.
  virtual StringRef getDebugName() { return ""; }

protected:
  Chunk(Kind K = OtherKind) : ChunkKind(K) {}
  const Kind ChunkKind;

  // The RVA of this chunk in the output. The writer sets a value.
  uint64_t RVA = 0;

  // The offset from beginning of the output section. The writer sets a value.
  uint64_t OutputSectionOff = 0;

  // The output section for this chunk.
  OutputSection *Out = nullptr;

  // The alignment of this chunk. The writer uses the value.
  uint32_t Align = 1;
};
// A chunk corresponding to a section of an input file.
class SectionChunk : public Chunk {
  // Identical COMDAT Folding feature accesses section internal data.
  friend class ICF;

public:
  // Iterates over a range of relocations and yields, for each one, the
  // symbol body the relocation refers to.
  class symbol_iterator : public llvm::iterator_adaptor_base<
                              symbol_iterator, const coff_relocation *,
                              std::random_access_iterator_tag, SymbolBody *> {
    friend SectionChunk;

    ObjectFile *File;

    symbol_iterator(ObjectFile *File, const coff_relocation *I)
        : symbol_iterator::iterator_adaptor_base(I), File(File) {}

  public:
    symbol_iterator() = default;

    SymbolBody *operator*() const {
      return File->getSymbolBody(I->SymbolTableIndex);
    }
  };

  SectionChunk(ObjectFile *File, const coff_section *Header);
  static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
  size_t getSize() const override { return Header->SizeOfRawData; }
  ArrayRef<uint8_t> getContents() const;
  void writeTo(uint8_t *Buf) const override;
  bool hasData() const override;
  uint32_t getPermissions() const override;
  StringRef getSectionName() const override { return SectionName; }
  void getBaserels(std::vector<Baserel> *Res) override;
  bool isCOMDAT() const;

  // Per-machine relocation appliers. Off is the location to patch, Type the
  // relocation kind, Sym the referenced symbol and P the RVA of the location.
  void applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const;
  void applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const;
  void applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const;

  // Called if the garbage collector decides to not include this chunk
  // in a final output. It's supposed to print out a log message to stdout.
  void printDiscardedMessage() const;

  // Adds COMDAT associative sections to this COMDAT section. A chunk
  // and its children are treated as a group by the garbage collector.
  void addAssociative(SectionChunk *Child);

  StringRef getDebugName() override;

  // Sets the symbol once; later calls do not overwrite an existing one.
  void setSymbol(DefinedRegular *S) { if (!Sym) Sym = S; }

  // Used by the garbage collector. Every chunk counts as live when garbage
  // collection is disabled.
  bool isLive() { return !Config->DoGC || Live; }
  void markLive() {
    assert(!isLive() && "Cannot mark an already live section!");
    Live = true;
  }

  // Allow iteration over the bodies of this chunk's relocated symbols.
  llvm::iterator_range<symbol_iterator> symbols() const {
    return llvm::make_range(symbol_iterator(File, Relocs.begin()),
                            symbol_iterator(File, Relocs.end()));
  }

  // Allow iteration over the associated child chunks for this section.
  ArrayRef<SectionChunk *> children() const { return AssocChildren; }

  // A pointer to the replacement for this chunk.
  // Initially it points to "this" object. If this chunk is merged
  // with another chunk by ICF, it points to that chunk,
  // and this chunk is considered dead.
  SectionChunk *Repl;

  // The CRC of the contents as described in the COFF spec 4.5.5.
  // Auxiliary Format 5: Section Definitions. Used for ICF.
  uint32_t Checksum = 0;

private:
  // A file this chunk was created from.
  ObjectFile *File;

  const coff_section *Header;
  StringRef SectionName;
  std::vector<SectionChunk *> AssocChildren;
  llvm::iterator_range<const coff_relocation *> Relocs;
  size_t NumRelocs;

  // Used by the garbage collector.
  bool Live;

  // Used for ICF (Identical COMDAT Folding)
  void replace(SectionChunk *Other);
  std::atomic<uint64_t> GroupID = { 0 };

  // Sym points to a section symbol if this is a COMDAT chunk.
  DefinedRegular *Sym = nullptr;
};
// A chunk for a common symbol. It carries no data of its own; its size is
// the symbol's value and it is reported as part of ".bss".
class CommonChunk : public Chunk {
public:
  CommonChunk(const COFFSymbolRef Sym);

  size_t getSize() const override { return Sym.getValue(); }
  bool hasData() const override { return false; }
  uint32_t getPermissions() const override;
  StringRef getSectionName() const override { return ".bss"; }

private:
  const COFFSymbolRef Sym;
};
// A chunk for a linker-created string. The reported size is one byte larger
// than the string itself.
class StringChunk : public Chunk {
public:
  explicit StringChunk(StringRef S) : Str(S) {}

  size_t getSize() const override { return Str.size() + 1; }
  void writeTo(uint8_t *Buf) const override;

private:
  StringRef Str;
};
// Machine-code template for the x86 import thunk. The four zero bytes after
// the opcode are the operand that the thunk chunks fill in.
static const uint8_t ImportThunkX86[] = {
    0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
};

// Machine-code template for the ARM import thunk. The immediates of the
// first two instructions are filled in by the thunk chunk.
static const uint8_t ImportThunkARM[] = {
    0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
    0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0
    0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
};
// Windows-specific.
// A chunk for a DLL import jump table entry. In the final output, its
// contents will be a JMP instruction to some __imp_ symbol.
class ImportThunkChunkX64 : public Chunk {
public:
  explicit ImportThunkChunkX64(Defined *S);

  size_t getSize() const override { return sizeof(ImportThunkX86); }
  void writeTo(uint8_t *Buf) const override;

private:
  Defined *ImpSymbol;
};
// Import thunk for x86 targets. Unlike the x64 variant, it also reports a
// base relocation.
class ImportThunkChunkX86 : public Chunk {
public:
  explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {}

  size_t getSize() const override { return sizeof(ImportThunkX86); }
  void getBaserels(std::vector<Baserel> *Res) override;
  void writeTo(uint8_t *Buf) const override;

private:
  Defined *ImpSymbol;
};
// Import thunk for ARM targets, sized after the ImportThunkARM template.
class ImportThunkChunkARM : public Chunk {
public:
  explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {}

  size_t getSize() const override { return sizeof(ImportThunkARM); }
  void getBaserels(std::vector<Baserel> *Res) override;
  void writeTo(uint8_t *Buf) const override;

private:
  Defined *ImpSymbol;
};
// Windows-specific.
// See comments for DefinedLocalImport class.
class LocalImportChunk : public Chunk {
public:
  explicit LocalImportChunk(Defined *S) : Sym(S) {}

  size_t getSize() const override;
  void getBaserels(std::vector<Baserel> *Res) override;
  void writeTo(uint8_t *Buf) const override;

private:
  Defined *Sym;
};
// Windows-specific.
// A chunk for the SEH table, which contains RVAs of safe exception handler
// functions. x86-only. Each symbol contributes one 4-byte entry.
class SEHTableChunk : public Chunk {
public:
  explicit SEHTableChunk(std::set<Defined *> S) : Syms(std::move(S)) {}

  size_t getSize() const override { return Syms.size() * 4; }
  void writeTo(uint8_t *Buf) const override;

private:
  std::set<Defined *> Syms;
};
// Windows-specific.
// This class represents a block in .reloc section.
// See the PE/COFF spec 5.6 for details.
// The block's bytes are built in the constructor and copied out by writeTo().
class BaserelChunk : public Chunk {
public:
  BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End);

  size_t getSize() const override { return Data.size(); }
  void writeTo(uint8_t *Buf) const override;

private:
  std::vector<uint8_t> Data;
};
// One base relocation: the RVA of a location and its relocation type.
class Baserel {
public:
  Baserel(uint32_t V, uint8_t Ty) : RVA(V), Type(Ty) {}

  // Uses the default relocation type for the configured machine.
  explicit Baserel(uint32_t V) : Baserel(V, getDefaultType()) {}

  uint8_t getDefaultType();

  uint32_t RVA;
  uint8_t Type;
};
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,151 @@
//===- Config.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_CONFIG_H
#define LLD_COFF_CONFIG_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/COFF.h"
#include <cstdint>
#include <map>
#include <set>
#include <string>
namespace lld {
namespace coff {
using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
using llvm::COFF::WindowsSubsystem;
using llvm::StringRef;
class DefinedAbsolute;
class DefinedRelative;
class StringChunk;
class Undefined;
// Short aliases for the machine types this linker refers to.
static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
static const auto ARMNT = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT;
static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386;
// Represents an /export option.
struct Export {
  StringRef Name;    // N in /export:N or /export:E=N
  StringRef ExtName; // E in /export:E=N
  Undefined *Sym = nullptr;
  uint16_t Ordinal = 0;
  bool Noname = false;
  bool Data = false;
  bool Private = false;

  // If an export is a form of /export:foo=dllname.bar, that means
  // that foo should be exported as an alias to bar in the DLL.
  // ForwardTo is set to "dllname.bar" part. Usually empty.
  StringRef ForwardTo;
  StringChunk *ForwardChunk = nullptr;

  // True if this /export option was in .drectve section.
  bool Directives = false;
  StringRef SymbolName;
  StringRef ExportName; // Name in DLL

  // Two exports are equal when their names, ordinal and the Noname / Data /
  // Private flags match; the other fields are not compared.
  // Const-qualified so that const Export objects can be compared as well.
  bool operator==(const Export &E) const {
    return (Name == E.Name && ExtName == E.ExtName &&
            Ordinal == E.Ordinal && Noname == E.Noname &&
            Data == E.Data && Private == E.Private);
  }
};
// Global configuration.
struct Configuration {
  enum ManifestKind { SideBySide, Embed, No };

  // True when the configured machine is AMD64. Const-qualified because it
  // only reads Machine, so it is also callable through a const Configuration.
  bool is64() const { return Machine == AMD64; }

  llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
  bool Verbose = false;
  WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN;
  Undefined *Entry = nullptr;
  bool NoEntry = false;
  std::string OutputFile;
  bool DoGC = true;
  bool DoICF = true;
  bool Relocatable = true;
  bool Force = false;
  bool Debug = false;
  bool WriteSymtab = true;

  // Symbols in this set are considered as live by the garbage collector.
  std::set<Undefined *> GCRoot;

  std::set<StringRef> NoDefaultLibs;
  bool NoDefaultLibAll = false;

  // True if we are creating a DLL.
  bool DLL = false;
  StringRef Implib;
  std::vector<Export> Exports;
  std::set<std::string> DelayLoads;
  std::map<std::string, int> DLLOrder;
  Undefined *DelayLoadHelper = nullptr;

  // Used for SafeSEH.
  DefinedRelative *SEHTable = nullptr;
  DefinedAbsolute *SEHCount = nullptr;

  // Used for /opt:lldlto=N
  unsigned LTOOptLevel = 2;

  // Used for /opt:lldltojobs=N
  unsigned LTOJobs = 1;

  // Used for /merge:from=to (e.g. /merge:.rdata=.text)
  std::map<StringRef, StringRef> Merge;

  // Used for /section=.name,{DEKPRSW} to set section attributes.
  std::map<StringRef, uint32_t> Section;

  // Options for manifest files.
  ManifestKind Manifest = SideBySide;
  int ManifestID = 1;
  StringRef ManifestDependency;
  bool ManifestUAC = true;
  std::vector<std::string> ManifestInput;
  StringRef ManifestLevel = "'asInvoker'";
  StringRef ManifestUIAccess = "'false'";
  StringRef ManifestFile;

  // Used for /failifmismatch.
  std::map<StringRef, StringRef> MustMatch;

  // Used for /alternatename.
  std::map<StringRef, StringRef> AlternateNames;

  // -1 wraps to the largest uint64_t value.
  // NOTE(review): presumably a "not set yet" marker -- confirm where it is
  // replaced with a real base address.
  uint64_t ImageBase = -1;
  uint64_t StackReserve = 1024 * 1024;
  uint64_t StackCommit = 4096;
  uint64_t HeapReserve = 1024 * 1024;
  uint64_t HeapCommit = 4096;
  uint32_t MajorImageVersion = 0;
  uint32_t MinorImageVersion = 0;
  uint32_t MajorOSVersion = 6;
  uint32_t MinorOSVersion = 0;
  bool DynamicBase = true;
  bool AllowBind = true;
  bool NxCompat = true;
  bool AllowIsolation = true;
  bool TerminalServerAware = true;
  bool LargeAddressAware = false;
  bool HighEntropyVA = false;
};
extern Configuration *Config;
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,571 @@
//===- DLL.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines various types of chunks for the DLL import or export
// descriptor tables. They are inherently Windows-specific.
// You need to read Microsoft PE/COFF spec to understand details
// about the data structures.
//
// If you are not particularly interested in linking against Windows
// DLL, you can skip this file, and you should still be able to
// understand the rest of the linker.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "DLL.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::COFF;
namespace lld {
namespace coff {
namespace {
// Import table

// Size in bytes of one import table slot: 8 when Config->is64() is true,
// 4 otherwise.
static int ptrSize() {
  if (Config->is64())
    return 8;
  return 4;
}
// A chunk for the import descriptor table: a 16-bit hint followed by a
// symbol name.
class HintNameChunk : public Chunk {
public:
  HintNameChunk(StringRef N, uint16_t H) : Name(N), Hint(H) {}

  size_t getSize() const override {
    // Starts with 2 byte Hint field, followed by a null-terminated string,
    // ends with 0 or 1 byte padding.
    return alignTo(Name.size() + 3, 2);
  }

  // Writes the hint and the name characters. The terminator and padding
  // bytes counted by getSize() are not written here.
  void writeTo(uint8_t *Buf) const override {
    uint8_t *Dest = Buf + OutputSectionOff;
    write16le(Dest, Hint);
    memcpy(Dest + 2, Name.data(), Name.size());
  }

private:
  StringRef Name;
  uint16_t Hint;
};
// One pointer-sized slot of an import lookup/address table that refers
// to a Hint/Name entry by RVA (import-by-name).
class LookupChunk : public Chunk {
public:
  explicit LookupChunk(Chunk *C) : HintName(C) {}

  size_t getSize() const override { return ptrSize(); }

  void writeTo(uint8_t *Buf) const override {
    // Only the low 32 bits of the slot are written, even when the slot
    // is 8 bytes wide.
    uint8_t *Slot = Buf + OutputSectionOff;
    write32le(Slot, HintName->getRVA());
  }

  Chunk *HintName;
};
// One pointer-sized slot of an import lookup/address table for a symbol
// that is imported by ordinal rather than by name.
// See Microsoft PE/COFF spec 7.1. Import Header for details.
class OrdinalOnlyChunk : public Chunk {
public:
  explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {}

  size_t getSize() const override { return ptrSize(); }

  void writeTo(uint8_t *Buf) const override {
    // The most significant bit of the slot is set to mark the entry as
    // import-by-ordinal (as opposed to import-by-name).
    uint8_t *Slot = Buf + OutputSectionOff;
    if (!Config->is64()) {
      write32le(Slot, (1ULL << 31) | Ordinal);
      return;
    }
    write64le(Slot, (1ULL << 63) | Ordinal);
  }

  uint16_t Ordinal;
};
// One entry of the import directory table. It records, for a single
// imported DLL, the RVAs of the DLL name string, its import lookup table
// and its import address table.
class ImportDirectoryChunk : public Chunk {
public:
  explicit ImportDirectoryChunk(Chunk *N) : DLLName(N) {}

  size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }

  void writeTo(uint8_t *Buf) const override {
    auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff);
    E->ImportLookupTableRVA = LookupTab->getRVA();
    E->NameRVA = DLLName->getRVA();
    E->ImportAddressTableRVA = AddressTab->getRVA();
  }

  // Chunk holding the DLL name string; supplied at construction.
  Chunk *DLLName;

  // First chunk of this DLL's lookup and address tables. These are not
  // constructor arguments; the creator assigns them afterwards. They start
  // out null (like the members of the thunk chunks in this file) so that a
  // forgotten assignment is a null dereference in writeTo() rather than a
  // read through an indeterminate pointer.
  Chunk *LookupTab = nullptr;
  Chunk *AddressTab = nullptr;
};
// A placeholder chunk of a given size used as a null terminator in the
// import tables. hasData() returns false and writeTo() is not overridden,
// so this class itself writes no bytes.
class NullChunk : public Chunk {
public:
  explicit NullChunk(size_t N) : Size(N) {}

  bool hasData() const override {
    return false;
  }

  size_t getSize() const override {
    return Size;
  }

  // Overrides the alignment stored in the inherited Align member.
  void setAlign(size_t N) {
    Align = N;
  }

private:
  size_t Size;
};
// Groups DLL-imported symbols by (lower-cased) DLL name, because that is
// how symbols are laid out in the import descriptor table. Groups are
// ordered by the value Config->DLLOrder holds for each DLL name, and the
// symbols inside each group are sorted by symbol name.
static std::vector<std::vector<DefinedImportData *>>
binImports(const std::vector<DefinedImportData *> &Imports) {
  typedef std::vector<DefinedImportData *> SymbolList;
  typedef bool (*OrderFn)(const std::string &, const std::string &);

  // Note: operator[] is used on DLLOrder, so looking up a name that is
  // not yet present inserts it with a value-initialized order.
  OrderFn ByDLLOrder = [](const std::string &L, const std::string &R) {
    return Config->DLLOrder[L] < Config->DLLOrder[R];
  };

  std::map<std::string, SymbolList, OrderFn> Groups(ByDLLOrder);
  for (DefinedImportData *Sym : Imports)
    Groups[Sym->getDLLName().lower()].push_back(Sym);

  std::vector<SymbolList> Result;
  for (auto &Entry : Groups) {
    SymbolList &Members = Entry.second;
    // Sort symbols by name within each group.
    std::sort(Members.begin(), Members.end(),
              [](DefinedImportData *L, DefinedImportData *R) {
                return L->getName() < R->getName();
              });
    Result.push_back(std::move(Members));
  }
  return Result;
}
// Delay-load import table
// See Microsoft PE/COFF spec 4.3 for details.
// A chunk for one delay import descriptor table entry. It records, for a
// single delay-loaded DLL, the RVAs of the DLL name, the module handle
// slot, the delay import address table and the delay import name table.
class DelayDirectoryChunk : public Chunk {
public:
  explicit DelayDirectoryChunk(Chunk *N) : DLLName(N) {}

  size_t getSize() const override {
    return sizeof(delay_import_directory_table_entry);
  }

  void writeTo(uint8_t *Buf) const override {
    auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff);
    E->Attributes = 1;
    E->Name = DLLName->getRVA();
    E->ModuleHandle = ModuleHandle->getRVA();
    E->DelayImportAddressTable = AddressTab->getRVA();
    E->DelayImportNameTable = NameTab->getRVA();
  }

  // Chunk holding the DLL name string; supplied at construction.
  Chunk *DLLName;

  // The remaining fields are not constructor arguments; the creator
  // assigns them afterwards. They start out null (like the members of the
  // thunk chunks in this file) so that a forgotten assignment is a null
  // dereference in writeTo() rather than a read through an indeterminate
  // pointer.
  Chunk *ModuleHandle = nullptr;
  Chunk *AddressTab = nullptr;
  Chunk *NameTab = nullptr;
};
// Initial contents for delay-loaded functions (x64 flavor).
// This code calls __delayLoadHelper2 function to resolve a symbol
// and then overwrites its jump table slot with the result
// for subsequent function calls.
// The all-zero 4-byte operands of the two lea instructions and of the
// call (at byte offsets 36, 43 and 48 of this array) are placeholders;
// ThunkChunkX64::writeTo fills them with displacements computed relative
// to the end of the respective instruction (offsets 40, 47 and 52).
static const uint8_t ThunkX64[] = {
0x51, // push rcx
0x52, // push rdx
0x41, 0x50, // push r8
0x41, 0x51, // push r9
0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h
0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0
0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1
0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2
0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3
0x48, 0x8D, 0x15, 0, 0, 0, 0, // lea rdx, [__imp_<FUNCNAME>]
0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...]
0xE8, 0, 0, 0, 0, // call __delayLoadHelper2
0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp]
0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h]
0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h]
0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h]
0x48, 0x83, 0xC4, 0x48, // add rsp, 48h
0x41, 0x59, // pop r9
0x41, 0x58, // pop r8
0x5A, // pop rdx
0x59, // pop rcx
0xFF, 0xE0, // jmp rax
};
// Initial contents for delay-loaded functions (x86 flavor).
// The all-zero 4-byte operands are placeholders patched by
// ThunkChunkX86::writeTo: the two push operands (byte offsets 3 and 8)
// receive RVA + Config->ImageBase values and are registered as base
// relocations, while the call operand (offset 13) receives a displacement
// relative to the end of the call instruction (offset 17).
static const uint8_t ThunkX86[] = {
0x51, // push ecx
0x52, // push edx
0x68, 0, 0, 0, 0, // push offset ___imp__<FUNCNAME>
0x68, 0, 0, 0, 0, // push offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll
0xE8, 0, 0, 0, 0, // call ___delayLoadHelper2@8
0x5A, // pop edx
0x59, // pop ecx
0xFF, 0xE0, // jmp eax
};
// A chunk for the x64 delay import thunk. It emits a copy of ThunkX64
// with the three placeholder operands patched for one imported symbol.
class ThunkChunkX64 : public Chunk {
public:
  ThunkChunkX64(Defined *I, Chunk *D, Defined *H)
      : Imp(I), Desc(D), Helper(H) {}

  size_t getSize() const override { return sizeof(ThunkX64); }

  void writeTo(uint8_t *Buf) const override {
    uint8_t *Code = Buf + OutputSectionOff;
    memcpy(Code, ThunkX64, sizeof(ThunkX64));
    // Each operand is the target RVA minus the RVA of the byte following
    // the instruction (this chunk's RVA plus 40, 47 and 52 respectively).
    write32le(Code + 36, Imp->getRVA() - RVA - 40);
    write32le(Code + 43, Desc->getRVA() - RVA - 47);
    write32le(Code + 48, Helper->getRVA() - RVA - 52);
  }

  Defined *Imp = nullptr;
  Chunk *Desc = nullptr;
  Defined *Helper = nullptr;
};
// A chunk for the x86 delay import thunk. It emits a copy of ThunkX86
// with the three placeholder operands patched for one imported symbol.
class ThunkChunkX86 : public Chunk {
public:
  ThunkChunkX86(Defined *I, Chunk *D, Defined *H)
      : Imp(I), Desc(D), Helper(H) {}

  size_t getSize() const override { return sizeof(ThunkX86); }

  void writeTo(uint8_t *Buf) const override {
    uint8_t *Code = Buf + OutputSectionOff;
    memcpy(Code, ThunkX86, sizeof(ThunkX86));
    // The two push operands hold RVA + image base values.
    write32le(Code + 3, Imp->getRVA() + Config->ImageBase);
    write32le(Code + 8, Desc->getRVA() + Config->ImageBase);
    // The call operand is relative to the byte following the call.
    write32le(Code + 13, Helper->getRVA() - RVA - 17);
  }

  // Registers base relocations for the two image-base-dependent push
  // operands written above.
  void getBaserels(std::vector<Baserel> *Res) override {
    Res->emplace_back(RVA + 3);
    Res->emplace_back(RVA + 8);
  }

  Defined *Imp = nullptr;
  Chunk *Desc = nullptr;
  Defined *Helper = nullptr;
};
// One pointer-sized slot of the delay import address table. The slot is
// written as the thunk's RVA plus Config->ImageBase and is registered as
// a base relocation.
class DelayAddressChunk : public Chunk {
public:
  explicit DelayAddressChunk(Chunk *C) : Thunk(C) {}

  size_t getSize() const override { return ptrSize(); }

  void writeTo(uint8_t *Buf) const override {
    uint8_t *Slot = Buf + OutputSectionOff;
    auto VA = Thunk->getRVA() + Config->ImageBase;
    if (!Config->is64()) {
      write32le(Slot, VA);
      return;
    }
    write64le(Slot, VA);
  }

  void getBaserels(std::vector<Baserel> *Res) override {
    Res->emplace_back(RVA);
  }

  Chunk *Thunk;
};
// Export table
// Read Microsoft PE/COFF spec 5.3 for details.
// A chunk for the export descriptor table.
class ExportDirectoryChunk : public Chunk {
public:
ExportDirectoryChunk(int I, int J, Chunk *D, Chunk *A, Chunk *N, Chunk *O)
: MaxOrdinal(I), NameTabSize(J), DLLName(D), AddressTab(A), NameTab(N),
OrdinalTab(O) {}
size_t getSize() const override {
return sizeof(export_directory_table_entry);
}
void writeTo(uint8_t *Buf) const override {
auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff);
E->NameRVA = DLLName->getRVA();
E->OrdinalBase = 0;
E->AddressTableEntries = MaxOrdinal + 1;
E->NumberOfNamePointers = NameTabSize;
E->ExportAddressTableRVA = AddressTab->getRVA();
E->NamePointerRVA = NameTab->getRVA();
E->OrdinalTableRVA = OrdinalTab->getRVA();
}
uint16_t MaxOrdinal;
uint16_t NameTabSize;
Chunk *DLLName;
Chunk *AddressTab;
Chunk *NameTab;
Chunk *OrdinalTab;
};
// The export address table: one 4-byte slot per ordinal in the range
// [0, MaxOrdinal]. Each export in Config->Exports fills the slot that
// corresponds to its ordinal.
class AddressTableChunk : public Chunk {
public:
  explicit AddressTableChunk(size_t MaxOrdinal) : Size(MaxOrdinal + 1) {}

  size_t getSize() const override { return Size * 4; }

  void writeTo(uint8_t *Buf) const override {
    for (Export &E : Config->Exports) {
      uint8_t *Slot = Buf + OutputSectionOff + E.Ordinal * 4;
      if (!E.ForwardChunk) {
        // Regular export: store the RVA of the symbol's definition.
        write32le(Slot, cast<Defined>(E.Sym->repl())->getRVA());
        continue;
      }
      // Forwarded export: store the RVA of the forwarder string chunk.
      write32le(Slot, E.ForwardChunk->getRVA());
    }
  }

private:
  size_t Size;
};
class NamePointersChunk : public Chunk {
public:
explicit NamePointersChunk(std::vector<Chunk *> &V) : Chunks(V) {}
size_t getSize() const override { return Chunks.size() * 4; }
void writeTo(uint8_t *Buf) const override {
uint8_t *P = Buf + OutputSectionOff;
for (Chunk *C : Chunks) {
write32le(P, C->getRVA());
P += 4;
}
}
private:
std::vector<Chunk *> Chunks;
};
// The export ordinal table: one 2-byte ordinal for every export in
// Config->Exports that is not marked Noname, in iteration order.
class ExportOrdinalChunk : public Chunk {
public:
  explicit ExportOrdinalChunk(size_t I) : Size(I) {}

  size_t getSize() const override { return Size * 2; }

  void writeTo(uint8_t *Buf) const override {
    uint8_t *Out = Buf + OutputSectionOff;
    for (Export &E : Config->Exports) {
      if (!E.Noname) {
        write16le(Out, E.Ordinal);
        Out += 2;
      }
    }
  }

private:
  size_t Size;
};
} // anonymous namespace
// Returns the size in bytes of the import directory table, counting
// every chunk currently held in Dirs.
uint64_t IdataContents::getDirSize() {
  size_t NumEntries = Dirs.size();
  return NumEntries * sizeof(ImportDirectoryTableEntry);
}
// Returns the size in bytes of the import address table: one
// pointer-sized slot per chunk currently held in Addresses.
uint64_t IdataContents::getIATSize() {
  size_t NumSlots = Addresses.size();
  return NumSlots * ptrSize();
}
// Returns a list of .idata contents.
// See Microsoft PE/COFF spec 5.4 for details.
std::vector<Chunk *> IdataContents::getChunks() {
create();
std::vector<Chunk *> V;
// The loader assumes a specific order of data.
// Add each type in the correct order.
for (std::unique_ptr<Chunk> &C : Dirs)
V.push_back(C.get());
for (std::unique_ptr<Chunk> &C : Lookups)
V.push_back(C.get());
for (std::unique_ptr<Chunk> &C : Addresses)
V.push_back(C.get());
for (std::unique_ptr<Chunk> &C : Hints)
V.push_back(C.get());
for (auto &P : DLLNames) {
std::unique_ptr<Chunk> &C = P.second;
V.push_back(C.get());
}
return V;
}
// Builds all import table chunks from the symbols registered in Imports.
// For every DLL group returned by binImports() this appends parallel
// entries to Lookups and Addresses (plus Hints for by-name imports),
// terminates both tables, binds each symbol to its address table slot and
// appends one directory entry. A final null directory entry is appended
// after all DLLs have been processed.
void IdataContents::create() {
std::vector<std::vector<DefinedImportData *>> V = binImports(Imports);
// Create .idata contents for each DLL.
for (std::vector<DefinedImportData *> &Syms : V) {
StringRef Name = Syms[0]->getDLLName();
// Create lookup and address tables. If they have external names,
// we need to create HintName chunks to store the names.
// If they don't (if they are import-by-ordinals), we store only
// ordinal values to the table.
// Base is the index of this DLL's first slot; Lookups and Addresses
// grow in lockstep, so the same index is valid for both.
size_t Base = Lookups.size();
for (DefinedImportData *S : Syms) {
uint16_t Ord = S->getOrdinal();
if (S->getExternalName().empty()) {
Lookups.push_back(make_unique<OrdinalOnlyChunk>(Ord));
Addresses.push_back(make_unique<OrdinalOnlyChunk>(Ord));
continue;
}
// Both table slots refer to the same Hint/Name chunk, which is owned
// by Hints.
auto C = make_unique<HintNameChunk>(S->getExternalName(), Ord);
Lookups.push_back(make_unique<LookupChunk>(C.get()));
Addresses.push_back(make_unique<LookupChunk>(C.get()));
Hints.push_back(std::move(C));
}
// Terminate with null values.
Lookups.push_back(make_unique<NullChunk>(ptrSize()));
Addresses.push_back(make_unique<NullChunk>(ptrSize()));
// Point each symbol at its own slot in the address table.
for (int I = 0, E = Syms.size(); I < E; ++I)
Syms[I]->setLocation(Addresses[Base + I].get());
// Create the import table header.
if (!DLLNames.count(Name))
DLLNames[Name] = make_unique<StringChunk>(Name);
auto Dir = make_unique<ImportDirectoryChunk>(DLLNames[Name].get());
Dir->LookupTab = Lookups[Base].get();
Dir->AddressTab = Addresses[Base].get();
Dirs.push_back(std::move(Dir));
}
// Add null terminator.
Dirs.push_back(make_unique<NullChunk>(sizeof(ImportDirectoryTableEntry)));
}
std::vector<Chunk *> DelayLoadContents::getChunks() {
std::vector<Chunk *> V;
for (std::unique_ptr<Chunk> &C : Dirs)
V.push_back(C.get());
for (std::unique_ptr<Chunk> &C : Names)
V.push_back(C.get());
for (std::unique_ptr<Chunk> &C : HintNames)
V.push_back(C.get());
for (auto &P : DLLNames) {
std::unique_ptr<Chunk> &C = P.second;
V.push_back(C.get());
}
return V;
}
std::vector<Chunk *> DelayLoadContents::getDataChunks() {
std::vector<Chunk *> V;
for (std::unique_ptr<Chunk> &C : ModuleHandles)
V.push_back(C.get());
for (std::unique_ptr<Chunk> &C : Addresses)
V.push_back(C.get());
return V;
}
// Returns the size in bytes of the delay import directory table,
// counting every chunk currently held in Dirs.
uint64_t DelayLoadContents::getDirSize() {
  size_t NumEntries = Dirs.size();
  return NumEntries * sizeof(delay_import_directory_table_entry);
}
// Builds all delay-load import chunks from the symbols registered in
// Imports. H is the helper symbol that every generated thunk calls; it is
// remembered in Helper for newThunkChunk(). For each DLL group returned
// by binImports() this creates one directory entry, one thunk, address
// slot and name slot per symbol, the table terminators and a module
// handle slot. A final null directory entry is appended at the end.
void DelayLoadContents::create(Defined *H) {
Helper = H;
std::vector<std::vector<DefinedImportData *>> V = binImports(Imports);
// Create .didat contents for each DLL.
for (std::vector<DefinedImportData *> &Syms : V) {
StringRef Name = Syms[0]->getDLLName();
// Create the delay import table header.
if (!DLLNames.count(Name))
DLLNames[Name] = make_unique<StringChunk>(Name);
auto Dir = make_unique<DelayDirectoryChunk>(DLLNames[Name].get());
// Base is the index of this DLL's first slot; Addresses and Names grow
// in lockstep, so the same index is valid for both.
size_t Base = Addresses.size();
for (DefinedImportData *S : Syms) {
// The raw thunk pointer is handed to the address slot first and then
// wrapped in a unique_ptr owned by Thunks.
Chunk *T = newThunkChunk(S, Dir.get());
auto A = make_unique<DelayAddressChunk>(T);
Addresses.push_back(std::move(A));
Thunks.push_back(std::unique_ptr<Chunk>(T));
StringRef ExtName = S->getExternalName();
if (ExtName.empty()) {
Names.push_back(make_unique<OrdinalOnlyChunk>(S->getOrdinal()));
} else {
// By-name import: the hint is always written as 0 here.
auto C = make_unique<HintNameChunk>(ExtName, 0);
Names.push_back(make_unique<LookupChunk>(C.get()));
HintNames.push_back(std::move(C));
}
}
// Terminate with null values.
// NOTE(review): the terminators and the module handle below use a fixed
// size of 8 rather than ptrSize() -- confirm this is intended for 32-bit
// targets.
Addresses.push_back(make_unique<NullChunk>(8));
Names.push_back(make_unique<NullChunk>(8));
// Point each symbol at its own slot in the delay import address table.
for (int I = 0, E = Syms.size(); I < E; ++I)
Syms[I]->setLocation(Addresses[Base + I].get());
auto *MH = new NullChunk(8);
MH->setAlign(8);
ModuleHandles.push_back(std::unique_ptr<Chunk>(MH));
// Fill the delay import table header fields.
Dir->ModuleHandle = MH;
Dir->AddressTab = Addresses[Base].get();
Dir->NameTab = Names[Base].get();
Dirs.push_back(std::move(Dir));
}
// Add null terminator.
Dirs.push_back(
make_unique<NullChunk>(sizeof(delay_import_directory_table_entry)));
}
// Allocates the delay-load thunk chunk that matches Config->Machine for
// symbol S and directory entry Dir. The returned chunk is a raw `new`
// allocation; the caller takes ownership. Machine types other than AMD64
// and I386 are treated as unreachable.
Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) {
  if (Config->Machine == AMD64)
    return new ThunkChunkX64(S, Dir, Helper);
  if (Config->Machine == I386)
    return new ThunkChunkX86(S, Dir, Helper);
  llvm_unreachable("unsupported machine type");
}
// Builds all export table chunks from Config->Exports and stores them in
// Chunks in this order: directory, DLL name, address table, name pointer
// table, ordinal table, export name strings, forwarder strings.
EdataContents::EdataContents() {
// The address table is indexed by ordinal, so it must cover the largest
// ordinal in use.
uint16_t MaxOrdinal = 0;
for (Export &E : Config->Exports)
MaxOrdinal = std::max(MaxOrdinal, E.Ordinal);
// The DLL name recorded in the export directory is the file name
// component of the output path.
auto *DLLName = new StringChunk(sys::path::filename(Config->OutputFile));
auto *AddressTab = new AddressTableChunk(MaxOrdinal);
// One name string per export that is not marked Noname.
std::vector<Chunk *> Names;
for (Export &E : Config->Exports)
if (!E.Noname)
Names.push_back(new StringChunk(E.ExportName));
// One forwarder string per export with a non-empty ForwardTo; the chunk
// is also stored in the export itself for AddressTableChunk::writeTo.
std::vector<Chunk *> Forwards;
for (Export &E : Config->Exports) {
if (E.ForwardTo.empty())
continue;
E.ForwardChunk = new StringChunk(E.ForwardTo);
Forwards.push_back(E.ForwardChunk);
}
auto *NameTab = new NamePointersChunk(Names);
auto *OrdinalTab = new ExportOrdinalChunk(Names.size());
auto *Dir = new ExportDirectoryChunk(MaxOrdinal, Names.size(), DLLName,
AddressTab, NameTab, OrdinalTab);
// Transfer ownership of every raw allocation above to Chunks.
Chunks.push_back(std::unique_ptr<Chunk>(Dir));
Chunks.push_back(std::unique_ptr<Chunk>(DLLName));
Chunks.push_back(std::unique_ptr<Chunk>(AddressTab));
Chunks.push_back(std::unique_ptr<Chunk>(NameTab));
Chunks.push_back(std::unique_ptr<Chunk>(OrdinalTab));
for (Chunk *C : Names)
Chunks.push_back(std::unique_ptr<Chunk>(C));
for (Chunk *C : Forwards)
Chunks.push_back(std::unique_ptr<Chunk>(C));
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,84 @@
//===- DLL.h ----------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_DLL_H
#define LLD_COFF_DLL_H
#include "Chunks.h"
#include "Symbols.h"
namespace lld {
namespace coff {
// Windows-specific.
// IdataContents creates all chunks for the DLL import table.
// You are supposed to call add() to add symbols and then
// call getChunks() to get a list of chunks.
class IdataContents {
public:
// Registers an imported symbol; no chunk is created at this point.
void add(DefinedImportData *Sym) { Imports.push_back(Sym); }
// True if no symbol has been registered through add().
bool empty() { return Imports.empty(); }
// Creates the chunks and returns non-owning pointers to them.
std::vector<Chunk *> getChunks();
// RVA of the first import directory chunk. Indexes Dirs[0], so it is
// only valid once the chunks have been created.
uint64_t getDirRVA() { return Dirs[0]->getRVA(); }
uint64_t getDirSize();
// RVA of the first import address table chunk. Indexes Addresses[0], so
// it is only valid once the chunks have been created.
uint64_t getIATRVA() { return Addresses[0]->getRVA(); }
uint64_t getIATSize();
private:
void create();
// Symbols registered through add().
std::vector<DefinedImportData *> Imports;
// Owning storage for the created chunks, one vector per table.
std::vector<std::unique_ptr<Chunk>> Dirs;
std::vector<std::unique_ptr<Chunk>> Lookups;
std::vector<std::unique_ptr<Chunk>> Addresses;
std::vector<std::unique_ptr<Chunk>> Hints;
// DLL name string chunks, keyed by DLL name.
std::map<StringRef, std::unique_ptr<Chunk>> DLLNames;
};
// Windows-specific.
// DelayLoadContents creates all chunks for the delay-load DLL import table.
// Call add() for every delay-loaded symbol, then create() once with the
// helper symbol; the get*() accessors expose the chunks built by create().
class DelayLoadContents {
public:
  // Registers a delay-loaded symbol; no chunk is created at this point.
  void add(DefinedImportData *Sym) { Imports.push_back(Sym); }

  // True if no symbol has been registered through add().
  bool empty() { return Imports.empty(); }

  // Builds all chunks. Helper is the symbol that the thunks call.
  void create(Defined *Helper);

  // Non-owning views of the directory/name chunks and of the module
  // handle/address chunks, respectively.
  std::vector<Chunk *> getChunks();
  std::vector<Chunk *> getDataChunks();

  // The thunk chunks, still owned by this object.
  std::vector<std::unique_ptr<Chunk>> &getCodeChunks() { return Thunks; }

  // RVA of the first delay import directory chunk. Indexes Dirs[0], so it
  // is only valid after create() has run.
  uint64_t getDirRVA() { return Dirs[0]->getRVA(); }
  uint64_t getDirSize();

private:
  Chunk *newThunkChunk(DefinedImportData *S, Chunk *Dir);

  // Set by create(). Starts out null so that it never holds an
  // indeterminate value before create() is called.
  Defined *Helper = nullptr;

  // Symbols registered through add().
  std::vector<DefinedImportData *> Imports;

  // Owning storage for the created chunks, one vector per table.
  std::vector<std::unique_ptr<Chunk>> Dirs;
  std::vector<std::unique_ptr<Chunk>> ModuleHandles;
  std::vector<std::unique_ptr<Chunk>> Addresses;
  std::vector<std::unique_ptr<Chunk>> Names;
  std::vector<std::unique_ptr<Chunk>> HintNames;
  std::vector<std::unique_ptr<Chunk>> Thunks;

  // DLL name string chunks, keyed by DLL name.
  std::map<StringRef, std::unique_ptr<Chunk>> DLLNames;
};
// Windows-specific.
// EdataContents creates all chunks for the DLL export table.
// The constructor does all the work; the resulting chunks are owned by
// the public Chunks member.
class EdataContents {
public:
EdataContents();
// All export table chunks, in the order the constructor appended them.
std::vector<std::unique_ptr<Chunk>> Chunks;
};
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,694 @@
//===- Driver.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "InputFiles.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Driver/Driver.h"
#include "llvm/ADT/Optional.h"
#include "llvm/LibDriver/LibDriver.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
using namespace llvm;
using namespace llvm::COFF;
using llvm::sys::Process;
using llvm::sys::fs::OpenFlags;
using llvm::sys::fs::file_magic;
using llvm::sys::fs::identify_magic;
namespace lld {
namespace coff {
// Global pointers to the active configuration and driver. They are
// assigned by link(), which points them at objects that live on its own
// stack.
Configuration *Config;
LinkerDriver *Driver;
// Entry point of the COFF linker. Creates a fresh configuration and
// driver, publishes them through the Config and Driver globals and runs
// the driver on Args. Always returns true when the driver returns.
bool link(llvm::ArrayRef<const char *> Args) {
  Configuration LocalConfig;
  LinkerDriver LocalDriver;

  Config = &LocalConfig;
  Driver = &LocalDriver;

  LocalDriver.link(Args);
  return true;
}
// Derives the default output file name from an input path: directory
// components are dropped and the extension is replaced with ".dll" when
// Config->DLL is set, or ".exe" otherwise.
static std::string getOutputPath(StringRef Path) {
  size_t Sep = Path.find_last_of("\\/");
  StringRef Base = Path;
  if (Sep != StringRef::npos)
    Base = Path.substr(Sep + 1);

  // If there is no '.', rfind() returns npos and the whole name is kept.
  StringRef Stem = Base.substr(0, Base.rfind('.'));

  const char *Ext = ".exe";
  if (Config->DLL)
    Ext = ".dll";
  return (Stem + Ext).str();
}
// Opens the file at Path, which must already be resolved. The newly
// created memory buffer is owned by this driver (kept in OwningMBs); the
// caller receives a non-owning reference to it.
MemoryBufferRef LinkerDriver::openFile(StringRef Path) {
  std::unique_ptr<MemoryBuffer> Buffer =
      check(MemoryBuffer::getFile(Path), "could not open " + Path);

  // Take the reference before ownership moves into OwningMBs.
  MemoryBufferRef Ref = Buffer->getMemBufferRef();
  OwningMBs.push_back(std::move(Buffer));
  return Ref;
}
// Wraps MB in the InputFile subclass that matches its contents. The file
// type is detected from the buffer's magic, not from its name: archives
// and bitcode get their own classes and everything else is treated as an
// object file. When the first object file is seen and no output name is
// set yet, the output name is derived from that buffer's identifier.
static std::unique_ptr<InputFile> createFile(MemoryBufferRef MB) {
  file_magic Kind = identify_magic(MB.getBuffer());

  std::unique_ptr<InputFile> File;
  if (Kind == file_magic::archive) {
    File.reset(new ArchiveFile(MB));
  } else if (Kind == file_magic::bitcode) {
    File.reset(new BitcodeFile(MB));
  } else {
    if (Config->OutputFile == "")
      Config->OutputFile = getOutputPath(MB.getBufferIdentifier());
    File.reset(new ObjectFile(MB));
  }
  return File;
}
// Returns true if Sym begins with one of the characters '_', '@' or '?'.
static bool isDecorated(StringRef Sym) {
  if (Sym.empty())
    return false;
  char First = Sym.front();
  return First == '_' || First == '@' || First == '?';
}
// Parses .drectve section contents and applies every option found there.
// Nothing is returned: libraries named by /defaultlib are resolved,
// opened and added to the symbol table directly, and the other accepted
// options update Config or the symbol table. Any option that is not
// listed below is a fatal error.
void LinkerDriver::parseDirectives(StringRef S) {
llvm::opt::InputArgList Args = Parser.parse(S);
for (auto *Arg : Args) {
switch (Arg->getOption().getID()) {
case OPT_alternatename:
parseAlternateName(Arg->getValue());
break;
case OPT_defaultlib:
// findLib() returns None for excluded or already visited libraries.
if (Optional<StringRef> Path = findLib(Arg->getValue())) {
MemoryBufferRef MB = openFile(*Path);
Symtab.addFile(createFile(MB));
}
break;
case OPT_export: {
Export E = parseExport(Arg->getValue());
// Mark the export as originating from a directive section.
E.Directives = true;
Config->Exports.push_back(E);
break;
}
case OPT_failifmismatch:
checkFailIfMismatch(Arg->getValue());
break;
case OPT_incl:
addUndefined(Arg->getValue());
break;
case OPT_merge:
parseMerge(Arg->getValue());
break;
case OPT_nodefaultlib:
Config->NoDefaultLibs.insert(doFindLib(Arg->getValue()));
break;
case OPT_section:
parseSection(Arg->getValue());
break;
// The following options are accepted but have no effect here.
case OPT_editandcontinue:
case OPT_fastfail:
case OPT_guardsym:
case OPT_throwingnew:
break;
default:
fatal(Arg->getSpelling() + " is not allowed in .drectve");
}
}
}
// Looks Filename up in the search paths. A name that contains a path
// separator is returned unchanged. Otherwise each search directory is
// probed for the name as given and, if the name contains no '.', for the
// name with ".obj" appended. If nothing is found, Filename itself is
// returned, so the result is not guaranteed to exist.
StringRef LinkerDriver::doFindFile(StringRef Filename) {
  if (Filename.find_first_of("/\\") != StringRef::npos)
    return Filename;

  bool TryObjExt = Filename.find('.') == StringRef::npos;
  for (StringRef Dir : SearchPaths) {
    SmallString<128> Candidate = Dir;
    llvm::sys::path::append(Candidate, Filename);
    if (llvm::sys::fs::exists(Candidate.str()))
      return Alloc.save(Candidate.str());

    if (!TryObjExt)
      continue;
    Candidate.append(".obj");
    if (llvm::sys::fs::exists(Candidate.str()))
      return Alloc.save(Candidate.str());
  }
  return Filename;
}
// Resolves a file path. Each resolved path (compared in lower case) is
// returned at most once: if it has been seen before, None is returned
// instead.
Optional<StringRef> LinkerDriver::findFile(StringRef Filename) {
  StringRef Path = doFindFile(Filename);
  bool FirstVisit = VisitedFiles.insert(Path.lower()).second;
  if (FirstVisit)
    return Path;
  return None;
}
// Finds a library file in the search paths. ".lib" is appended first
// when Filename contains no '.'.
StringRef LinkerDriver::doFindLib(StringRef Filename) {
  if (Filename.find('.') == StringRef::npos)
    Filename = Alloc.save(Filename + ".lib");
  return doFindFile(Filename);
}
// Resolves a library path, honoring the /nodefaultlib options. Returns
// None when all default libraries are disabled, when the resolved path is
// listed in Config->NoDefaultLibs, or when the path (compared in lower
// case) has already been returned before.
Optional<StringRef> LinkerDriver::findLib(StringRef Filename) {
  if (Config->NoDefaultLibAll)
    return None;

  StringRef Path = doFindLib(Filename);
  bool Excluded = Config->NoDefaultLibs.count(Path) != 0;
  // The path is recorded as visited only if it is not excluded.
  if (Excluded || !VisitedFiles.insert(Path.lower()).second)
    return None;
  return Path;
}
// Parses LIB environment which contains a list of search paths.
void LinkerDriver::addLibSearchPaths() {
Optional<std::string> EnvOpt = Process::GetEnv("LIB");
if (!EnvOpt.hasValue())
return;
StringRef Env = Alloc.save(*EnvOpt);
while (!Env.empty()) {
StringRef Path;
std::tie(Path, Env) = Env.split(';');
SearchPaths.push_back(Path);
}
}
// Adds an undefined symbol named Name to the symbol table, records it in
// Config->GCRoot and returns it.
Undefined *LinkerDriver::addUndefined(StringRef Name) {
  Undefined *Sym = Symtab.addUndefined(Name);
  Config->GCRoot.insert(Sym);
  return Sym;
}
// Symbol names are mangled by prepending "_" on x86; on other machine
// types the name is returned unchanged. The machine type must already be
// known when this is called.
StringRef LinkerDriver::mangle(StringRef Sym) {
  assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN);
  if (Config->Machine != I386)
    return Sym;
  return Alloc.save("_" + Sym);
}
// Windows specific -- find default entry point name.
// Walks the table of user-defined main functions in order and returns the
// (mangled) CRT entry point paired with the first one that the symbol
// table resolves to something other than an Undefined symbol. Returns an
// empty string if none matches.
StringRef LinkerDriver::findDefaultEntry() {
  // User-defined main functions and their corresponding entry points.
  static const char *Entries[][2] = {
      {"main", "mainCRTStartup"},
      {"wmain", "wmainCRTStartup"},
      {"WinMain", "WinMainCRTStartup"},
      {"wWinMain", "wWinMainCRTStartup"},
  };

  for (const auto &Pair : Entries) {
    StringRef Found = Symtab.findMangle(mangle(Pair[0]));
    if (Found.empty())
      continue;
    if (isa<Undefined>(Symtab.find(Found)->Body))
      continue;
    return mangle(Pair[1]);
  }
  return "";
}
// Infers the subsystem when none was given: GUI for DLLs, console (CUI)
// if main or wmain is found, GUI if WinMain or wWinMain is found, and
// unknown otherwise.
WindowsSubsystem LinkerDriver::inferSubsystem() {
  if (Config->DLL)
    return IMAGE_SUBSYSTEM_WINDOWS_GUI;

  bool HasConsoleMain =
      Symtab.findUnderscore("main") || Symtab.findUnderscore("wmain");
  if (HasConsoleMain)
    return IMAGE_SUBSYSTEM_WINDOWS_CUI;

  bool HasGuiMain =
      Symtab.findUnderscore("WinMain") || Symtab.findUnderscore("wWinMain");
  if (HasGuiMain)
    return IMAGE_SUBSYSTEM_WINDOWS_GUI;
  return IMAGE_SUBSYSTEM_UNKNOWN;
}
// Returns the default image base, which depends on whether the target is
// 64-bit and on whether a DLL or an executable is being produced.
static uint64_t getDefaultImageBase() {
  if (Config->is64()) {
    if (Config->DLL)
      return 0x180000000;
    return 0x140000000;
  }
  if (Config->DLL)
    return 0x10000000;
  return 0x400000;
}
void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
// If the first command line argument is "/lib", link.exe acts like lib.exe.
// We call our own implementation of lib.exe that understands bitcode files.
if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) {
if (llvm::libDriverMain(ArgsArr.slice(1)) != 0)
fatal("lib failed");
return;
}
// Needed for LTO.
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllDisassemblers();
// Parse command line options.
llvm::opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1));
// Handle /help
if (Args.hasArg(OPT_help)) {
printHelp(ArgsArr[0]);
return;
}
if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end())
fatal("no input files");
// Construct search path list.
SearchPaths.push_back("");
for (auto *Arg : Args.filtered(OPT_libpath))
SearchPaths.push_back(Arg->getValue());
addLibSearchPaths();
// Handle /out
if (auto *Arg = Args.getLastArg(OPT_out))
Config->OutputFile = Arg->getValue();
// Handle /verbose
if (Args.hasArg(OPT_verbose))
Config->Verbose = true;
// Handle /force or /force:unresolved
if (Args.hasArg(OPT_force) || Args.hasArg(OPT_force_unresolved))
Config->Force = true;
// Handle /debug
if (Args.hasArg(OPT_debug))
Config->Debug = true;
// Handle /noentry
if (Args.hasArg(OPT_noentry)) {
if (!Args.hasArg(OPT_dll))
fatal("/noentry must be specified with /dll");
Config->NoEntry = true;
}
// Handle /dll
if (Args.hasArg(OPT_dll)) {
Config->DLL = true;
Config->ManifestID = 2;
}
// Handle /fixed
if (Args.hasArg(OPT_fixed)) {
if (Args.hasArg(OPT_dynamicbase))
fatal("/fixed must not be specified with /dynamicbase");
Config->Relocatable = false;
Config->DynamicBase = false;
}
// Handle /machine
if (auto *Arg = Args.getLastArg(OPT_machine))
Config->Machine = getMachineType(Arg->getValue());
// Handle /nodefaultlib:<filename>
for (auto *Arg : Args.filtered(OPT_nodefaultlib))
Config->NoDefaultLibs.insert(doFindLib(Arg->getValue()));
// Handle /nodefaultlib
if (Args.hasArg(OPT_nodefaultlib_all))
Config->NoDefaultLibAll = true;
// Handle /base
if (auto *Arg = Args.getLastArg(OPT_base))
parseNumbers(Arg->getValue(), &Config->ImageBase);
// Handle /stack
if (auto *Arg = Args.getLastArg(OPT_stack))
parseNumbers(Arg->getValue(), &Config->StackReserve, &Config->StackCommit);
// Handle /heap
if (auto *Arg = Args.getLastArg(OPT_heap))
parseNumbers(Arg->getValue(), &Config->HeapReserve, &Config->HeapCommit);
// Handle /version
if (auto *Arg = Args.getLastArg(OPT_version))
parseVersion(Arg->getValue(), &Config->MajorImageVersion,
&Config->MinorImageVersion);
// Handle /subsystem
if (auto *Arg = Args.getLastArg(OPT_subsystem))
parseSubsystem(Arg->getValue(), &Config->Subsystem, &Config->MajorOSVersion,
&Config->MinorOSVersion);
// Handle /alternatename
for (auto *Arg : Args.filtered(OPT_alternatename))
parseAlternateName(Arg->getValue());
// Handle /include
for (auto *Arg : Args.filtered(OPT_incl))
addUndefined(Arg->getValue());
// Handle /implib
if (auto *Arg = Args.getLastArg(OPT_implib))
Config->Implib = Arg->getValue();
// Handle /opt
for (auto *Arg : Args.filtered(OPT_opt)) {
std::string Str = StringRef(Arg->getValue()).lower();
SmallVector<StringRef, 1> Vec;
StringRef(Str).split(Vec, ',');
for (StringRef S : Vec) {
if (S == "noref") {
Config->DoGC = false;
Config->DoICF = false;
continue;
}
if (S == "icf" || StringRef(S).startswith("icf=")) {
Config->DoICF = true;
continue;
}
if (S == "noicf") {
Config->DoICF = false;
continue;
}
if (StringRef(S).startswith("lldlto=")) {
StringRef OptLevel = StringRef(S).substr(7);
if (OptLevel.getAsInteger(10, Config->LTOOptLevel) ||
Config->LTOOptLevel > 3)
fatal("/opt:lldlto: invalid optimization level: " + OptLevel);
continue;
}
if (StringRef(S).startswith("lldltojobs=")) {
StringRef Jobs = StringRef(S).substr(11);
if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0)
fatal("/opt:lldltojobs: invalid job count: " + Jobs);
continue;
}
if (S != "ref" && S != "lbr" && S != "nolbr")
fatal("/opt: unknown option: " + S);
}
}
// Handle /failifmismatch
for (auto *Arg : Args.filtered(OPT_failifmismatch))
checkFailIfMismatch(Arg->getValue());
// Handle /merge
for (auto *Arg : Args.filtered(OPT_merge))
parseMerge(Arg->getValue());
// Handle /section
for (auto *Arg : Args.filtered(OPT_section))
parseSection(Arg->getValue());
// Handle /manifest
if (auto *Arg = Args.getLastArg(OPT_manifest_colon))
parseManifest(Arg->getValue());
// Handle /manifestuac
if (auto *Arg = Args.getLastArg(OPT_manifestuac))
parseManifestUAC(Arg->getValue());
// Handle /manifestdependency
if (auto *Arg = Args.getLastArg(OPT_manifestdependency))
Config->ManifestDependency = Arg->getValue();
// Handle /manifestfile
if (auto *Arg = Args.getLastArg(OPT_manifestfile))
Config->ManifestFile = Arg->getValue();
// Handle /manifestinput
for (auto *Arg : Args.filtered(OPT_manifestinput))
Config->ManifestInput.push_back(Arg->getValue());
// Handle miscellaneous boolean flags.
if (Args.hasArg(OPT_allowbind_no))
Config->AllowBind = false;
if (Args.hasArg(OPT_allowisolation_no))
Config->AllowIsolation = false;
if (Args.hasArg(OPT_dynamicbase_no))
Config->DynamicBase = false;
if (Args.hasArg(OPT_nxcompat_no))
Config->NxCompat = false;
if (Args.hasArg(OPT_tsaware_no))
Config->TerminalServerAware = false;
if (Args.hasArg(OPT_nosymtab))
Config->WriteSymtab = false;
// Create a list of input files. Files can be given as arguments
// for /defaultlib option.
std::vector<StringRef> Paths;
std::vector<MemoryBufferRef> MBs;
for (auto *Arg : Args.filtered(OPT_INPUT))
if (Optional<StringRef> Path = findFile(Arg->getValue()))
Paths.push_back(*Path);
for (auto *Arg : Args.filtered(OPT_defaultlib))
if (Optional<StringRef> Path = findLib(Arg->getValue()))
Paths.push_back(*Path);
for (StringRef Path : Paths)
MBs.push_back(openFile(Path));
// Windows specific -- Create a resource file containing a manifest file.
if (Config->Manifest == Configuration::Embed) {
std::unique_ptr<MemoryBuffer> MB = createManifestRes();
MBs.push_back(MB->getMemBufferRef());
OwningMBs.push_back(std::move(MB)); // take ownership
}
// Windows specific -- Input files can be Windows resource files (.res files).
// We invoke cvtres.exe to convert resource files to a regular COFF file
// then link the result file normally.
std::vector<MemoryBufferRef> Resources;
auto NotResource = [](MemoryBufferRef MB) {
return identify_magic(MB.getBuffer()) != file_magic::windows_resource;
};
auto It = std::stable_partition(MBs.begin(), MBs.end(), NotResource);
if (It != MBs.end()) {
Resources.insert(Resources.end(), It, MBs.end());
MBs.erase(It, MBs.end());
}
// Read all input files given via the command line. Note that step()
// doesn't read files that are specified by directive sections.
for (MemoryBufferRef MB : MBs)
Symtab.addFile(createFile(MB));
Symtab.step();
// Determine machine type and check if all object files are
// for the same CPU type. Note that this needs to be done before
// any call to mangle().
for (std::unique_ptr<InputFile> &File : Symtab.getFiles()) {
MachineTypes MT = File->getMachineType();
if (MT == IMAGE_FILE_MACHINE_UNKNOWN)
continue;
if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
Config->Machine = MT;
continue;
}
if (Config->Machine != MT)
fatal(File->getShortName() + ": machine type " + machineToStr(MT) +
" conflicts with " + machineToStr(Config->Machine));
}
if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
llvm::errs() << "warning: /machine is not specified. x64 is assumed.\n";
Config->Machine = AMD64;
}
// Windows specific -- Convert Windows resource files to a COFF file.
if (!Resources.empty()) {
std::unique_ptr<MemoryBuffer> MB = convertResToCOFF(Resources);
Symtab.addFile(createFile(MB->getMemBufferRef()));
OwningMBs.push_back(std::move(MB)); // take ownership
}
// Handle /largeaddressaware
if (Config->is64() || Args.hasArg(OPT_largeaddressaware))
Config->LargeAddressAware = true;
// Handle /highentropyva
if (Config->is64() && !Args.hasArg(OPT_highentropyva_no))
Config->HighEntropyVA = true;
// Handle /entry and /dll
if (auto *Arg = Args.getLastArg(OPT_entry)) {
Config->Entry = addUndefined(mangle(Arg->getValue()));
} else if (Args.hasArg(OPT_dll) && !Config->NoEntry) {
StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12"
: "_DllMainCRTStartup";
Config->Entry = addUndefined(S);
} else if (!Config->NoEntry) {
// Windows specific -- If entry point name is not given, we need to
// infer that from user-defined entry name.
StringRef S = findDefaultEntry();
if (S.empty())
fatal("entry point must be defined");
Config->Entry = addUndefined(S);
if (Config->Verbose)
llvm::outs() << "Entry name inferred: " << S << "\n";
}
// Handle /export
for (auto *Arg : Args.filtered(OPT_export)) {
Export E = parseExport(Arg->getValue());
if (Config->Machine == I386) {
if (!isDecorated(E.Name))
E.Name = Alloc.save("_" + E.Name);
if (!E.ExtName.empty() && !isDecorated(E.ExtName))
E.ExtName = Alloc.save("_" + E.ExtName);
}
Config->Exports.push_back(E);
}
// Handle /def
if (auto *Arg = Args.getLastArg(OPT_deffile)) {
MemoryBufferRef MB = openFile(Arg->getValue());
// parseModuleDefs mutates Config object.
parseModuleDefs(MB, &Alloc);
}
// Handle /delayload
for (auto *Arg : Args.filtered(OPT_delayload)) {
Config->DelayLoads.insert(StringRef(Arg->getValue()).lower());
if (Config->Machine == I386) {
Config->DelayLoadHelper = addUndefined("___delayLoadHelper2@8");
} else {
Config->DelayLoadHelper = addUndefined("__delayLoadHelper2");
}
}
// Set default image base if /base is not given.
if (Config->ImageBase == uint64_t(-1))
Config->ImageBase = getDefaultImageBase();
Symtab.addRelative(mangle("__ImageBase"), 0);
if (Config->Machine == I386) {
Config->SEHTable = Symtab.addRelative("___safe_se_handler_table", 0);
Config->SEHCount = Symtab.addAbsolute("___safe_se_handler_count", 0);
}
// We do not support /guard:cf (control flow protection) yet.
// Define CFG symbols anyway so that we can link MSVC 2015 CRT.
Symtab.addAbsolute(mangle("__guard_fids_table"), 0);
Symtab.addAbsolute(mangle("__guard_fids_count"), 0);
Symtab.addAbsolute(mangle("__guard_flags"), 0x100);
// Read as many files as we can from directive sections.
Symtab.run();
// Resolve auxiliary symbols until we get a convergence.
// (Trying to resolve a symbol may trigger a Lazy symbol to load a new file.
// A new file may contain a directive section to add new command line options.
// That's why we have to repeat until converge.)
for (;;) {
// Windows specific -- if entry point is not found,
// search for its mangled names.
if (Config->Entry)
Symtab.mangleMaybe(Config->Entry);
// Windows specific -- Make sure we resolve all dllexported symbols.
for (Export &E : Config->Exports) {
if (!E.ForwardTo.empty())
continue;
E.Sym = addUndefined(E.Name);
if (!E.Directives)
Symtab.mangleMaybe(E.Sym);
}
// Add weak aliases. Weak aliases are a mechanism to give remaining
// undefined symbols a final chance to be resolved successfully.
for (auto Pair : Config->AlternateNames) {
StringRef From = Pair.first;
StringRef To = Pair.second;
Symbol *Sym = Symtab.find(From);
if (!Sym)
continue;
if (auto *U = dyn_cast<Undefined>(Sym->Body))
if (!U->WeakAlias)
U->WeakAlias = Symtab.addUndefined(To);
}
// Windows specific -- if __load_config_used can be resolved, resolve it.
if (Symtab.findUnderscore("_load_config_used"))
addUndefined(mangle("_load_config_used"));
if (Symtab.queueEmpty())
break;
Symtab.run();
}
// Do LTO by compiling bitcode input files to a set of native COFF files then
// link those files.
Symtab.addCombinedLTOObjects();
// Make sure we have resolved all symbols.
Symtab.reportRemainingUndefines(/*Resolve=*/true);
// Windows specific -- if no /subsystem is given, we need to infer
// that from entry point name.
if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) {
Config->Subsystem = inferSubsystem();
if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN)
fatal("subsystem must be defined");
}
// Handle /safeseh.
if (Args.hasArg(OPT_safeseh))
for (ObjectFile *File : Symtab.ObjectFiles)
if (!File->SEHCompat)
fatal("/safeseh: " + File->getName() + " is not compatible with SEH");
// Windows specific -- when we are creating a .dll file, we also
// need to create a .lib file.
if (!Config->Exports.empty() || Config->DLL) {
fixupExports();
writeImportLibrary();
assignExportOrdinals();
}
// Windows specific -- Create a side-by-side manifest file.
if (Config->Manifest == Configuration::SideBySide)
createSideBySideManifest();
// Create a dummy PDB file to satisfy build system rules.
if (auto *Arg = Args.getLastArg(OPT_pdb))
createPDB(Arg->getValue());
// Identify unreferenced COMDAT sections.
if (Config->DoGC)
markLive(Symtab.getChunks());
// Identify identical COMDAT sections to merge them.
if (Config->DoICF)
doICF(Symtab.getChunks());
// Write the result.
writeResult(&Symtab);
// Create a symbol map file containing symbol VAs and their names
// to help debugging.
if (auto *Arg = Args.getLastArg(OPT_lldmap)) {
std::error_code EC;
llvm::raw_fd_ostream Out(Arg->getValue(), EC, OpenFlags::F_Text);
if (EC)
fatal(EC, "could not create the symbol map");
Symtab.printMap(Out);
}
// Call exit to avoid calling destructors.
exit(0);
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,177 @@
//===- Driver.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_DRIVER_H
#define LLD_COFF_DRIVER_H
#include "Config.h"
#include "SymbolTable.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/COFF.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/StringSaver.h"
#include <memory>
#include <set>
#include <vector>
namespace lld {
namespace coff {
class LinkerDriver;
extern LinkerDriver *Driver;
using llvm::COFF::MachineTypes;
using llvm::COFF::WindowsSubsystem;
using llvm::Optional;
class InputFile;
// Implemented in MarkLive.cpp.
void markLive(const std::vector<Chunk *> &Chunks);
// Implemented in ICF.cpp.
void doICF(const std::vector<Chunk *> &Chunks);
// Command line parser for the COFF driver. Turns argument vectors or
// strings into llvm::opt::InputArgList objects.
class ArgParser {
public:
// Alloc is a StringSaver constructed on top of AllocAux.
ArgParser() : Alloc(AllocAux) {}
// Parses command line options.
llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args);
// Concatenate LINK environment variable and given arguments and parse them.
llvm::opt::InputArgList parseLINK(llvm::ArrayRef<const char *> Args);
// Tokenizes a given string and then parses as command line options.
llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); }
private:
// Splits S into command line tokens using Windows command line rules.
std::vector<const char *> tokenize(StringRef S);
// Replaces '@<file>' arguments with the contents of the named file.
std::vector<const char *> replaceResponseFiles(std::vector<const char *>);
// Allocator backing the StringSavers that tokenize() and
// replaceResponseFiles() create for the strings they return.
llvm::BumpPtrAllocator AllocAux;
llvm::StringSaver Alloc;
};
// The COFF linker driver. It interprets the command line, opens the input
// files, feeds them to the symbol table and triggers writing of the output.
class LinkerDriver {
public:
// Alloc is a StringSaver constructed on top of AllocAux.
LinkerDriver() : Alloc(AllocAux) {}
// Runs a complete link for the given command line.
void link(llvm::ArrayRef<const char *> Args);
// Used by the resolver to parse .drectve section contents.
void parseDirectives(StringRef S);
private:
llvm::BumpPtrAllocator AllocAux;
llvm::StringSaver Alloc;
ArgParser Parser;
SymbolTable Symtab;
// Opens a file. Path has to be resolved already.
MemoryBufferRef openFile(StringRef Path);
// Searches a file from search paths.
// NOTE(review): an empty Optional presumably means "nothing new to add"
// (link() simply skips such entries) -- confirm against Driver.cpp.
Optional<StringRef> findFile(StringRef Filename);
Optional<StringRef> findLib(StringRef Filename);
StringRef doFindFile(StringRef Filename);
StringRef doFindLib(StringRef Filename);
// Parses LIB environment which contains a list of search paths.
void addLibSearchPaths();
// Library search path. The first element is always "" (current directory).
std::vector<StringRef> SearchPaths;
// NOTE(review): presumably the set of paths that were already opened, used
// to avoid processing a file twice -- confirm against Driver.cpp.
std::set<std::string> VisitedFiles;
// Adds Sym to the symbol table as an undefined symbol and returns it.
// link() uses this for the entry point, exports and helper symbols.
Undefined *addUndefined(StringRef Sym);
// NOTE(review): presumably applies the target's symbol name decoration
// (link() requires the machine type to be known before calling it).
StringRef mangle(StringRef Sym);
// Windows specific -- "main" is not the only main function in Windows.
// You can choose one from these four -- {w,}{WinMain,main}.
// There are four different entry point functions for them,
// {w,}{WinMain,main}CRTStartup, respectively. The linker needs to
// choose the right one depending on which "main" function is defined.
// This function looks up the symbol table and resolve corresponding
// entry point name.
StringRef findDefaultEntry();
// Infers the subsystem when /subsystem is not given. link() treats
// IMAGE_SUBSYSTEM_UNKNOWN as "could not be inferred".
WindowsSubsystem inferSubsystem();
// Driver is the owner of all opened files.
// InputFiles have MemoryBufferRefs to them.
std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
};
// Parses a module-definition file given via /def. Results are written
// directly to Config.
void parseModuleDefs(MemoryBufferRef MB, llvm::StringSaver *Alloc);
// Writes the import library (.lib) that accompanies the output when there
// are exported symbols or a DLL is being created.
void writeImportLibrary();
// Functions below this line are defined in DriverUtils.cpp.
void printHelp(const char *Argv0);
// For /machine option.
MachineTypes getMachineType(StringRef Arg);
StringRef machineToStr(MachineTypes MT);
// Parses a string in the form of "<integer>[,<integer>]".
// Size may be null, in which case the second number is ignored.
void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size = nullptr);
// Parses a string in the form of "<integer>[.<integer>]".
// Minor's default value is 0.
void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor);
// Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]".
void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
uint32_t *Minor);
// The three functions below parse "<from>=<to>" (alternate name, merge) or
// "<name>,<attributes>" (section) and store the result in Config.
void parseAlternateName(StringRef);
void parseMerge(StringRef);
void parseSection(StringRef);
// Parses a string in the form of "EMBED[,ID=<integer>]|NO".
void parseManifest(StringRef Arg);
// Parses a string in the form of "level=<string>|uiAccess=<string>|NO".
void parseManifestUAC(StringRef Arg);
// Create a resource file containing a manifest XML.
std::unique_ptr<MemoryBuffer> createManifestRes();
// Writes the manifest XML to a standalone file next to the output.
void createSideBySideManifest();
// Used for dllexported symbols.
Export parseExport(StringRef Arg);
void fixupExports();
void assignExportOrdinals();
// Parses a string in the form of "key=value" and check
// if value matches previous values for the key.
// This feature used in the directive section to reject
// incompatible objects.
void checkFailIfMismatch(StringRef Arg);
// Convert Windows resource files (.res files) to a .obj file
// using cvtres.exe.
std::unique_ptr<MemoryBuffer>
convertResToCOFF(const std::vector<MemoryBufferRef> &MBs);
// Creates a PDB file at Path (currently a dummy file; see the caller).
void createPDB(StringRef Path);
// Create enum with OPT_xxx values for each option in Options.td.
// Options.inc expands to one OPTION(...) entry per option; the macro below
// keeps only the third field (the option ID) and turns it into an
// enumerator. OPT_INVALID occupies value 0, so real options start at 1.
enum {
OPT_INVALID = 0,
#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID,
#include "Options.inc"
#undef OPTION
};
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,659 @@
//===- DriverUtils.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions for the driver. Because there
// are so many small functions, we created this separate file to make
// Driver.cpp less cluttered.
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "Symbols.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Object/COFF.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm::COFF;
using namespace llvm;
using llvm::cl::ExpandResponseFiles;
using llvm::cl::TokenizeWindowsCommandLine;
using llvm::sys::Process;
namespace lld {
namespace coff {
namespace {
// Builds the command line of an external tool and runs it. Any failure to
// locate or to run the tool terminates the linker through fatal().
class Executor {
public:
  // S is the program name; it is looked up in PATH when run() is called.
  explicit Executor(StringRef S) : Saver(Alloc), Prog(Saver.save(S)) {}

  // Appends one argument. Each overload stores the string through Saver and
  // keeps the saved pointer in Args.
  void add(StringRef S) { Args.push_back(Saver.save(S)); }
  void add(std::string &S) { Args.push_back(Saver.save(S)); }
  void add(Twine S) { Args.push_back(Saver.save(S)); }
  void add(const char *S) { Args.push_back(Saver.save(S)); }

  // Resolves the program, runs it with the collected arguments and waits
  // for it to finish. Does not return if the program cannot be found or
  // exits with a non-zero status.
  void run() {
    ErrorOr<std::string> ExeOrErr = llvm::sys::findProgramByName(Prog);
    // fatal(EC, Msg) appends ": " followed by the error text on its own, so
    // the prefix must not carry a trailing separator (it used to yield
    // "... in PATH: : <error>").
    if (auto EC = ExeOrErr.getError())
      fatal(EC, "unable to find " + Prog + " in PATH");
    const char *Exe = Saver.save(*ExeOrErr);
    // argv[0] is the resolved executable; the vector is null-terminated as
    // ExecuteAndWait expects.
    Args.insert(Args.begin(), Exe);
    Args.push_back(nullptr);
    if (llvm::sys::ExecuteAndWait(Args[0], Args.data()) != 0) {
      // Show the failing command line (skipping the terminator) first.
      for (const char *S : Args)
        if (S)
          llvm::errs() << S << " ";
      fatal("ExecuteAndWait failed");
    }
  }

private:
  // Declaration order matters: Saver refers to Alloc and Prog is saved
  // through Saver in the constructor's initializer list.
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver;
  StringRef Prog;
  std::vector<const char *> Args;
};
} // anonymous namespace
// Translates the argument of /machine into a COFF machine type. The name is
// matched case-insensitively; an unrecognized name is a fatal error.
MachineTypes getMachineType(StringRef S) {
  std::string Lowered = S.lower();
  MachineTypes Result = StringSwitch<MachineTypes>(Lowered)
                            .Case("amd64", AMD64)
                            .Case("x64", AMD64)
                            .Case("i386", I386)
                            .Case("x86", I386)
                            .Case("arm", ARMNT)
                            .Default(IMAGE_FILE_MACHINE_UNKNOWN);
  if (Result == IMAGE_FILE_MACHINE_UNKNOWN)
    fatal("unknown /machine argument: " + S);
  return Result;
}
// Returns the /machine spelling of a machine type. Only ARMNT, AMD64 and
// I386 are handled; any other value is treated as unreachable.
StringRef machineToStr(MachineTypes MT) {
  if (MT == ARMNT)
    return "arm";
  if (MT == AMD64)
    return "x64";
  if (MT == I386)
    return "x86";
  llvm_unreachable("unknown machine type");
}
// Parses "<integer>[,<integer>]". The first number is stored in *Addr. The
// second one is stored in *Size only when Size is non-null and the number
// is present; otherwise *Size is left untouched.
void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size) {
  auto Parts = Arg.split(',');
  StringRef First = Parts.first;
  StringRef Second = Parts.second;
  if (First.getAsInteger(0, *Addr))
    fatal("invalid number: " + First);
  if (!Size || Second.empty())
    return;
  if (Second.getAsInteger(0, *Size))
    fatal("invalid number: " + Second);
}
// Parses "<integer>[.<integer>]" into *Major and *Minor.
// *Minor becomes 0 when the second number is absent.
void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) {
  auto Parts = Arg.split('.');
  StringRef MajorStr = Parts.first;
  StringRef MinorStr = Parts.second;
  if (MajorStr.getAsInteger(0, *Major))
    fatal("invalid number: " + MajorStr);
  *Minor = 0;
  if (MinorStr.empty())
    return;
  if (MinorStr.getAsInteger(0, *Minor))
    fatal("invalid number: " + MinorStr);
}
// Parses "<subsystem>[,<integer>[.<integer>]]". The subsystem name is
// matched case-insensitively and written to *Sys. *Major and *Minor are
// only written when a version follows the comma.
void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
                    uint32_t *Minor) {
  auto Parts = Arg.split(',');
  StringRef Name = Parts.first;
  StringRef Version = Parts.second;

  std::string Lowered = Name.lower();
  *Sys = StringSwitch<WindowsSubsystem>(Lowered)
             .Case("boot_application",
                   IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION)
             .Case("console", IMAGE_SUBSYSTEM_WINDOWS_CUI)
             .Case("efi_application", IMAGE_SUBSYSTEM_EFI_APPLICATION)
             .Case("efi_boot_service_driver",
                   IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER)
             .Case("efi_rom", IMAGE_SUBSYSTEM_EFI_ROM)
             .Case("efi_runtime_driver", IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER)
             .Case("native", IMAGE_SUBSYSTEM_NATIVE)
             .Case("posix", IMAGE_SUBSYSTEM_POSIX_CUI)
             .Case("windows", IMAGE_SUBSYSTEM_WINDOWS_GUI)
             .Default(IMAGE_SUBSYSTEM_UNKNOWN);
  if (*Sys == IMAGE_SUBSYSTEM_UNKNOWN)
    fatal("unknown subsystem: " + Name);

  if (Version.empty())
    return;
  parseVersion(Version, Major, Minor);
}
// Parses "<from>=<to>" and records the mapping in Config->AlternateNames.
// Both sides must be non-empty, and a name that is already mapped may only
// be mapped to the same target again.
void parseAlternateName(StringRef S) {
  auto Parts = S.split('=');
  StringRef From = Parts.first;
  StringRef To = Parts.second;
  if (From.empty() || To.empty())
    fatal("/alternatename: invalid argument: " + S);

  auto Pos = Config->AlternateNames.find(From);
  bool Known = Pos != Config->AlternateNames.end();
  if (Known && Pos->second != To)
    fatal("/alternatename: conflicts: " + S);
  Config->AlternateNames.insert(Pos, std::make_pair(From, To));
}
// Parses "<from>=<to>" and records it in Config->Merge. The first mapping
// for a given <from> wins; a later, different mapping only yields a warning.
void parseMerge(StringRef S) {
  auto Parts = S.split('=');
  StringRef From = Parts.first;
  StringRef To = Parts.second;
  if (From.empty() || To.empty())
    fatal("/merge: invalid argument: " + S);

  auto Result = Config->Merge.insert(std::make_pair(From, To));
  if (Result.second)
    return;
  StringRef Existing = Result.first->second;
  if (Existing == To)
    return;
  llvm::errs() << "warning: " << S << ": already merged into " << Existing
               << "\n";
}
// Converts a string of one-letter section attributes (case-insensitive)
// into the corresponding IMAGE_SCN_MEM_* bit mask. An unknown letter is a
// fatal error.
static uint32_t parseSectionAttributes(StringRef S) {
  uint32_t Flags = 0;
  std::string Lowered = S.lower();
  for (char C : Lowered) {
    if (C == 'd')
      Flags |= IMAGE_SCN_MEM_DISCARDABLE;
    else if (C == 'e')
      Flags |= IMAGE_SCN_MEM_EXECUTE;
    else if (C == 'k')
      Flags |= IMAGE_SCN_MEM_NOT_CACHED;
    else if (C == 'p')
      Flags |= IMAGE_SCN_MEM_NOT_PAGED;
    else if (C == 'r')
      Flags |= IMAGE_SCN_MEM_READ;
    else if (C == 's')
      Flags |= IMAGE_SCN_MEM_SHARED;
    else if (C == 'w')
      Flags |= IMAGE_SCN_MEM_WRITE;
    else
      fatal("/section: invalid argument: " + S);
  }
  return Flags;
}
// Parses the argument of /section, "<name>,<attributes>", and stores the
// resulting attribute mask in Config->Section under the section name.
void parseSection(StringRef S) {
  auto Parts = S.split(',');
  StringRef Name = Parts.first;
  StringRef Attrs = Parts.second;
  if (Name.empty() || Attrs.empty())
    fatal("/section: invalid argument: " + S);
  uint32_t Flags = parseSectionAttributes(Attrs);
  Config->Section[Name] = Flags;
}
// Parses a string in the form of "EMBED[,ID=<integer>]|NO" (keywords are
// case-insensitive). Results are directly written to Config:
//   "NO"             -> Config->Manifest = Configuration::No
//   "EMBED"          -> Config->Manifest = Configuration::Embed
//   "EMBED,ID=<int>" -> additionally sets Config->ManifestID
// Anything else is a fatal error.
void parseManifest(StringRef Arg) {
  if (Arg.equals_lower("no")) {
    Config->Manifest = Configuration::No;
    return;
  }

  // Consume the argument through Rest and keep Arg untouched, so that every
  // diagnostic shows the complete option value the user typed instead of
  // whatever suffix is left at the point of failure.
  StringRef Rest = Arg;
  if (!Rest.startswith_lower("embed"))
    fatal("invalid option " + Arg);
  Config->Manifest = Configuration::Embed;
  Rest = Rest.substr(strlen("embed"));
  if (Rest.empty())
    return;

  if (!Rest.startswith_lower(",id="))
    fatal("invalid option " + Arg);
  Rest = Rest.substr(strlen(",id="));
  if (Rest.getAsInteger(0, Config->ManifestID))
    fatal("invalid option " + Arg);
}
// Parses the /manifestuac argument: either "NO", or a sequence of
// space-separated "level=<string>" and "uiAccess=<string>" items (keywords
// are case-insensitive). Results are directly written to Config.
void parseManifestUAC(StringRef Arg) {
  if (Arg.equals_lower("no")) {
    Config->ManifestUAC = false;
    return;
  }

  // Rest always starts at the next item; leading blanks are dropped before
  // each item is examined.
  StringRef Rest = Arg.ltrim();
  while (!Rest.empty()) {
    if (Rest.startswith_lower("level=")) {
      auto Parts = Rest.substr(strlen("level=")).split(" ");
      Config->ManifestLevel = Parts.first;
      Rest = Parts.second.ltrim();
    } else if (Rest.startswith_lower("uiaccess=")) {
      auto Parts = Rest.substr(strlen("uiaccess=")).split(" ");
      Config->ManifestUIAccess = Parts.first;
      Rest = Parts.second.ltrim();
    } else {
      fatal("invalid option " + Rest);
    }
  }
}
// Writes S to Out line by line, wrapping every non-empty line in double
// quotes and doubling each double-quote found inside a line. Empty lines
// are skipped.
static void quoteAndPrint(raw_ostream &Out, StringRef S) {
  StringRef Rest = S;
  while (!Rest.empty()) {
    auto Parts = Rest.split("\n");
    StringRef Line = Parts.first;
    Rest = Parts.second;
    if (Line.empty())
      continue;

    Out << '\"';
    for (char C : Line) {
      if (C == '\"')
        Out << "\"\"";
      else
        Out << C;
    }
    Out << "\"\n";
  }
}
// Create the default manifest file as a temporary file and return its
// path. The file is not removed here; that is left to the caller.
static std::string createDefaultXml() {
  // Create a temporary file.
  SmallString<128> Path;
  if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path))
    fatal(EC, "cannot create a temporary file");

  // Open the temporary file for writing.
  std::error_code EC;
  llvm::raw_fd_ostream OS(Path, EC, sys::fs::F_Text);
  if (EC)
    fatal(EC, "failed to open " + Path);

  // Emit the XML. Note that we do *not* verify that the XML attributes are
  // syntactically correct. This is intentional for link.exe compatibility.
  OS << "<?xml version=\"1.0\" standalone=\"yes\"?>\n"
     << "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\"\n"
     << " manifestVersion=\"1.0\">\n";

  // The UAC section is emitted unless it was switched off.
  if (Config->ManifestUAC) {
    OS << " <trustInfo>\n"
       << " <security>\n"
       << " <requestedPrivileges>\n"
       << " <requestedExecutionLevel level=" << Config->ManifestLevel
       << " uiAccess=" << Config->ManifestUIAccess << "/>\n"
       << " </requestedPrivileges>\n"
       << " </security>\n"
       << " </trustInfo>\n";
  }

  // The dependency is configured independently of the UAC settings, so it
  // must be emitted even when the UAC section is disabled. It used to be
  // nested inside the UAC block and was silently dropped in that case.
  if (!Config->ManifestDependency.empty()) {
    OS << " <dependency>\n"
       << " <dependentAssembly>\n"
       << " <assemblyIdentity " << Config->ManifestDependency << " />\n"
       << " </dependentAssembly>\n"
       << " </dependency>\n";
  }

  OS << "</assembly>\n";
  OS.close();
  return StringRef(Path);
}
// Returns the whole contents of the file at Path as a string.
// Failure to open the file is a fatal error.
static std::string readFile(StringRef Path) {
  std::unique_ptr<MemoryBuffer> Buf =
      check(MemoryBuffer::getFile(Path), "could not open " + Path);
  StringRef Contents = Buf->getBuffer();
  return Contents.str();
}
// Returns the manifest XML for the output. Without user-supplied manifest
// inputs this is just the default manifest; otherwise the default manifest
// and the inputs are merged by running mt.exe.
static std::string createManifestXml() {
  // Create the default manifest file.
  std::string Path1 = createDefaultXml();
  // Register the temporary file for removal right away. Doing this only
  // after the early return below left the file behind whenever no manifest
  // inputs were given. (fatal() exits the process, so nothing is cleaned up
  // on that path either way.)
  FileRemover Remover1(Path1);
  if (Config->ManifestInput.empty())
    return readFile(Path1);

  // If manifest files are supplied by the user using /MANIFESTINPUT
  // option, we need to merge them with the default manifest.
  SmallString<128> Path2;
  if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path2))
    fatal(EC, "cannot create a temporary file");
  FileRemover Remover2(Path2);

  Executor E("mt.exe");
  E.add("/manifest");
  E.add(Path1);
  for (StringRef Filename : Config->ManifestInput) {
    E.add("/manifest");
    E.add(Filename);
  }
  E.add("/nologo");
  E.add("/out:" + StringRef(Path2));
  E.run();
  return readFile(Path2);
}
// Create a resource file containing a manifest XML.
// The manifest is wrapped in a temporary resource script, the script is
// compiled with rc.exe, and the resulting .res file is returned as a
// memory buffer. Any failure along the way is a fatal error.
std::unique_ptr<MemoryBuffer> createManifestRes() {
// Create a temporary file for the resource script file.
SmallString<128> RCPath;
if (auto EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath))
fatal(EC, "cannot create a temporary file");
// The script is only needed while rc.exe runs; remove it on scope exit.
FileRemover RCRemover(RCPath);
// Open the temporary file for writing.
std::error_code EC;
llvm::raw_fd_ostream Out(RCPath, EC, sys::fs::F_Text);
if (EC)
fatal(EC, "failed to open " + RCPath);
// Write resource script to the RC file.
Out << "#define LANG_ENGLISH 9\n"
<< "#define SUBLANG_DEFAULT 1\n"
<< "#define APP_MANIFEST " << Config->ManifestID << "\n"
<< "#define RT_MANIFEST 24\n"
<< "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n"
<< "APP_MANIFEST RT_MANIFEST {\n";
// The XML becomes the body of the resource, one quoted string per line.
quoteAndPrint(Out, createManifestXml());
Out << "}\n";
// Close the script before rc.exe reads it.
Out.close();
// Create output resource file.
// NOTE(review): ResPath is not registered with a FileRemover, so the .res
// file appears to be left behind after it is read -- confirm whether that
// is intended.
SmallString<128> ResPath;
if (auto EC = sys::fs::createTemporaryFile("tmp", "res", ResPath))
fatal(EC, "cannot create a temporary file");
// Run "rc.exe /fo <ResPath> /nologo <RCPath>".
Executor E("rc.exe");
E.add("/fo");
E.add(ResPath.str());
E.add("/nologo");
E.add(RCPath.str());
E.run();
return check(MemoryBuffer::getFile(ResPath), "could not open " + ResPath);
}
// Writes the manifest XML to a standalone file. The file name comes from
// Config->ManifestFile; when that is empty, "<output file>.manifest" is
// used instead.
void createSideBySideManifest() {
  std::string Path = Config->ManifestFile;
  if (Path.empty())
    Path = Config->OutputFile + ".manifest";

  // Open the destination first, then generate and write the XML.
  std::error_code EC;
  llvm::raw_fd_ostream Out(Path, EC, llvm::sys::fs::F_Text);
  if (EC)
    fatal(EC, "failed to create manifest");
  std::string Xml = createManifestXml();
  Out << Xml;
}
// Parse a string in the form of
// "<name>[=<internalname>][,@ordinal[,NONAME]][,DATA][,PRIVATE]"
// or "<name>=<dllname>.<name>".
// Used for parsing /export arguments.
// Returns the parsed Export; any malformed argument is a fatal error that
// reports the complete argument.
Export parseExport(StringRef Arg) {
Export E;
StringRef Rest;
// Everything before the first comma is the name part; Rest holds the
// comma-separated modifiers.
std::tie(E.Name, Rest) = Arg.split(",");
if (E.Name.empty())
goto err;
if (E.Name.find('=') != StringRef::npos) {
StringRef X, Y;
std::tie(X, Y) = E.Name.split("=");
// If "<name>=<dllname>.<name>".
// A dot on the right-hand side marks a forwarder. Modifiers in Rest are
// not examined in that case.
if (Y.find(".") != StringRef::npos) {
E.Name = X;
E.ForwardTo = Y;
return E;
}
// Otherwise the left side is the exported (external) name and the right
// side is the internal symbol name.
E.ExtName = X;
E.Name = Y;
if (E.Name.empty())
goto err;
}
// If "<name>=<internalname>[,@ordinal[,NONAME]][,DATA][,PRIVATE]"
while (!Rest.empty()) {
StringRef Tok;
std::tie(Tok, Rest) = Rest.split(",");
if (Tok.equals_lower("noname")) {
// NONAME is only accepted after an ordinal has been given.
if (E.Ordinal == 0)
goto err;
E.Noname = true;
continue;
}
if (Tok.equals_lower("data")) {
E.Data = true;
continue;
}
if (Tok.equals_lower("private")) {
E.Private = true;
continue;
}
if (Tok.startswith("@")) {
// Parse into a wider signed type so that out-of-range values can be
// rejected; valid ordinals are 1..65535.
int32_t Ord;
if (Tok.substr(1).getAsInteger(0, Ord))
goto err;
if (Ord <= 0 || 65535 < Ord)
goto err;
E.Ordinal = Ord;
continue;
}
// Unknown modifier.
goto err;
}
return E;
err:
fatal("invalid /export: " + Arg);
}
// On I386, strips one leading underscore from Sym if there is one.
// For every other machine type the name is returned unchanged.
static StringRef undecorate(StringRef Sym) {
  bool HasPrefix = Config->Machine == I386 && Sym.startswith("_");
  return HasPrefix ? Sym.substr(1) : Sym;
}
// Performs error checking on all /export arguments.
// It also sets ordinals.
void fixupExports() {
// Symbol ordinals must be unique.
std::set<uint16_t> Ords;
for (Export &E : Config->Exports) {
if (E.Ordinal == 0)
continue;
if (!Ords.insert(E.Ordinal).second)
fatal("duplicate export ordinal: " + E.Name);
}
for (Export &E : Config->Exports) {
if (!E.ForwardTo.empty()) {
E.SymbolName = E.Name;
} else if (Undefined *U = cast_or_null<Undefined>(E.Sym->WeakAlias)) {
E.SymbolName = U->getName();
} else {
E.SymbolName = E.Sym->getName();
}
}
for (Export &E : Config->Exports) {
if (!E.ForwardTo.empty()) {
E.ExportName = undecorate(E.Name);
} else {
E.ExportName = undecorate(E.ExtName.empty() ? E.Name : E.ExtName);
}
}
// Uniquefy by name.
std::map<StringRef, Export *> Map;
std::vector<Export> V;
for (Export &E : Config->Exports) {
auto Pair = Map.insert(std::make_pair(E.ExportName, &E));
bool Inserted = Pair.second;
if (Inserted) {
V.push_back(E);
continue;
}
Export *Existing = Pair.first->second;
if (E == *Existing || E.Name != Existing->Name)
continue;
llvm::errs() << "warning: duplicate /export option: " << E.Name << "\n";
}
Config->Exports = std::move(V);
// Sort by name.
std::sort(Config->Exports.begin(), Config->Exports.end(),
[](const Export &A, const Export &B) {
return A.ExportName < B.ExportName;
});
}
// Gives every export without an ordinal (Ordinal == 0) a fresh one,
// counting up from the largest ordinal that is already in use.
void assignExportOrdinals() {
  uint16_t Highest = 0;
  for (Export &E : Config->Exports)
    if (Highest < E.Ordinal)
      Highest = E.Ordinal;

  for (Export &E : Config->Exports) {
    if (E.Ordinal != 0)
      continue;
    ++Highest;
    E.Ordinal = Highest;
  }
}
// Parses "key=value" and remembers the value for the key in
// Config->MustMatch. If a different, non-empty value was already recorded
// for the same key, the link is aborted.
void checkFailIfMismatch(StringRef Arg) {
  auto Parts = Arg.split('=');
  StringRef K = Parts.first;
  StringRef V = Parts.second;
  if (K.empty() || V.empty())
    fatal("/failifmismatch: invalid argument: " + Arg);

  // operator[] creates an empty entry for a key that has not been seen yet.
  auto &Slot = Config->MustMatch[K];
  StringRef Existing = Slot;
  if (!Existing.empty() && V != Existing)
    fatal("/failifmismatch: mismatch detected: " + Existing + " and " + V +
          " for key " + K);
  Slot = V;
}
// Convert Windows resource files (.res files) to a .obj file
// using cvtres.exe.
std::unique_ptr<MemoryBuffer>
convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) {
// Create an output file path.
SmallString<128> Path;
if (auto EC = llvm::sys::fs::createTemporaryFile("resource", "obj", Path))
fatal(EC, "could not create temporary file");
// Execute cvtres.exe.
Executor E("cvtres.exe");
E.add("/machine:" + machineToStr(Config->Machine));
E.add("/readonly");
E.add("/nologo");
E.add("/out:" + Path);
for (MemoryBufferRef MB : MBs)
E.add(MB.getBufferIdentifier());
E.run();
return check(MemoryBuffer::getFile(Path), "could not open " + Path);
}
// Create OptTable
// Options.inc is included twice below. Each time a different macro is
// defined, so the same file yields first the prefix arrays and then the
// option table.
// Create prefix string literals used in Options.td
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Options.inc"
#undef PREFIX
// Create table mapping all options defined in Options.td
// The OPTION macro reorders the fields of an Options.inc entry into an
// OptTable::Info initializer; OPT_##ID refers to the enumerators declared
// in Driver.h.
static const llvm::opt::OptTable::Info infoTable[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \
{ \
X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \
OPT_##GROUP, OPT_##ALIAS, X6 \
},
#include "Options.inc"
#undef OPTION
};
// Option table of the COFF driver, backed by infoTable.
// NOTE(review): the second constructor argument is presumably OptTable's
// "ignore case" flag -- confirm against llvm/Option/OptTable.h.
class COFFOptTable : public llvm::opt::OptTable {
public:
COFFOptTable() : OptTable(infoTable, true) {}
};
// Parses a given list of options. Response files are expanded first, an
// option that lacks its value is a fatal error, and unknown options are
// reported on stderr but otherwise ignored.
llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) {
  // First, replace response files (@<file>-style options).
  std::vector<const char *> Expanded = replaceResponseFiles(ArgsArr);

  // Build the InputArgList from the expanded argument vector.
  COFFOptTable Table;
  unsigned MissingIndex;
  unsigned MissingCount;
  llvm::opt::InputArgList Args =
      Table.ParseArgs(Expanded, MissingIndex, MissingCount);

  // In verbose mode, show the effective command line if expanding response
  // files changed the number of arguments.
  bool Verbose = Args.hasArg(OPT_verbose);
  if (Verbose && ArgsArr.size() != Expanded.size()) {
    llvm::outs() << "Command line:";
    for (const char *Token : Expanded)
      llvm::outs() << " " << Token;
    llvm::outs() << "\n";
  }

  if (MissingCount != 0)
    fatal("missing arg value for \"" + Twine(Args.getArgString(MissingIndex)) +
          "\", expected " + Twine(MissingCount) +
          (MissingCount == 1 ? " argument." : " arguments."));

  for (auto *Unknown : Args.filtered(OPT_UNKNOWN))
    llvm::errs() << "ignoring unknown argument: " << Unknown->getSpelling()
                 << "\n";
  return Args;
}
// Parses Args, preceded by the options found in the LINK environment
// variable when that variable is set.
llvm::opt::InputArgList ArgParser::parseLINK(ArrayRef<const char *> Args) {
  Optional<std::string> Env = Process::GetEnv("LINK");
  if (Env) {
    // Options from the environment come first, the given arguments after.
    std::vector<const char *> Combined = tokenize(*Env);
    Combined.insert(Combined.end(), Args.begin(), Args.end());
    return parse(Combined);
  }
  return parse(Args);
}
// Splits S into tokens following Windows command line rules. The token
// strings are saved through a StringSaver on top of AllocAux.
std::vector<const char *> ArgParser::tokenize(StringRef S) {
  StringSaver Saver(AllocAux);
  SmallVector<const char *, 16> Tokens;
  llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens);

  std::vector<const char *> Result;
  Result.assign(Tokens.begin(), Tokens.end());
  return Result;
}
// Returns a copy of Argv in which every '@<filename>' option has been
// replaced by the options read from that file. Strings produced by the
// expansion are saved through a StringSaver on top of AllocAux.
std::vector<const char *>
ArgParser::replaceResponseFiles(std::vector<const char *> Argv) {
  StringSaver Saver(AllocAux);
  SmallVector<const char *, 256> Tokens(Argv.begin(), Argv.end());
  ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens);

  std::vector<const char *> Result;
  Result.assign(Tokens.begin(), Tokens.end());
  return Result;
}
// Prints the option summary to stdout. Argv0 is shown as the program name;
// hidden options are not listed.
void printHelp(const char *Argv0) {
  COFFOptTable().PrintHelp(llvm::outs(), Argv0, "LLVM Linker", false);
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,33 @@
//===- Error.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
namespace lld {
namespace coff {
void fatal(const Twine &Msg) {
llvm::errs() << Msg << "\n";
exit(1);
}
void fatal(std::error_code EC, const Twine &Msg) {
fatal(Msg + ": " + EC.message());
}
// Converts Err into an error code (consuming it) and reports it together
// with Msg, then terminates the process.
void fatal(llvm::Error &Err, const Twine &Msg) {
  std::error_code EC = errorToErrorCode(std::move(Err));
  fatal(EC, Msg);
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,38 @@
//===- Error.h --------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_ERROR_H
#define LLD_COFF_ERROR_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/Error.h"
namespace lld {
namespace coff {
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix);
LLVM_ATTRIBUTE_NORETURN void fatal(llvm::Error &Err, const Twine &Prefix);
// Unwraps an ErrorOr<T>: returns the contained value, or aborts through
// fatal(EC, Prefix) if V holds an error.
template <class T> T check(ErrorOr<T> &&V, const Twine &Prefix) {
  std::error_code EC = V.getError();
  if (EC)
    fatal(EC, Prefix);
  return std::move(*V);
}
// Unwraps an Expected<T>: returns the contained value, or aborts through
// fatal(Err, Prefix) if E holds an error.
template <class T> T check(Expected<T> E, const Twine &Prefix) {
  llvm::Error Err = E.takeError();
  if (Err)
    fatal(Err, Prefix);
  return std::move(*E);
}
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,244 @@
//===- ICF.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Identical COMDAT Folding is a feature to merge COMDAT sections not by
// name (which is regular COMDAT handling) but by contents. If two COMDAT
// sections have the same data, relocations, attributes, etc., then the two
// are considered identical and merged by the linker. This optimization
// makes outputs smaller.
//
// ICF is theoretically a problem of reducing graphs by merging as many
// identical subgraphs as possible, if we consider sections as vertices and
// relocations as edges. This may be a bit more complicated problem than you
// might think. The order of processing sections matters since merging two
// sections can make other sections, whose relocations now point to the same
// section, mergeable. Graphs may contain cycles, which is common in COFF.
// We need a sophisticated algorithm to do this properly and efficiently.
//
// What we do in this file is this. We split sections into groups. Sections
// in the same group are considered identical.
//
// First, all sections are grouped by their "constant" values. Constant
// values are values that are never changed by ICF, such as section contents,
// section name, number of relocations, type and offset of each relocation,
// etc. Because we do not care about some relocation targets in this step,
// two sections in the same group may not be identical, but at least two
// sections in different groups can never be identical.
//
// Then, we try to split each group by relocation targets. Relocations are
// considered identical if and only if the relocation targets are in the
// same group. Splitting a group may make other groups splittable,
// because two relocations that were previously considered identical might
// now point to different groups. We repeat this step until it converges.
//
// This is the so-called "optimistic" algorithm described in
// http://research.google.com/pubs/pub36912.html.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Symbols.h"
#include "lld/Core/Parallel.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <atomic>
#include <vector>
using namespace llvm;
namespace lld {
namespace coff {
// Iterator over the vector of candidate sections that ICF partitions in place.
typedef std::vector<SectionChunk *>::iterator ChunkIterator;
// Signature of the equivalence predicates (equalsConstant / equalsVariable)
// that are passed to forEachGroup() and segregate().
typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *);
// Implements the Identical COMDAT Folding pass. An instance is created per
// run by doICF(); the only state it carries is the group ID counter.
class ICF {
public:
// Runs the whole pass over the given chunks.
void run(const std::vector<Chunk *> &V);
private:
// Hash of a section's constant properties; used to form the initial groups.
static uint64_t getHash(SectionChunk *C);
// Predicate for the first splitting round: compares constant properties.
static bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
// Predicate for the later splitting rounds: compares the groups of
// associative children and relocation targets.
static bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
// Applies segregate() to every run of consecutive chunks sharing a GroupID.
// Returns true if any group was split.
bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq);
// Splits [Begin, End) into sub-groups under Eq. Returns true if it split.
bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
// Source of fresh group IDs. Starts at 1, so this counter never hands out 0.
std::atomic<uint64_t> NextID = { 1 };
};
// Public entry point of the ICF pass: builds a fresh ICF object and runs it
// over Chunks.
void doICF(const std::vector<Chunk *> &Chunks) {
  ICF Pass;
  Pass.run(Chunks);
}
// Returns a hash over the properties of C that are compared by
// equalsConstant(): permissions, section name, relocation count, alignment,
// raw data size and checksum. run() uses the result (with the top bit set)
// as the initial GroupID, so sections with different hashes start out in
// different groups.
uint64_t ICF::getHash(SectionChunk *C) {
return hash_combine(C->getPermissions(),
hash_value(C->SectionName),
C->NumRelocs,
C->getAlign(),
uint32_t(C->Header->SizeOfRawData),
C->Checksum);
}
// First-round equivalence predicate. Returns true if A and B have the same
// number of associative children and relocations, their associative
// children are pairwise in the same group, every relocation pair matches
// (type, offset and target), and the section attributes and contents are
// identical.
bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
  // Cheap shape checks first; they also make the index-wise walks below safe.
  if (A->AssocChildren.size() != B->AssocChildren.size())
    return false;
  if (A->NumRelocs != B->NumRelocs)
    return false;

  // Associative sections must pairwise belong to the same group.
  const size_t NumChildren = A->AssocChildren.size();
  for (size_t Idx = 0; Idx < NumChildren; ++Idx) {
    if (A->AssocChildren[Idx]->GroupID != B->AssocChildren[Idx]->GroupID)
      return false;
  }

  // Two relocations match if they have the same type and offset and resolve
  // either to the very same symbol or to regular symbols at the same value
  // inside chunks of the same group.
  auto SameReloc = [&](const coff_relocation &RA, const coff_relocation &RB) {
    if (RA.Type != RB.Type)
      return false;
    if (RA.VirtualAddress != RB.VirtualAddress)
      return false;
    SymbolBody *TargetA = A->File->getSymbolBody(RA.SymbolTableIndex)->repl();
    SymbolBody *TargetB = B->File->getSymbolBody(RB.SymbolTableIndex)->repl();
    if (TargetA == TargetB)
      return true;
    auto *DefA = dyn_cast<DefinedRegular>(TargetA);
    if (!DefA)
      return false;
    auto *DefB = dyn_cast<DefinedRegular>(TargetB);
    if (!DefB)
      return false;
    return DefA->getValue() == DefB->getValue() &&
           DefA->getChunk()->GroupID == DefB->getChunk()->GroupID;
  };
  bool RelocsMatch = std::equal(A->Relocs.begin(), A->Relocs.end(),
                                B->Relocs.begin(), SameReloc);
  if (!RelocsMatch)
    return false;

  // Finally compare section attributes and the raw contents.
  return A->getPermissions() == B->getPermissions() &&
         A->SectionName == B->SectionName &&
         A->getAlign() == B->getAlign() &&
         A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
         A->Checksum == B->Checksum &&
         A->getContents() == B->getContents();
}
// Later-round equivalence predicate. Only looks at the parts that can change
// while groups are being split: the groups of the associative children and
// the groups of the relocation targets. It indexes B's children and
// relocations by A's counts, so it relies on A and B having equal counts.
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
  // Associative sections must pairwise belong to the same group.
  const size_t NumChildren = A->AssocChildren.size();
  for (size_t Idx = 0; Idx < NumChildren; ++Idx) {
    if (A->AssocChildren[Idx]->GroupID != B->AssocChildren[Idx]->GroupID)
      return false;
  }

  // Relocation targets must be the same symbol, or regular symbols whose
  // chunks are currently in the same group.
  auto SameTarget = [&](const coff_relocation &RA, const coff_relocation &RB) {
    SymbolBody *TargetA = A->File->getSymbolBody(RA.SymbolTableIndex)->repl();
    SymbolBody *TargetB = B->File->getSymbolBody(RB.SymbolTableIndex)->repl();
    if (TargetA == TargetB)
      return true;
    auto *DefA = dyn_cast<DefinedRegular>(TargetA);
    if (!DefA)
      return false;
    auto *DefB = dyn_cast<DefinedRegular>(TargetB);
    if (!DefB)
      return false;
    return DefA->getChunk()->GroupID == DefB->getChunk()->GroupID;
  };
  return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(),
                    SameTarget);
}
// Splits the group [Begin, End) into sub-groups of mutually Eq-equivalent
// chunks. Repeatedly takes the first chunk of the remaining range as a
// leader, moves everything equivalent to it to the front, and, if anything
// is left over, gives the leader's sub-group a fresh GroupID. The final
// sub-group keeps its existing ID. Returns true if at least one split
// happened. The range must not be empty.
bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
  bool Split = false;
  ChunkIterator Cur = Begin;
  while (true) {
    SectionChunk *Leader = *Cur;
    ChunkIterator Mid = std::partition(
        Cur + 1, End, [&](SectionChunk *SC) { return Eq(Leader, SC); });
    // Everything that remains is equivalent to the leader: nothing to split.
    if (Mid == End)
      return Split;
    uint64_t FreshID = NextID++;
    for (ChunkIterator I = Cur; I != Mid; ++I)
      (*I)->GroupID = FreshID;
    Cur = Mid;
    Split = true;
  }
}
// Walks Chunks, in which members of the same group are adjacent, and calls
// segregate() on each maximal run of chunks that share a GroupID. Returns
// true if any of those calls split a group.
bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) {
  bool Changed = false;
  ChunkIterator Cur = Chunks.begin();
  const ChunkIterator Last = Chunks.end();
  while (Cur != Last) {
    SectionChunk *Leader = *Cur;
    // Find the end of the run that has the leader's group ID.
    ChunkIterator GroupEnd =
        std::find_if(Cur + 1, Last, [&](SectionChunk *SC) {
          return SC->GroupID != Leader->GroupID;
        });
    Changed |= segregate(Cur, GroupEnd, Eq);
    Cur = GroupEnd;
  }
  return Changed;
}
// Merge identical COMDAT sections.
// Two sections are considered the same if their section headers,
// contents and relocations are all the same.
//
// The pass has four steps: (1) tag every mergeable section with a
// hash-derived GroupID, (2) sort the candidates so that equal IDs are
// adjacent, (3) split groups until no more splits happen, and (4) fold every
// group that still has more than one member into its first member.
void ICF::run(const std::vector<Chunk *> &Vec) {
// Collect only mergeable sections and group by hash value.
// A section is a candidate if it is a live COMDAT whose symbol is external
// and whose contents are not writable.
parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) {
if (auto *SC = dyn_cast<SectionChunk>(C)) {
bool Global = SC->Sym && SC->Sym->isExternal();
bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
if (SC->isCOMDAT() && SC->isLive() && Global && !Writable)
// Setting bit 63 makes the ID nonzero, which is what the loop below
// tests for. NOTE(review): this assumes GroupID is 0 on chunks that
// were not tagged here -- confirm against SectionChunk's definition.
SC->GroupID = getHash(SC) | (uint64_t(1) << 63);
}
});
// Candidates (nonzero GroupID) go into Chunks; every other section chunk
// gets a unique ID of its own from the counter.
std::vector<SectionChunk *> Chunks;
for (Chunk *C : Vec) {
if (auto *SC = dyn_cast<SectionChunk>(C)) {
if (SC->GroupID) {
Chunks.push_back(SC);
} else {
SC->GroupID = NextID++;
}
}
}
// From now on, sections in Chunks are ordered so that sections in
// the same group are consecutive in the vector.
std::sort(Chunks.begin(), Chunks.end(),
[](SectionChunk *A, SectionChunk *B) {
return A->GroupID < B->GroupID;
});
// Split groups until we get a convergence.
// The first round uses the constant properties; the following rounds only
// re-check group membership of children and relocation targets. Cnt counts
// the rounds for the verbose report.
int Cnt = 1;
forEachGroup(Chunks, equalsConstant);
for (;;) {
if (!forEachGroup(Chunks, equalsVariable))
break;
++Cnt;
}
if (Config->Verbose)
llvm::outs() << "\nICF needed " << Cnt << " iterations.\n";
// Merge sections in the same group.
// Head is the first chunk of a run; every other chunk of the run is handed
// to Head->replace(). Runs of length one are skipped.
for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) {
SectionChunk *Head = *It++;
auto Bound = std::find_if(It, End, [&](SectionChunk *SC) {
return Head->GroupID != SC->GroupID;
});
if (It == Bound)
continue;
if (Config->Verbose)
llvm::outs() << "Selected " << Head->getDebugName() << "\n";
while (It != Bound) {
SectionChunk *SC = *It++;
if (Config->Verbose)
llvm::outs() << " Removed " << SC->getDebugName() << "\n";
Head->replace(SC);
}
}
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,373 @@
//===- InputFiles.cpp -----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Config.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm-c/lto.h"
#include <cstring>
#include <system_error>
#include <utility>
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support::endian;
using llvm::Triple;
using llvm::support::ulittle32_t;
namespace lld {
namespace coff {
// Counter from which each InputFile takes its Index at construction time.
int InputFile::NextIndex = 0;
// The single LLVMContext shared by all bitcode files; BitcodeFile::parse()
// passes it to the LTO module loader.
llvm::LLVMContext BitcodeFile::Context;
// Returns the part of Path after the last '\' or '/' separator, i.e. what
// is expected to be the file name. A path without any separator is returned
// unchanged.
static StringRef getBasename(StringRef Path) {
  const size_t Sep = Path.find_last_of("\\/");
  return Sep == StringRef::npos ? Path : Path.substr(Sep + 1);
}
// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
std::string InputFile::getShortName() {
if (ParentName == "")
return getName().lower();
std::string Res = (getBasename(ParentName) + "(" +
getBasename(getName()) + ")").str();
return StringRef(Res).lower();
}
// Opens the archive in MB, creates one Lazy symbol per archive symbol table
// entry, and registers every member in Seen. Any failure is fatal.
void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
File = check(Archive::create(MB), "failed to parse static library");
// Allocate a buffer for Lazy objects.
size_t NumSyms = File->getNumberOfSymbols();
LazySymbols.reserve(NumSyms);
// Read the symbol table to construct Lazy objects.
for (const Archive::Symbol &Sym : File->symbols())
LazySymbols.emplace_back(this, Sym);
// Seen is a map from member files to boolean values. Initially
// all members are mapped to false, which indicates all these files
// are not read yet.
// The map is keyed by each member's offset in the archive.
// NOTE(review): iteration errors are presumably reported through Err by
// children(); it is checked once after the loop.
Error Err;
for (auto &Child : File->children(Err))
Seen[Child.getChildOffset()].clear();
if (Err)
fatal(Err, "failed to parse static library");
}
// Returns the buffer of the archive member that defines Sym. Each member is
// handed out only once: if its flag in Seen was already set, an empty
// MemoryBufferRef is returned instead. Failure to locate the member or its
// buffer is fatal.
// This function is thread-safe.
MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) {
  const Archive::Child &Member =
      check(Sym->getMember(),
            "could not get the member for symbol " + Sym->getName());

  // test_and_set() returns the previous state, so only the first caller for
  // a given member sees "false" and gets the real buffer.
  bool AlreadyReturned = Seen[Member.getChildOffset()].test_and_set();
  if (!AlreadyReturned)
    return check(Member.getMemoryBufferRef(),
                 "could not get the buffer for the member defining symbol " +
                     Sym->getName());
  return MemoryBufferRef();
}
// Parses MB as a COFF object file and then reads its sections, symbols and
// SEH table. It is a fatal error if the buffer cannot be parsed as a binary
// or if the binary is not a COFF object.
void ObjectFile::parse() {
  std::unique_ptr<Binary> Bin =
      check(createBinary(MB), "failed to parse object file");
  auto *Obj = dyn_cast<COFFObjectFile>(Bin.get());
  if (!Obj)
    fatal(getName() + " is not a COFF file");
  // Transfer ownership of the binary from Bin to COFFObj.
  Bin.release();
  COFFObj.reset(Obj);

  // Read section and symbol tables.
  initializeChunks();
  initializeSymbols();
  initializeSEH();
}
// Walks the section table and creates a SectionChunk for every section that
// takes part in the link. Special sections are handled on the way:
//   .sxdata   - remembered in SXData for initializeSEH(), no chunk
//   .drectve  - contents copied into Directives, no chunk
//   .debug, .debug$*          - always skipped
//   other .debug* sections    - skipped unless Config->Debug is set
//   IMAGE_SCN_LNK_REMOVE      - skipped
// Any failure to read the section table, a section name or the .drectve
// contents is fatal.
void ObjectFile::initializeChunks() {
  uint32_t NumSections = COFFObj->getNumberOfSections();
  Chunks.reserve(NumSections);
  // Section numbers are 1-based, so slot 0 of SparseChunks stays null.
  SparseChunks.resize(NumSections + 1);
  for (uint32_t I = 1; I < NumSections + 1; ++I) {
    const coff_section *Sec;
    StringRef Name;
    if (auto EC = COFFObj->getSection(I, Sec))
      fatal(EC, "getSection failed: #" + Twine(I));
    if (auto EC = COFFObj->getSectionName(Sec, Name))
      fatal(EC, "getSectionName failed: #" + Twine(I));
    if (Name == ".sxdata") {
      SXData = Sec;
      continue;
    }
    if (Name == ".drectve") {
      ArrayRef<uint8_t> Data;
      // Do not silently drop linker directives when the section cannot be
      // read; report it like the other section-table failures above.
      if (auto EC = COFFObj->getSectionContents(Sec, Data))
        fatal(EC, "getSectionContents failed: #" + Twine(I));
      Directives = std::string((const char *)Data.data(), Data.size());
      continue;
    }
    // Skip non-DWARF debug info. MSVC linker converts the sections into
    // a PDB file, but we don't support that.
    if (Name == ".debug" || Name.startswith(".debug$"))
      continue;
    // We want to preserve DWARF debug sections only when /debug is on.
    if (!Config->Debug && Name.startswith(".debug"))
      continue;
    if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
      continue;
    auto *C = new (Alloc) SectionChunk(this, Sec);
    Chunks.push_back(C);
    SparseChunks[I] = C;
  }
}
void ObjectFile::initializeSymbols() {
uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
SymbolBodies.reserve(NumSymbols);
SparseSymbolBodies.resize(NumSymbols);
llvm::SmallVector<std::pair<Undefined *, uint32_t>, 8> WeakAliases;
int32_t LastSectionNumber = 0;
for (uint32_t I = 0; I < NumSymbols; ++I) {
// Get a COFFSymbolRef object.
COFFSymbolRef Sym =
check(COFFObj->getSymbol(I), "broken object file: " + getName());
const void *AuxP = nullptr;
if (Sym.getNumberOfAuxSymbols())
AuxP = COFFObj->getSymbol(I + 1)->getRawPtr();
bool IsFirst = (LastSectionNumber != Sym.getSectionNumber());
SymbolBody *Body = nullptr;
if (Sym.isUndefined()) {
Body = createUndefined(Sym);
} else if (Sym.isWeakExternal()) {
Body = createUndefined(Sym);
uint32_t TagIndex =
static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex;
WeakAliases.emplace_back((Undefined *)Body, TagIndex);
} else {
Body = createDefined(Sym, AuxP, IsFirst);
}
if (Body) {
SymbolBodies.push_back(Body);
SparseSymbolBodies[I] = Body;
}
I += Sym.getNumberOfAuxSymbols();
LastSectionNumber = Sym.getSectionNumber();
}
for (auto WeakAlias : WeakAliases)
WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second];
}
// Allocates an Undefined symbol body carrying the name of Sym. The body is
// placed in this file's allocator.
Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) {
  StringRef SymName;
  COFFObj->getSymbolName(Sym, SymName);
  Undefined *Body = new (Alloc) Undefined(SymName);
  return Body;
}
// Creates the symbol body for a defined (non-undefined, non-weak-external)
// symbol.
//   Sym     - the symbol table entry.
//   AuxP    - its first auxiliary record, or null if it has none.
//   IsFirst - true if the previous symbol belonged to a different section;
//             together with AuxP this marks a section definition.
// Returns null for symbols that produce no body: "@comp.id", "@feat.00",
// debug symbols, and symbols whose section has no chunk. Symbols that refer
// to reserved or nonexistent sections make the object file "broken", which
// is fatal.
Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
                                   bool IsFirst) {
  StringRef Name;
  if (Sym.isCommon()) {
    auto *C = new (Alloc) CommonChunk(Sym);
    Chunks.push_back(C);
    return new (Alloc) DefinedCommon(this, Sym, C);
  }
  if (Sym.isAbsolute()) {
    COFFObj->getSymbolName(Sym, Name);
    // Skip special symbols.
    if (Name == "@comp.id")
      return nullptr;
    // COFF spec 5.10.1. The .sxdata section.
    if (Name == "@feat.00") {
      // Bit 0 of the value marks the object as SEH-compatible.
      if (Sym.getValue() & 1)
        SEHCompat = true;
      return nullptr;
    }
    return new (Alloc) DefinedAbsolute(Name, Sym);
  }
  int32_t SectionNumber = Sym.getSectionNumber();
  if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
    return nullptr;
  // Reserved sections numbers don't have contents.
  if (llvm::COFF::isReservedSectionNumber(SectionNumber))
    fatal("broken object file: " + getName());
  // This symbol references a section which is not present in the section
  // header.
  if ((uint32_t)SectionNumber >= SparseChunks.size())
    fatal("broken object file: " + getName());
  // Nothing else to do without a section chunk.
  auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]);
  if (!SC)
    return nullptr;
  // Handle section definitions
  if (IsFirst && AuxP) {
    auto *Aux = reinterpret_cast<const coff_aux_section_definition *>(AuxP);
    if (Aux->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
      // The parent section number comes from the file; validate it the same
      // way as the symbol's own section number before indexing.
      int32_t ParentNumber = Aux->getNumber(Sym.isBigObj());
      if ((uint32_t)ParentNumber >= SparseChunks.size())
        fatal("broken object file: " + getName());
      if (auto *ParentSC =
              cast_or_null<SectionChunk>(SparseChunks[ParentNumber]))
        ParentSC->addAssociative(SC);
    }
    SC->Checksum = Aux->CheckSum;
  }
  auto *B = new (Alloc) DefinedRegular(this, Sym, SC);
  // A COMDAT section's symbol is the one at offset 0 that has no auxiliary
  // record.
  if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP)
    SC->setSymbol(B);
  return B;
}
// Fills SEHandlers from the .sxdata section, which is an array of
// little-endian 32-bit symbol table indices. Does nothing unless the object
// is marked SEH-compatible and has an .sxdata section. An unreadable or
// malformed section is a fatal error.
void ObjectFile::initializeSEH() {
  if (!SEHCompat || !SXData)
    return;
  ArrayRef<uint8_t> A;
  // Do not silently end up with an empty handler list when the section
  // cannot be read.
  if (auto EC = COFFObj->getSectionContents(SXData, A))
    fatal(EC, "could not read .sxdata");
  if (A.size() % 4 != 0)
    fatal(".sxdata must be an array of symbol table indices");
  auto *I = reinterpret_cast<const ulittle32_t *>(A.data());
  auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size());
  for (; I != E; ++I) {
    // The indices come straight from the file; reject out-of-range ones
    // instead of reading past the end of SparseSymbolBodies.
    uint32_t SymIndex = *I;
    if (SymIndex >= SparseSymbolBodies.size())
      fatal(".sxdata must be an array of symbol table indices");
    SEHandlers.insert(SparseSymbolBodies[SymIndex]);
  }
}
// Returns the machine type recorded in the COFF header, or
// IMAGE_FILE_MACHINE_UNKNOWN if no COFF object has been loaded yet.
MachineTypes ObjectFile::getMachineType() {
  if (!COFFObj)
    return IMAGE_FILE_MACHINE_UNKNOWN;
  return static_cast<MachineTypes>(COFFObj->getMachine());
}
// Removes at most one leading character from S: if S is non-empty and its
// first character occurs in Chars (as tested by strchr), the rest of S is
// returned; otherwise S is returned unchanged.
StringRef ltrim1(StringRef S, const char *Chars) {
  if (S.empty())
    return S;
  const bool FirstIsListed = strchr(Chars, S[0]) != nullptr;
  return FirstIsListed ? S.substr(1) : S;
}
// Parses a short import library member: a coff_import_header followed by
// two NUL-terminated strings, the symbol name and the DLL name. Creates the
// "__imp_<name>" import data symbol and, for code imports, a thunk symbol
// under the plain name. A buffer that is too small, whose size disagrees
// with the header, or whose strings are not terminated inside the buffer is
// rejected as a broken import library.
void ImportFile::parse() {
  const char *Buf = MB.getBufferStart();
  const char *End = MB.getBufferEnd();
  const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);

  // The header itself must fit before any of its fields may be read.
  if ((size_t)(End - Buf) < sizeof(*Hdr))
    fatal("broken import library");
  // Check if the total size is valid.
  if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
    fatal("broken import library");

  // Read names and create an __imp_ symbol.
  // Make sure the symbol name is terminated inside the buffer before it is
  // treated as a C string.
  const char *SymNameStart = Buf + sizeof(*Hdr);
  if (!memchr(SymNameStart, '\0', End - SymNameStart))
    fatal("broken import library");
  StringRef Name = StringAlloc.save(StringRef(SymNameStart));
  StringRef ImpName = StringAlloc.save("__imp_" + Name);

  // The DLL name follows the symbol name's terminator; the same check
  // applies to it.
  const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
  if (!memchr(NameStart, '\0', End - NameStart))
    fatal("broken import library");
  DLLName = StringRef(NameStart);

  // Derive the name under which the symbol is exported by the DLL.
  StringRef ExtName;
  switch (Hdr->getNameType()) {
  case IMPORT_ORDINAL:
    ExtName = "";
    break;
  case IMPORT_NAME:
    ExtName = Name;
    break;
  case IMPORT_NAME_NOPREFIX:
    ExtName = ltrim1(Name, "?@_");
    break;
  case IMPORT_NAME_UNDECORATE:
    ExtName = ltrim1(Name, "?@_");
    ExtName = ExtName.substr(0, ExtName.find('@'));
    break;
  }
  ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr);
  SymbolBodies.push_back(ImpSym);
  // If type is function, we need to create a thunk which jump to an
  // address pointed by the __imp_ symbol. (This allows you to call
  // DLL functions just like regular non-DLL functions.)
  if (Hdr->getType() != llvm::COFF::IMPORT_CODE)
    return;
  ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine);
  SymbolBodies.push_back(ThunkSym);
}
// Loads MB as an LTO module and creates a symbol body for every
// non-internal symbol in it. Failure to create the module is fatal.
void BitcodeFile::parse() {
// Usually parse() is thread-safe, but bitcode file is an exception.
// All bitcode files share one static Context, so parsing is serialized
// with the static mutex Mu.
std::lock_guard<std::mutex> Lock(Mu);
Context.enableDebugTypeODRUniquing();
ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = LTOModule::createFromBuffer(
Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions());
M = check(std::move(ModOrErr), "could not create LTO module");
// Symbol names are copied into this file's allocator.
llvm::StringSaver Saver(Alloc);
for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) {
lto_symbol_attributes Attrs = M->getSymbolAttributes(I);
// Internal-scope symbols get no symbol body.
if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL)
continue;
StringRef SymName = Saver.save(M->getSymbolName(I));
int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK;
if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) {
SymbolBodies.push_back(new (Alloc) Undefined(SymName));
} else {
// A definition is marked replaceable if it is tentative (common), a
// COMDAT, or a weak alias.
bool Replaceable =
(SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common
(Attrs & LTO_SYMBOL_COMDAT) || // comdat
(SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external
(Attrs & LTO_SYMBOL_ALIAS)));
SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName,
Replaceable));
}
}
// Linker options recorded in the module become this file's directives.
Directives = M->getLinkerOpts();
}
// Maps the architecture of the module's target triple to a COFF machine
// type: x86_64 -> AMD64, x86 -> I386, arm -> ARMNT. Returns
// IMAGE_FILE_MACHINE_UNKNOWN for any other architecture or when no module
// is held.
MachineTypes BitcodeFile::getMachineType() {
  if (!M)
    return IMAGE_FILE_MACHINE_UNKNOWN;
  const Triple::ArchType Arch = Triple(M->getTargetTriple()).getArch();
  if (Arch == Triple::x86_64)
    return AMD64;
  if (Arch == Triple::x86)
    return I386;
  if (Arch == Triple::arm)
    return ARMNT;
  return IMAGE_FILE_MACHINE_UNKNOWN;
}
// Mutex held by BitcodeFile::parse() for its whole duration.
std::mutex BitcodeFile::Mu;
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,223 @@
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_INPUT_FILES_H
#define LLD_COFF_INPUT_FILES_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <atomic>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <vector>
namespace lld {
namespace coff {
using llvm::LTOModule;
using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
using llvm::COFF::MachineTypes;
using llvm::object::Archive;
using llvm::object::COFFObjectFile;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_section;
class Chunk;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
class Lazy;
class SymbolBody;
class Undefined;
// The root class of input files.
// Holds what all input kinds share: the underlying memory buffer, the kind
// tag used by classof() in the subclasses, the .drectve contents, an
// optional parent (archive) name and a per-file index.
class InputFile {
public:
// Discriminator for the concrete subclass.
enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind };
Kind kind() const { return FileKind; }
virtual ~InputFile() {}
// Returns the filename.
// (This is the identifier of the underlying memory buffer.)
StringRef getName() { return MB.getBufferIdentifier(); }
// Returns symbols defined by this file.
virtual std::vector<SymbolBody *> &getSymbols() = 0;
// Reads a file (the constructor doesn't do that).
virtual void parse() = 0;
// Returns the CPU type this file was compiled to.
// The base implementation reports IMAGE_FILE_MACHINE_UNKNOWN.
virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
// Returns a short, human-friendly filename. If this is a member of
// an archive file, a returned value includes parent's filename.
// Used for logging or debugging.
std::string getShortName();
// Sets a parent filename if this file is created from an archive.
// Only the StringRef is stored, so the referenced string must outlive its
// use by this object.
void setParentName(StringRef N) { ParentName = N; }
// Returns .drectve section contents if exist.
// The result is trimmed and refers to the Directives member.
StringRef getDirectives() { return StringRef(Directives).trim(); }
// Each file has a unique index. The index number is used to
// resolve ties in symbol resolution.
int Index;
// Next index to hand out; incremented by the constructor.
static int NextIndex;
protected:
InputFile(Kind K, MemoryBufferRef M)
: Index(NextIndex++), MB(M), FileKind(K) {}
// The file's contents.
MemoryBufferRef MB;
// Linker directives found in the file; filled in by subclasses.
std::string Directives;
private:
const Kind FileKind;
// Name of the containing archive; empty if there is none.
StringRef ParentName;
};
// .lib or .a file.
class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
void parse() override;
// Returns a memory buffer for a given symbol. An empty memory buffer
// is returned if we have already returned the same memory buffer.
// (So that we don't instantiate same members more than once.)
MemoryBufferRef getMember(const Archive::Symbol *Sym);
// Returns the Lazy symbols created by parse(), one per archive symbol.
llvm::MutableArrayRef<Lazy> getLazySymbols() { return LazySymbols; }
// All symbols returned by ArchiveFiles are of Lazy type.
// Therefore this accessor must never be called on an archive.
std::vector<SymbolBody *> &getSymbols() override {
llvm_unreachable("internal fatal");
}
private:
// The parsed archive; set by parse().
std::unique_ptr<Archive> File;
// NOTE(review): not referenced by the member functions visible in
// InputFiles.cpp -- confirm whether this member is still needed.
std::string Filename;
std::vector<Lazy> LazySymbols;
// Maps a member's offset in the archive to a flag that is set once the
// member's buffer has been returned by getMember().
std::map<uint64_t, std::atomic_flag> Seen;
};
// .obj or .o file. This may be a member of an archive file.
class ObjectFile : public InputFile {
public:
explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
void parse() override;
MachineTypes getMachineType() override;
// Returns all chunks created for this file.
std::vector<Chunk *> &getChunks() { return Chunks; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
// Returns a SymbolBody object for the SymbolIndex'th symbol in the
// underlying object file.
// The index is not range-checked here; the result may be null for slots
// that have no symbol body.
SymbolBody *getSymbolBody(uint32_t SymbolIndex) {
return SparseSymbolBodies[SymbolIndex];
}
// Returns the underlying COFF file.
COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
// True if this object file is compatible with SEH.
// COFF-specific and x86-only.
bool SEHCompat = false;
// The list of safe exception handlers listed in .sxdata section.
// COFF-specific and x86-only.
std::set<SymbolBody *> SEHandlers;
private:
// Helpers called by parse(), in this order.
void initializeChunks();
void initializeSymbols();
void initializeSEH();
Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst);
Undefined *createUndefined(COFFSymbolRef Sym);
std::unique_ptr<COFFObjectFile> COFFObj;
// Allocator for the chunks and symbol bodies created by this file.
llvm::BumpPtrAllocator Alloc;
// The .sxdata section header, if the file has one.
const coff_section *SXData = nullptr;
// List of all chunks defined by this file. This includes both section
// chunks and non-section chunks for common symbols.
std::vector<Chunk *> Chunks;
// This vector contains the same chunks as Chunks, but they are
// indexed such that you can get a SectionChunk by section index.
// Nonexistent section indices are filled with null pointers.
// (Because section number is 1-based, the first slot is always a
// null pointer.)
std::vector<Chunk *> SparseChunks;
// List of all symbols referenced or defined by this file.
std::vector<SymbolBody *> SymbolBodies;
// This vector contains the same symbols as SymbolBodies, but they
// are indexed such that you can get a SymbolBody by symbol
// index. Nonexistent indices (which are occupied by auxiliary
// symbols in the real symbol table) are filled with null pointers.
std::vector<SymbolBody *> SparseSymbolBodies;
};
// This type represents import library members that contain DLL names
// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
// for details about the format.
class ImportFile : public InputFile {
public:
explicit ImportFile(MemoryBufferRef M)
: InputFile(ImportKind, M), StringAlloc(StringAllocAux) {}
static bool classof(const InputFile *F) { return F->kind() == ImportKind; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
// The "__imp_" data symbol; set by parse().
DefinedImportData *ImpSym = nullptr;
// The call thunk; set by parse() for code imports only, otherwise null.
DefinedImportThunk *ThunkSym = nullptr;
// Name of the DLL the symbol is imported from.
std::string DLLName;
private:
void parse() override;
std::vector<SymbolBody *> SymbolBodies;
// Allocator for the symbol bodies.
llvm::BumpPtrAllocator Alloc;
// Backing storage for StringAlloc. It is declared before StringAlloc so
// that it is constructed first and can be passed to StringAlloc in the
// constructor's initializer list; keep this order.
llvm::BumpPtrAllocator StringAllocAux;
llvm::StringSaver StringAlloc;
};
// Used for LTO.
// Wraps an LLVM bitcode input as an LTOModule.
class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
MachineTypes getMachineType() override;
// Transfers ownership of the module to the caller; afterwards this file no
// longer holds a module.
std::unique_ptr<LTOModule> takeModule() { return std::move(M); }
// The context shared by all bitcode files.
static llvm::LLVMContext Context;
private:
void parse() override;
std::vector<SymbolBody *> SymbolBodies;
// Allocator for symbol bodies and saved symbol names.
llvm::BumpPtrAllocator Alloc;
std::unique_ptr<LTOModule> M;
// Serializes parse() across all bitcode files.
static std::mutex Mu;
};
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,489 @@
//===- Librarian.cpp ------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains functions for the Librarian. The librarian creates and
// manages libraries of the Common Object File Format (COFF) object files. It
// primarily is used for creating static libraries and import libraries.
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "Symbols.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Path.h"
#include <vector>
using namespace lld::coff;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm;
// Returns true if the configured target machine is one of the 32-bit ones
// (ARMNT, I386) and false for AMD64. Any other machine is unsupported.
static bool is32bit() {
  switch (Config->Machine) {
  case IMAGE_FILE_MACHINE_ARMNT:
  case IMAGE_FILE_MACHINE_I386:
    return true;
  case IMAGE_FILE_MACHINE_AMD64:
    return false;
  default:
    llvm_unreachable("unsupported machine");
  }
}
// Returns the relocation type constant this file uses for image-relative
// 32-bit addresses on the configured target machine. Any machine other than
// AMD64, ARMNT and I386 is unsupported.
static uint16_t getImgRelRelocation() {
  switch (Config->Machine) {
  case IMAGE_FILE_MACHINE_I386:
    return IMAGE_REL_I386_DIR32NB;
  case IMAGE_FILE_MACHINE_ARMNT:
    return IMAGE_REL_ARM_ADDR32NB;
  case IMAGE_FILE_MACHINE_AMD64:
    return IMAGE_REL_AMD64_ADDR32NB;
  default:
    llvm_unreachable("unsupported machine");
  }
}
// Appends the object representation of Data (sizeof(T) raw bytes) to the
// end of B.
template <class T> void append(std::vector<uint8_t> &B, const T &Data) {
  const size_t OldSize = B.size();
  B.resize(OldSize + sizeof(T));
  memcpy(B.data() + OldSize, &Data, sizeof(T));
}
// Appends a COFF string table holding Strings to B.
//
// The COFF string table consists of a 4-byte value which is the size of the
// table, including the length field itself. This value is followed by the
// string content itself, which is an array of null-terminated C-style
// strings. The termination is important as they are referenced to by offset
// by the symbol entity in the file format.
//
// An empty list yields a table that consists of the length field only
// (value 4).
static void writeStringTable(std::vector<uint8_t> &B,
                             ArrayRef<const std::string> Strings) {
  const std::vector<uint8_t>::size_type Offset = B.size();

  // Reserve the length field right away. It is filled in at the end, once
  // the total size is known; reserving it here keeps that write in bounds
  // even when there are no strings at all.
  B.resize(Offset + sizeof(uint32_t));

  for (const auto &S : Strings) {
    const std::vector<uint8_t>::size_type Pos = B.size();
    // resize() zero-fills the new bytes, which provides the terminator.
    B.resize(Pos + S.length() + 1);
    memcpy(&B[Pos], S.data(), S.length());
  }

  // Backfill the length of the table now that it has been computed.
  support::ulittle32_t Length(B.size() - Offset);
  memcpy(&B[Offset], &Length, sizeof(Length));
}
static std::string getImplibPath() {
if (!Config->Implib.empty())
return Config->Implib;
SmallString<128> Out = StringRef(Config->OutputFile);
sys::path::replace_extension(Out, ".lib");
return Out.str();
}
// Picks the import name type for a symbol. If the symbol and its external
// name differ, the result is IMPORT_NAME_UNDECORATE. Otherwise it is
// IMPORT_NAME_NOPREFIX for an I386 symbol that starts with "_", and
// IMPORT_NAME in all remaining cases.
static ImportNameType getNameType(StringRef Sym, StringRef ExtName) {
  if (Sym == ExtName) {
    const bool HasUnderscore = Config->Machine == I386 && Sym.startswith("_");
    return HasUnderscore ? IMPORT_NAME_NOPREFIX : IMPORT_NAME;
  }
  return IMPORT_NAME_UNDECORATE;
}
// Returns a copy of S in which the first occurrence of From is replaced by
// To. From is expected to occur in S; this is asserted.
static std::string replace(StringRef S, StringRef From, StringRef To) {
  const size_t Pos = S.find(From);
  assert(Pos != StringRef::npos);
  StringRef Before = S.substr(0, Pos);
  StringRef After = S.substr(Pos + From.size());
  return (Twine(Before) + To + After).str();
}
// Name of the symbol for the NULL import descriptor. It is written into the
// string table of the import descriptor object built below.
static const std::string NullImportDescriptorSymbolName =
"__NULL_IMPORT_DESCRIPTOR";
namespace {
// This class constructs various small object files necessary to support linking
// symbols imported from a DLL. The contents are pretty strictly defined and
// nearly entirely static. The details of the structures files are defined in
// WINNT.h and the PE/COFF specification.
class ObjectFactory {
// Little-endian 16- and 32-bit integers as stored in COFF structures.
using u16 = support::ulittle16_t;
using u32 = support::ulittle32_t;
BumpPtrAllocator Alloc;
// The DLL name as passed to the constructor.
StringRef DLLName;
// DLLName without its last four characters.
StringRef Library;
// "__IMPORT_DESCRIPTOR_" followed by Library.
std::string ImportDescriptorSymbolName;
// "\x7f" followed by Library and "_NULL_THUNK_DATA".
std::string NullThunkSymbolName;
public:
// S is the DLL name. Only a StringRef to it is stored, so the referenced
// string must outlive the factory. The last four characters are dropped to
// form the library name.
// NOTE(review): presumably that strips a ".dll" extension -- confirm that
// callers always pass a name with a four-character suffix.
ObjectFactory(StringRef S)
: DLLName(S), Library(S.drop_back(4)),
ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
// Creates an Import Descriptor. This is a small object file which contains a
// reference to the terminators and contains the library name (entry) for the
// import name table. It will force the linker to construct the necessary
// structure to import symbols from the DLL.
NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer);
// Creates a NULL import descriptor. This is a small object file which
// contains a NULL import descriptor. It is used to terminate the imports
// from a specific DLL.
NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer);
// Create a NULL Thunk Entry. This is a small object file which contains a
// NULL Import Address Table entry and a NULL Import Lookup Table Entry. It
// is used to terminate the IAT and ILT.
NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer);
// Create a short import file which is described in PE/COFF spec 7. Import
// Library Format.
NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
ImportNameType NameType, bool isData);
};
}
// Builds the import descriptor object file into Buffer and returns an archive
// member that refers to it.
//
// Written in order: COFF header, two section headers, the .idata$2 contents
// (one zeroed import directory table entry), the three relocations that apply
// to .idata$2, the .idata$6 contents (the NUL-terminated DLL name), the symbol
// table and finally the string table.
//
// The returned member points into Buffer's storage, so Buffer must outlive
// the member and must not be modified afterwards.
NewArchiveMember
ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
  static const uint32_t NumberOfSections = 2;
  static const uint32_t NumberOfSymbols = 7;
  static const uint32_t NumberOfRelocations = 3;

  // COFF Header
  coff_file_header Header{
      u16(Config->Machine), u16(NumberOfSections), u32(0),
      // The symbol table follows everything emitted before it.
      u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
          // .idata$2
          sizeof(coff_import_directory_table_entry) +
          NumberOfRelocations * sizeof(coff_relocation) +
          // .idata$6
          (DLLName.size() + 1)),
      u32(NumberOfSymbols), u16(0),
      u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0),
  };
  append(Buffer, Header);

  // Section Header Table
  //
  // Deliberately not `static`: the second entry depends on DLLName, so a
  // function-local static would keep the values from the first call forever.
  const coff_section SectionTable[NumberOfSections] = {
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'},
       u32(0),
       u32(0),
       u32(sizeof(coff_import_directory_table_entry)),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
           sizeof(coff_import_directory_table_entry)),
       u32(0),
       u16(NumberOfRelocations),
       u16(0),
       u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
           IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'},
       u32(0),
       u32(0),
       u32(DLLName.size() + 1),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
           sizeof(coff_import_directory_table_entry) +
           NumberOfRelocations * sizeof(coff_relocation)),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
           IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
  };
  append(Buffer, SectionTable);

  // .idata$2
  static const coff_import_directory_table_entry ImportDescriptor{
      u32(0), u32(0), u32(0), u32(0), u32(0),
  };
  append(Buffer, ImportDescriptor);

  // Relocations for .idata$2. The second field is a symbol table index:
  // 2 = .idata$6, 3 = .idata$4, 4 = .idata$5 (see SymbolTable below).
  // Not `static` either: the relocation type comes from getImgRelRelocation()
  // and must be re-evaluated on every call.
  const coff_relocation RelocationTable[NumberOfRelocations] = {
      {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2),
       u16(getImgRelRelocation())},
      {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)),
       u32(3), u16(getImgRelRelocation())},
      {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)),
       u32(4), u16(getImgRelRelocation())},
  };
  append(Buffer, RelocationTable);

  // .idata$6
  auto S = Buffer.size();
  Buffer.resize(S + DLLName.size() + 1);
  memcpy(&Buffer[S], DLLName.data(), DLLName.size());
  Buffer[S + DLLName.size()] = '\0';

  // Symbol Table
  coff_symbol16 SymbolTable[NumberOfSymbols] = {
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_SECTION,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}},
       u32(0),
       u16(2),
       u16(0),
       IMAGE_SYM_CLASS_STATIC,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_SECTION,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_SECTION,
       0},
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
  };
  // Symbols 0, 5 and 6 take their names from the string table. Each offset
  // skips a uint32_t-sized prefix plus the names that precede it (each
  // followed by a NUL), in the order handed to writeStringTable() below.
  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
      sizeof(uint32_t);
  reinterpret_cast<StringTableOffset &>(SymbolTable[5].Name).Offset =
      sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1;
  reinterpret_cast<StringTableOffset &>(SymbolTable[6].Name).Offset =
      sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 +
      NullImportDescriptorSymbolName.length() + 1;
  append(Buffer, SymbolTable);

  // String Table
  writeStringTable(Buffer,
                   {ImportDescriptorSymbolName, NullImportDescriptorSymbolName,
                    NullThunkSymbolName});

  StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
  return {MemoryBufferRef(F, DLLName)};
}
// Emits the null import descriptor object into Buffer: a COFF object with a
// single .idata$3 section holding one all-zero import directory table entry,
// and one external symbol named NullImportDescriptorSymbolName.
//
// The returned archive member refers to Buffer's storage; Buffer must stay
// alive and unchanged while the member is in use.
NewArchiveMember
ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
  static const uint32_t NumberOfSections = 1;
  static const uint32_t NumberOfSymbols = 1;

  // File offsets of the pieces that follow the fixed-size headers.
  static const size_t DescriptorOffset =
      sizeof(coff_file_header) + (NumberOfSections * sizeof(coff_section));
  static const size_t SymbolTableOffset =
      DescriptorOffset + sizeof(coff_import_directory_table_entry);

  // COFF Header
  coff_file_header FileHeader{
      u16(Config->Machine),
      u16(NumberOfSections),
      u32(0),
      u32(SymbolTableOffset),
      u32(NumberOfSymbols),
      u16(0),
      u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0),
  };
  append(Buffer, FileHeader);

  // Section Header Table
  static const coff_section Sections[NumberOfSections] = {
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'},
       u32(0),
       u32(0),
       u32(sizeof(coff_import_directory_table_entry)),
       u32(DescriptorOffset),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
           IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
  };
  append(Buffer, Sections);

  // .idata$3: the zeroed descriptor itself.
  static const coff_import_directory_table_entry NullDescriptor{
      u32(0), u32(0), u32(0), u32(0), u32(0),
  };
  append(Buffer, NullDescriptor);

  // Symbol Table: one external symbol defined in section 1.
  coff_symbol16 Symbols[NumberOfSymbols] = {
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
  };
  // The name lives in the string table, right after a uint32_t-sized prefix.
  auto &NameRef = reinterpret_cast<StringTableOffset &>(Symbols[0].Name);
  NameRef.Offset = sizeof(uint32_t);
  append(Buffer, Symbols);

  // String Table
  writeStringTable(Buffer, {NullImportDescriptorSymbolName});

  StringRef Contents{reinterpret_cast<const char *>(Buffer.data()),
                     Buffer.size()};
  return {MemoryBufferRef(Contents, DLLName)};
}
// Builds the null thunk object into Buffer: a COFF object with two sections,
// .idata$5 and .idata$4, each holding one zero entry, plus one external
// symbol named NullThunkSymbolName.
//
// The entries are address-sized: 4 bytes on 32-bit targets, 8 bytes
// otherwise. Emitting 4-byte entries unconditionally would leave 64-bit
// import tables without a full-width zero terminator. For 32-bit targets the
// output is byte-for-byte what it was before.
//
// The returned archive member refers to Buffer's storage; Buffer must stay
// alive and unchanged while the member is in use.
NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
  static const uint32_t NumberOfSections = 2;
  static const uint32_t NumberOfSymbols = 1;
  // Size in bytes of one thunk table entry for the target.
  const uint32_t VASize = is32bit() ? 4 : 8;

  // COFF Header
  coff_file_header Header{
      u16(Config->Machine), u16(NumberOfSections), u32(0),
      u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
          // .idata$5
          VASize +
          // .idata$4
          VASize),
      u32(NumberOfSymbols), u16(0),
      u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0),
  };
  append(Buffer, Header);

  // Section Header Table (not static: sizes and alignment depend on the
  // target chosen at run time).
  const coff_section SectionTable[NumberOfSections] = {
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'},
       u32(0),
       u32(0),
       u32(VASize),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32((is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) |
           IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE)},
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'},
       u32(0),
       u32(0),
       u32(VASize),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
           VASize),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32((is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) |
           IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE)},
  };
  append(Buffer, SectionTable);

  // .idata$5 followed by .idata$4: one zero entry each. resize() on a
  // std::vector<uint8_t> zero-fills the added bytes.
  Buffer.resize(Buffer.size() + VASize);
  Buffer.resize(Buffer.size() + VASize);

  // Symbol Table
  coff_symbol16 SymbolTable[NumberOfSymbols] = {
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
  };
  // The symbol name is stored in the string table, right after a
  // uint32_t-sized prefix.
  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
      sizeof(uint32_t);
  append(Buffer, SymbolTable);

  // String Table
  writeStringTable(Buffer, {NullThunkSymbolName});

  StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
  return {MemoryBufferRef{F, DLLName}};
}
// Builds one short import record: a coff_import_header followed by the
// NUL-terminated symbol name and the NUL-terminated DLL name. The storage is
// taken from Alloc and zero-filled first, which supplies both terminators and
// leaves OrdinalHint at 0 when Ordinal is 0.
NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
                                                  uint16_t Ordinal,
                                                  ImportNameType NameType,
                                                  bool isData) {
  // Payload = Sym + NUL + DLLName + NUL.
  size_t PayloadSize = DLLName.size() + Sym.size() + 2;
  size_t TotalSize = sizeof(coff_import_header) + PayloadSize;

  char *Storage = Alloc.Allocate<char>(TotalSize);
  memset(Storage, 0, TotalSize);

  // Fill in the header at the start of the record.
  auto *Hdr = reinterpret_cast<coff_import_header *>(Storage);
  Hdr->Sig2 = 0xFFFF;
  Hdr->Machine = Config->Machine;
  Hdr->SizeOfData = PayloadSize;
  if (Ordinal > 0)
    Hdr->OrdinalHint = Ordinal;
  // Low bits: code/data import type; name type is stored shifted left by 2.
  Hdr->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE);
  Hdr->TypeInfo |= NameType << 2;

  // Strings follow the header; the gap of one byte is the symbol's NUL.
  char *SymDst = Storage + sizeof(coff_import_header);
  char *DLLDst = SymDst + Sym.size() + 1;
  memcpy(SymDst, Sym.data(), Sym.size());
  memcpy(DLLDst, DLLName.data(), DLLName.size());

  return {MemoryBufferRef(StringRef(Storage, TotalSize), DLLName)};
}
// Creates an import library for a DLL. In this function, we first
// create an empty import library using lib.exe and then adds short
// import files to that file.
void lld::coff::writeImportLibrary() {
std::vector<NewArchiveMember> Members;
std::string Path = getImplibPath();
std::string DLLName = llvm::sys::path::filename(Config->OutputFile);
ObjectFactory OF(DLLName);
std::vector<uint8_t> ImportDescriptor;
Members.push_back(OF.createImportDescriptor(ImportDescriptor));
std::vector<uint8_t> NullImportDescriptor;
Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor));
std::vector<uint8_t> NullThunk;
Members.push_back(OF.createNullThunk(NullThunk));
for (Export &E : Config->Exports) {
if (E.Private)
continue;
ImportNameType Type = getNameType(E.SymbolName, E.Name);
std::string Name = E.ExtName.empty()
? std::string(E.SymbolName)
: replace(E.SymbolName, E.Name, E.ExtName);
Members.push_back(OF.createShortImport(Name, E.Ordinal, Type, E.Data));
}
std::pair<StringRef, std::error_code> Result =
writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU,
/*Deterministic*/ true, /*Thin*/ false);
if (auto EC = Result.second)
fatal(EC, "failed to write " + Path);
}

View File

@ -0,0 +1,61 @@
//===- MarkLive.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
#include <vector>
namespace lld {
namespace coff {
// Propagates liveness through the section graph. Starting from the section
// chunks that are already live and from the GC roots, every section reachable
// through relocation targets or through associative children gets its live
// bit set. Sections that stay unmarked are unreachable.
void markLive(const std::vector<Chunk *> &Chunks) {
  // Sections that are live but whose outgoing references are not yet
  // scanned. A section is marked at the moment it is pushed, so it can never
  // be queued twice.
  SmallVector<SectionChunk *, 256> Pending;

  // Seed with the section chunks that are live from the start.
  for (Chunk *C : Chunks) {
    auto *Sec = dyn_cast<SectionChunk>(C);
    if (Sec && Sec->isLive())
      Pending.push_back(Sec);
  }

  auto Visit = [&](SectionChunk *Sec) {
    if (!Sec->isLive()) {
      Sec->markLive();
      Pending.push_back(Sec);
    }
  };

  // Seed with the sections that define the GC roots.
  for (Undefined *Root : Config->GCRoot) {
    auto *Def = dyn_cast<DefinedRegular>(Root->repl());
    if (Def)
      Visit(Def->getChunk());
  }

  while (!Pending.empty()) {
    SectionChunk *Cur = Pending.pop_back_val();
    assert(Cur->isLive() && "We mark as live when pushing onto the worklist!");

    // Follow every symbol named by this section's relocations.
    for (SymbolBody *Target : Cur->symbols()) {
      auto *Def = dyn_cast<DefinedRegular>(Target->repl());
      if (Def)
        Visit(Def->getChunk());
    }

    // Associative sections live and die with their parent.
    for (SectionChunk *Child : Cur->children())
      Visit(Child);
  }
}
}
}

View File

@ -0,0 +1,291 @@
//===- COFF/ModuleDef.cpp -------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
// Parsed results are directly written to Config global variable.
//
// The format of module-definition files are described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Error.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
using namespace llvm;
namespace lld {
namespace coff {
namespace {
// Token kinds produced by Lexer::lex().
enum Kind {
// Default kind of a default-constructed Token.
Unknown,
// End of input (buffer exhausted or a NUL character reached).
Eof,
// A bare word that is not a keyword, or a double-quoted string.
Identifier,
// ","
Comma,
// "="
Equal,
// Keywords. The lexer matches them case-sensitively in upper case:
// BASE, DATA, EXPORTS, HEAPSIZE, LIBRARY, NAME, NONAME, PRIVATE, STACKSIZE
// and VERSION respectively.
KwBase,
KwData,
KwExports,
KwHeapsize,
KwLibrary,
KwName,
KwNoname,
KwPrivate,
KwStacksize,
KwVersion,
};
// A lexed token: its kind plus the text it was made from. Value is a
// StringRef and therefore does not own the characters it refers to.
struct Token {
// Defaults to an Unknown token with empty text.
explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
// Kind of the token.
Kind K;
// Text of the token (for quoted identifiers, the text between the quotes).
StringRef Value;
};
// Returns true if Sym begins with one of the characters '_', '@' or '?'.
// An empty name is never decorated.
static bool isDecorated(StringRef Sym) {
  if (Sym.empty())
    return false;
  switch (Sym.front()) {
  case '_':
  case '@':
  case '?':
    return true;
  default:
    return false;
  }
}
class Lexer {
public:
explicit Lexer(StringRef S) : Buf(S) {}
Token lex() {
Buf = Buf.trim();
if (Buf.empty())
return Token(Eof);
switch (Buf[0]) {
case '\0':
return Token(Eof);
case ';': {
size_t End = Buf.find('\n');
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return lex();
}
case '=':
Buf = Buf.drop_front();
return Token(Equal, "=");
case ',':
Buf = Buf.drop_front();
return Token(Comma, ",");
case '"': {
StringRef S;
std::tie(S, Buf) = Buf.substr(1).split('"');
return Token(Identifier, S);
}
default: {
size_t End = Buf.find_first_of("=,\r\n \t\v");
StringRef Word = Buf.substr(0, End);
Kind K = llvm::StringSwitch<Kind>(Word)
.Case("BASE", KwBase)
.Case("DATA", KwData)
.Case("EXPORTS", KwExports)
.Case("HEAPSIZE", KwHeapsize)
.Case("LIBRARY", KwLibrary)
.Case("NAME", KwName)
.Case("NONAME", KwNoname)
.Case("PRIVATE", KwPrivate)
.Case("STACKSIZE", KwStacksize)
.Case("VERSION", KwVersion)
.Default(Identifier);
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return Token(K, Word);
}
}
}
private:
StringRef Buf;
};
// Recursive-descent parser for module-definition files. Results are written
// straight into the global Config; malformed input ends in fatal().
class Parser {
public:
  // S is the text to parse. A is used to allocate strings that have to
  // outlive the parse (decorated export names).
  explicit Parser(StringRef S, StringSaver *A) : Lex(S), Alloc(A) {}

  // Parses directives until the end of input.
  void parse() {
    do {
      parseOne();
    } while (Tok.K != Eof);
  }

private:
  // Loads the next token into Tok, preferring tokens returned via unget().
  void read() {
    if (Stack.empty()) {
      Tok = Lex.lex();
      return;
    }
    Tok = Stack.back();
    Stack.pop_back();
  }

  // Reads a token and stores it in *I as a decimal integer.
  void readAsInt(uint64_t *I) {
    read();
    if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
      fatal("integer expected");
  }

  // Reads a token and fails with Msg unless it has the expected kind.
  void expect(Kind Expected, StringRef Msg) {
    read();
    if (Tok.K != Expected)
      fatal(Msg);
  }

  // Pushes the current token back so that the next read() returns it again.
  void unget() { Stack.push_back(Tok); }

  // Parses a single top-level directive.
  void parseOne() {
    read();
    switch (Tok.K) {
    case Eof:
      return;
    case KwExports:
      // Every identifier that follows starts another export entry.
      for (;;) {
        read();
        if (Tok.K != Identifier) {
          unget();
          return;
        }
        parseExport();
      }
    case KwHeapsize:
      parseNumbers(&Config->HeapReserve, &Config->HeapCommit);
      return;
    case KwLibrary:
      parseName(&Config->OutputFile, &Config->ImageBase);
      if (!StringRef(Config->OutputFile).endswith_lower(".dll"))
        Config->OutputFile += ".dll";
      return;
    case KwStacksize:
      parseNumbers(&Config->StackReserve, &Config->StackCommit);
      return;
    case KwName:
      parseName(&Config->OutputFile, &Config->ImageBase);
      return;
    case KwVersion:
      parseVersion(&Config->MajorImageVersion, &Config->MinorImageVersion);
      return;
    default:
      fatal("unknown directive: " + Tok.Value);
    }
  }

  // Parses one export entry; Tok holds its first identifier on entry.
  //   name[=internal_name] [@ordinal [NONAME]] [DATA] [PRIVATE]
  void parseExport() {
    Export E;
    E.Name = Tok.Value;
    read();
    if (Tok.K == Equal) {
      read();
      if (Tok.K != Identifier)
        fatal("identifier expected, but got " + Tok.Value);
      E.ExtName = E.Name;
      E.Name = Tok.Value;
    } else {
      unget();
    }

    // On x86, undecorated names get a leading underscore.
    if (Config->Machine == I386) {
      if (!isDecorated(E.Name))
        E.Name = Alloc->save("_" + E.Name);
      if (!E.ExtName.empty() && !isDecorated(E.ExtName))
        E.ExtName = Alloc->save("_" + E.ExtName);
    }

    for (;;) {
      read();
      // startswith() instead of Value[0]: a quoted empty identifier ("") has
      // an empty Value, and indexing it would read out of bounds.
      if (Tok.K == Identifier && Tok.Value.startswith("@")) {
        if (Tok.Value == "@") {
          // "name @ 10": the ordinal is the following token.
          read();
          if (Tok.K != Identifier || Tok.Value.getAsInteger(10, E.Ordinal))
            fatal("ordinal expected, but got " + Tok.Value);
        } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
          // "@" followed by something that is not a number is not an ordinal
          // but the name of the next export (a decorated symbol). Finish the
          // current entry and let the caller pick that token up again.
          unget();
          Config->Exports.push_back(E);
          return;
        }
        read();
        if (Tok.K == KwNoname) {
          E.Noname = true;
        } else {
          unget();
        }
        continue;
      }
      if (Tok.K == KwData) {
        E.Data = true;
        continue;
      }
      if (Tok.K == KwPrivate) {
        E.Private = true;
        continue;
      }
      unget();
      Config->Exports.push_back(E);
      return;
    }
  }

  // HEAPSIZE/STACKSIZE reserve[,commit]
  // *Commit is left untouched when no commit size is given.
  void parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
    readAsInt(Reserve);
    read();
    if (Tok.K != Comma) {
      unget();
      return;
    }
    readAsInt(Commit);
  }

  // NAME outputPath [BASE=address]
  // Without a path *Out is cleared and *Baseaddr is left alone; with a path
  // but no BASE clause *Baseaddr is reset to 0.
  void parseName(std::string *Out, uint64_t *Baseaddr) {
    read();
    if (Tok.K == Identifier) {
      *Out = Tok.Value;
    } else {
      *Out = "";
      unget();
      return;
    }
    read();
    if (Tok.K == KwBase) {
      expect(Equal, "'=' expected");
      readAsInt(Baseaddr);
    } else {
      unget();
      *Baseaddr = 0;
    }
  }

  // VERSION major[.minor]
  // A missing minor version is stored as 0.
  void parseVersion(uint32_t *Major, uint32_t *Minor) {
    read();
    if (Tok.K != Identifier)
      fatal("identifier expected, but got " + Tok.Value);
    StringRef V1, V2;
    std::tie(V1, V2) = Tok.Value.split('.');
    if (V1.getAsInteger(10, *Major))
      fatal("integer expected, but got " + Tok.Value);
    if (V2.empty())
      *Minor = 0;
    else if (V2.getAsInteger(10, *Minor))
      fatal("integer expected, but got " + Tok.Value);
  }

  Lexer Lex;
  // Most recently read token.
  Token Tok;
  // Tokens handed back through unget(), returned by read() in LIFO order.
  std::vector<Token> Stack;
  StringSaver *Alloc;
};
} // anonymous namespace
// Parses the module-definition file held in MB. The results are stored in
// Config; Alloc provides storage for strings created while parsing.
void parseModuleDefs(MemoryBufferRef MB, StringSaver *Alloc) {
  Parser P(MB.getBuffer(), Alloc);
  P.parse();
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,127 @@
include "llvm/Option/OptParser.td"
// Helper classes shared by the option definitions below.
// link.exe accepts options starting with either a dash or a slash.
// Flag that takes no arguments.
class F<string name> : Flag<["/", "-", "-?"], name>;
// Flag that takes one argument after ":".
// The option is spelled "<name>:<value>"; `help` becomes its help text.
class P<string name, string help> :
Joined<["/", "-", "-?"], name#":">, HelpText<help>;
// Boolean flag suffixed by ":no".
// Defines two flags: "<name>" and "<name>:no". The help text is attached to
// the ":no" variant only, so it should describe what disabling does.
multiclass B<string name, string help> {
def "" : F<name>;
def _no : F<name#":no">, HelpText<help>;
}
// Options that take a value after ":".
def align : P<"align", "Section alignment">;
def alternatename : P<"alternatename", "Define weak alias">;
def base : P<"base", "Base address of the program">;
def defaultlib : P<"defaultlib", "Add the library to the list of input files">;
def delayload : P<"delayload", "Delay loaded DLL name">;
def entry : P<"entry", "Name of entry point symbol">;
def export : P<"export", "Export a function">;
// No help text because /failifmismatch is not intended to be used by the user.
def failifmismatch : P<"failifmismatch", "">;
def heap : P<"heap", "Size of the heap">;
def implib : P<"implib", "Import library name">;
def libpath : P<"libpath", "Additional library search path">;
def machine : P<"machine", "Specify target platform">;
def merge : P<"merge", "Combine sections">;
def mllvm : P<"mllvm", "Options to pass to LLVM">;
def nodefaultlib : P<"nodefaultlib", "Remove a default library">;
def opt : P<"opt", "Control optimizations">;
def out : P<"out", "Path to file to write output">;
def pdb : P<"pdb", "PDB file path">;
def section : P<"section", "Specify section attributes">;
def stack : P<"stack", "Size of the stack">;
def stub : P<"stub", "Specify DOS stub file">;
def subsystem : P<"subsystem", "Specify subsystem">;
def version : P<"version", "Specify a version number in the PE header">;
// /disallowlib:<lib> is accepted as an alias of /nodefaultlib:<lib>.
def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>;
// Manifest options. "manifest" exists both as a plain flag and in a form
// that takes a value ("manifest:<value>").
def manifest : F<"manifest">;
def manifest_colon : P<"manifest", "Create manifest file">;
def manifestuac : P<"manifestuac", "User access control">;
def manifestfile : P<"manifestfile", "Manifest file path">;
def manifestdependency : P<"manifestdependency",
"Attributes for <dependency> in manifest file">;
def manifestinput : P<"manifestinput", "Specify manifest file">;
// These two are written out with Joined<> rather than going through class P
// because the record name ("incl") differs from the command line option name
// ("include"): "include" is a reserved keyword in tablegen.
def incl : Joined<["/", "-"], "include:">,
HelpText<"Force symbol to be added to symbol table as undefined one">;
// "def" is also a keyword.
def deffile : Joined<["/", "-"], "def:">,
HelpText<"Use module-definition file">;
// Flags that take no value.
def debug : F<"debug">, HelpText<"Embed a symbol table in the image">;
def dll : F<"dll">, HelpText<"Create a DLL">;
// Bare /nodefaultlib (no ":<lib>"), as opposed to `nodefaultlib` above.
def nodefaultlib_all : F<"nodefaultlib">;
def noentry : F<"noentry">;
def profile : F<"profile">;
def swaprun_cd : F<"swaprun:cd">;
def swaprun_net : F<"swaprun:net">;
def verbose : F<"verbose">;
def force : F<"force">,
HelpText<"Allow undefined symbols when creating executables">;
def force_unresolved : F<"force:unresolved">;
// Boolean options. Each defm defines both "<name>" and "<name>:no". The help
// text is attached to the ":no" form (see multiclass B), so every string here
// describes the effect of turning the feature off.
defm allowbind: B<"allowbind", "Disable DLL binding">;
defm allowisolation : B<"allowisolation", "Set NO_ISOLATION bit">;
defm dynamicbase : B<"dynamicbase",
                     "Disable address space layout randomization">;
defm fixed : B<"fixed", "Enable base relocations">;
defm highentropyva : B<"highentropyva", "Don't set HIGH_ENTROPY_VA bit">;
defm largeaddressaware : B<"largeaddressaware", "Disable large addresses">;
defm nxcompat : B<"nxcompat", "Disable data execution prevention">;
defm safeseh : B<"safeseh",
                 "Don't produce an image with Safe Exception Handler">;
defm tsaware : B<"tsaware", "Create non-Terminal Server aware executable">;
// Help. "/?" and "-?" are aliases of the help flag.
def help : F<"help">;
def help_q : Flag<["/?", "-?"], "">, Alias<help>;
// LLD extensions
def nosymtab : F<"nosymtab">;
// Flags for debugging
def lldmap : Joined<["/", "-"], "lldmap:">;
//==============================================================================
// The flags below do nothing. They are defined only for link.exe compatibility.
//==============================================================================
// Ignored option that takes a value after ":" (no help text).
class QF<string name> : Joined<["/", "-", "-?"], name#":">;
// Ignored boolean option: defines "<name>" and "<name>:no" (no help text).
multiclass QB<string name> {
def "" : F<name>;
def _no : F<name#":no">;
}
def functionpadmin : F<"functionpadmin">;
def ignoreidl : F<"ignoreidl">;
def incremental : F<"incremental">;
def no_incremental : F<"incremental:no">;
def nologo : F<"nologo">;
def throwingnew : F<"throwingnew">;
def editandcontinue : F<"editandcontinue">;
def fastfail : F<"fastfail">;
def delay : QF<"delay">;
def errorreport : QF<"errorreport">;
def idlout : QF<"idlout">;
def ignore : QF<"ignore">;
def maxilksize : QF<"maxilksize">;
def pdbaltpath : QF<"pdbaltpath">;
def tlbid : QF<"tlbid">;
def tlbout : QF<"tlbout">;
// "verbose:<value>", as opposed to the plain `verbose` flag.
def verbose_all : QF<"verbose">;
def guardsym : QF<"guardsym">;
defm wx : QB<"wx">;

View File

@ -0,0 +1,61 @@
//===- PDB.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Driver.h"
#include "Error.h"
#include "Symbols.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <memory>
using namespace llvm;
using namespace llvm::support;
using namespace llvm::support::endian;
// Size in bytes of one page of the generated file. createPDB() stores it in
// the header and uses it to size the file.
const int PageSize = 4096;
// File signature. createPDB() copies all 32 bytes to the start of the file.
const uint8_t Magic[32] = "Microsoft C/C++ MSF 7.00\r\n\032DS\0\0";
namespace {
// Layout of the header that createPDB() writes at offset 0. All integer
// fields are little-endian 32-bit values.
struct PDBHeader {
// Receives a copy of the file-level Magic above.
uint8_t Magic[32];
// Page size in bytes.
ulittle32_t PageSize;
// Set to 1 by createPDB(); per the comment there it must not be 0.
ulittle32_t FpmPage;
// Number of pages in the file.
ulittle32_t PageCount;
// Size in bytes of the root directory.
ulittle32_t RootSize;
// Never assigned by createPDB().
ulittle32_t Reserved;
// Page number of the page that holds the root directory.
ulittle32_t RootPointer;
};
}
// Creates a minimal three-page PDB file at Path: a header followed by an
// empty root directory. Failing to create or to write the file is fatal.
void lld::coff::createPDB(StringRef Path) {
  // Create a file.
  size_t FileSize = PageSize * 3;
  ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
      FileOutputBuffer::create(Path, FileSize);
  if (auto EC = BufferOrErr.getError())
    fatal(EC, "failed to open " + Path);
  std::unique_ptr<FileOutputBuffer> Buffer = std::move(*BufferOrErr);

  // Write the file header. Only the fields assigned below are written; the
  // rest of the buffer (including Hdr->Reserved) is left as created.
  // NOTE(review): this presumably relies on FileOutputBuffer handing out
  // zero-filled memory -- confirm.
  uint8_t *Buf = Buffer->getBufferStart();
  auto *Hdr = reinterpret_cast<PDBHeader *>(Buf);
  memcpy(Hdr->Magic, Magic, sizeof(Magic));
  Hdr->PageSize = PageSize;
  // I don't know what FpmPage field means, but it must not be 0.
  Hdr->FpmPage = 1;
  Hdr->PageCount = FileSize / PageSize;
  // Root directory is empty, containing only the length field.
  Hdr->RootSize = 4;
  // Root directory is on page 1.
  Hdr->RootPointer = 1;

  // Write the root directory. Root stream is on page 2.
  write32le(Buf + PageSize, 2);

  // commit() reports whether the file could actually be written; ignoring
  // its result would let the link succeed with a missing or truncated PDB.
  if (auto EC = Buffer->commit())
    fatal(EC, "failed to write " + Path);
}

View File

@ -0,0 +1 @@
See docs/NewLLD.rst

View File

@ -0,0 +1,448 @@
//===- SymbolTable.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Core/Parallel.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/legacy/LTOCodeGenerator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace llvm;
namespace lld {
namespace coff {
void SymbolTable::addFile(std::unique_ptr<InputFile> FileP) {
#if LLVM_ENABLE_THREADS
std::launch Policy = std::launch::async;
#else
std::launch Policy = std::launch::deferred;
#endif
InputFile *File = FileP.get();
Files.push_back(std::move(FileP));
if (auto *F = dyn_cast<ArchiveFile>(File)) {
ArchiveQueue.push_back(
std::async(Policy, [=]() { F->parse(); return F; }));
return;
}
ObjectQueue.push_back(
std::async(Policy, [=]() { File->parse(); return File; }));
if (auto *F = dyn_cast<ObjectFile>(File)) {
ObjectFiles.push_back(F);
} else if (auto *F = dyn_cast<BitcodeFile>(File)) {
BitcodeFiles.push_back(F);
} else {
ImportFiles.push_back(cast<ImportFile>(File));
}
}
// Processes the queued files once: objects first, then archives. Does
// nothing when both queues are empty.
void SymbolTable::step() {
  if (!queueEmpty()) {
    readObjects();
    readArchives();
  }
}
// Keeps stepping until neither queue has anything left.
void SymbolTable::run() {
  for (;;) {
    if (queueEmpty())
      return;
    step();
  }
}
void SymbolTable::readArchives() {
if (ArchiveQueue.empty())
return;
// Add lazy symbols to the symbol table. Lazy symbols that conflict
// with existing undefined symbols are accumulated in LazySyms.
std::vector<Symbol *> LazySyms;
for (std::future<ArchiveFile *> &Future : ArchiveQueue) {
ArchiveFile *File = Future.get();
if (Config->Verbose)
llvm::outs() << "Reading " << File->getShortName() << "\n";
for (Lazy &Sym : File->getLazySymbols())
addLazy(&Sym, &LazySyms);
}
ArchiveQueue.clear();
// Add archive member files to ObjectQueue that should resolve
// existing undefined symbols.
for (Symbol *Sym : LazySyms)
addMemberFile(cast<Lazy>(Sym->Body));
}
void SymbolTable::readObjects() {
if (ObjectQueue.empty())
return;
// Add defined and undefined symbols to the symbol table.
std::vector<StringRef> Directives;
for (size_t I = 0; I < ObjectQueue.size(); ++I) {
InputFile *File = ObjectQueue[I].get();
if (Config->Verbose)
llvm::outs() << "Reading " << File->getShortName() << "\n";
// Adding symbols may add more files to ObjectQueue
// (but not to ArchiveQueue).
for (SymbolBody *Sym : File->getSymbols())
if (Sym->isExternal())
addSymbol(Sym);
StringRef S = File->getDirectives();
if (!S.empty()) {
Directives.push_back(S);
if (Config->Verbose)
llvm::outs() << "Directives: " << File->getShortName()
<< ": " << S << "\n";
}
}
ObjectQueue.clear();
// Parse directive sections. This may add files to
// ArchiveQueue and ObjectQueue.
for (StringRef S : Directives)
Driver->parseDirectives(S);
}
// True when there is neither an archive nor an object file left to read.
bool SymbolTable::queueEmpty() {
  const bool HasWork = !ArchiveQueue.empty() || !ObjectQueue.empty();
  return !HasWork;
}
void SymbolTable::reportRemainingUndefines(bool Resolve) {
llvm::SmallPtrSet<SymbolBody *, 8> Undefs;
for (auto &I : Symtab) {
Symbol *Sym = I.second;
auto *Undef = dyn_cast<Undefined>(Sym->Body);
if (!Undef)
continue;
StringRef Name = Undef->getName();
// A weak alias may have been resolved, so check for that.
if (Defined *D = Undef->getWeakAlias()) {
if (Resolve)
Sym->Body = D;
continue;
}
// If we can resolve a symbol by removing __imp_ prefix, do that.
// This odd rule is for compatibility with MSVC linker.
if (Name.startswith("__imp_")) {
Symbol *Imp = find(Name.substr(strlen("__imp_")));
if (Imp && isa<Defined>(Imp->Body)) {
if (!Resolve)
continue;
auto *D = cast<Defined>(Imp->Body);
auto *S = new (Alloc) DefinedLocalImport(Name, D);
LocalImportChunks.push_back(S->getChunk());
Sym->Body = S;
continue;
}
}
// Remaining undefined symbols are not fatal if /force is specified.
// They are replaced with dummy defined symbols.
if (Config->Force && Resolve)
Sym->Body = new (Alloc) DefinedAbsolute(Name, 0);
Undefs.insert(Sym->Body);
}
if (Undefs.empty())
return;
for (Undefined *U : Config->GCRoot)
if (Undefs.count(U->repl()))
llvm::errs() << "<root>: undefined symbol: " << U->getName() << "\n";
for (std::unique_ptr<InputFile> &File : Files)
if (!isa<ArchiveFile>(File.get()))
for (SymbolBody *Sym : File->getSymbols())
if (Undefs.count(Sym->repl()))
llvm::errs() << File->getShortName() << ": undefined symbol: "
<< Sym->getName() << "\n";
if (!Config->Force)
fatal("link failed");
}
// Registers a lazy (archive) symbol. An existing definition always wins, and
// so does a lazy symbol whose file index is smaller than the new one's.
// Otherwise the new lazy symbol takes over the slot; if it replaced an
// undefined symbol, the slot is appended to *Accum.
void SymbolTable::addLazy(Lazy *New, std::vector<Symbol *> *Accum) {
  Symbol *Sym = insert(New);
  SymbolBody *Existing = Sym->Body;

  // Freshly inserted: nothing to arbitrate.
  if (Existing == New)
    return;
  if (isa<Defined>(Existing))
    return;

  Lazy *Prev = dyn_cast<Lazy>(Existing);
  if (Prev && Prev->getFileIndex() < New->getFileIndex())
    return;

  Sym->Body = New;
  New->setBackref(Sym);

  if (isa<Undefined>(Existing))
    Accum->push_back(Sym);
}
// Adds a defined or undefined symbol, resolving it against whatever the
// table already holds under the same name. Conflicting definitions are fatal.
void SymbolTable::addSymbol(SymbolBody *New) {
  assert(isa<Defined>(New) || isa<Undefined>(New));

  // Look the name up, creating the entry if this is its first occurrence.
  Symbol *Sym = insert(New);
  SymbolBody *Existing = Sym->Body;
  if (Existing == New)
    return;

  if (auto *L = dyn_cast<Lazy>(Existing)) {
    // An undefined reference to a lazy symbol loads the archive member,
    // except when the reference carries a weak alias: such a symbol does not
    // need to be resolved, as the alias takes over if it stays undefined.
    auto *U = dyn_cast<Undefined>(New);
    const bool LoadMember = U && !U->WeakAlias;
    if (LoadMember)
      addMemberFile(L);
    else
      Sym->Body = New;
    return;
  }

  // compare() yields a negative value if the existing symbol is less
  // preferable than the new one, zero if the two conflict, and a positive
  // value if the existing symbol should stay.
  int Order = Existing->compare(New);
  if (Order == 0)
    fatal("duplicate symbol: " + Existing->getDebugName() + " and " +
          New->getDebugName());
  else if (Order < 0)
    Sym->Body = New;
}
// Returns the table entry for New's name, creating one that points at New if
// the name has not been seen before. In both cases New's back reference is
// set to that entry.
Symbol *SymbolTable::insert(SymbolBody *New) {
  Symbol *&Slot = Symtab[New->getName()];
  if (!Slot)
    Slot = new (Alloc) Symbol(New);
  New->setBackref(Slot);
  return Slot;
}
// Loads the archive member that a lazy symbol refers to and queues it as an
// input file. Nothing happens if getMember() returns null, which it does
// when the member has already been read from the library.
void SymbolTable::addMemberFile(Lazy *Body) {
  std::unique_ptr<InputFile> Member = Body->getMember();
  if (Member) {
    if (Config->Verbose)
      llvm::outs() << "Loaded " << Member->getShortName() << " for "
                   << Body->getName() << "\n";
    addFile(std::move(Member));
  }
}
// Returns the chunks of all object files, in file order.
std::vector<Chunk *> SymbolTable::getChunks() {
  std::vector<Chunk *> All;
  for (ObjectFile *File : ObjectFiles)
    for (Chunk *C : File->getChunks())
      All.push_back(C);
  return All;
}
// Looks Name up without creating an entry; returns null if it is unknown.
Symbol *SymbolTable::find(StringRef Name) {
  auto Pos = Symtab.find(Name);
  return Pos == Symtab.end() ? nullptr : Pos->second;
}
// Like find(), except that on x86 the name is looked up with an underscore
// prepended.
Symbol *SymbolTable::findUnderscore(StringRef Name) {
  if (Config->Machine != I386)
    return find(Name);
  std::string Prefixed = ("_" + Name).str();
  return find(Prefixed);
}
// Returns the name of the first symbol encountered (in the table's iteration
// order) that starts with Prefix, or an empty string if there is none.
StringRef SymbolTable::findByPrefix(StringRef Prefix) {
  for (const auto &Entry : Symtab) {
    StringRef Candidate = Entry.first;
    if (!Candidate.startswith(Prefix))
      continue;
    return Candidate;
  }
  return "";
}
// Finds the symbol that a possibly unmangled Name stands for. If Name itself
// is in the table and not undefined it is returned unchanged. Otherwise the
// table is searched by prefix for a mangled form; an empty string means that
// nothing matched.
StringRef SymbolTable::findMangle(StringRef Name) {
  Symbol *Exact = find(Name);
  if (Exact && !isa<Undefined>(Exact->Body))
    return Name;

  if (Config->Machine != I386) {
    std::string CxxPrefix = ("?" + Name + "@@Y").str();
    return findByPrefix(CxxPrefix);
  }

  if (!Name.startswith("_"))
    return "";

  // Search for x86 C function.
  std::string CPrefix = (Name + "@").str();
  StringRef Found = findByPrefix(CPrefix);
  if (!Found.empty())
    return Found;

  // Search for x86 C++ non-member function; the leading underscore is not
  // part of the C++ mangled name.
  std::string CxxPrefix = ("?" + Name.substr(1) + "@@Y").str();
  return findByPrefix(CxxPrefix);
}
// If U is still undefined and has no weak alias yet, looks for a mangled
// symbol matching its name and, when one exists, installs it as U's weak
// alias.
void SymbolTable::mangleMaybe(Undefined *U) {
  if (U->WeakAlias || !isa<Undefined>(U->repl()))
    return;

  StringRef Mangled = findMangle(U->getName());
  if (Mangled.empty())
    return;
  U->WeakAlias = addUndefined(Mangled);
}
// Adds an undefined symbol called Name. If, after resolution, the table
// entry holds an undefined symbol, that one is returned; otherwise the newly
// created symbol is.
Undefined *SymbolTable::addUndefined(StringRef Name) {
  auto *Fresh = new (Alloc) Undefined(Name);
  addSymbol(Fresh);
  auto *Resolved = dyn_cast<Undefined>(Fresh->repl());
  return Resolved ? Resolved : Fresh;
}
// Creates a DefinedRelative symbol from Name and VA, adds it to the table
// and returns it.
DefinedRelative *SymbolTable::addRelative(StringRef Name, uint64_t VA) {
  DefinedRelative *Sym = new (Alloc) DefinedRelative(Name, VA);
  addSymbol(Sym);
  return Sym;
}
// Creates a DefinedAbsolute symbol from Name and VA, adds it to the table
// and returns it.
DefinedAbsolute *SymbolTable::addAbsolute(StringRef Name, uint64_t VA) {
  DefinedAbsolute *Sym = new (Alloc) DefinedAbsolute(Name, VA);
  addSymbol(Sym);
  return Sym;
}
// Writes a symbol map to OS: for each object file its name, followed by one
// line per regular defined symbol in a live chunk giving the address
// (ImageBase + RVA, in hex) and the symbol name.
void SymbolTable::printMap(llvm::raw_ostream &OS) {
  for (ObjectFile *File : ObjectFiles) {
    OS << File->getShortName() << ":\n";
    for (SymbolBody *Body : File->getSymbols()) {
      auto *Reg = dyn_cast<DefinedRegular>(Body);
      if (!Reg || !Reg->getChunk()->isLive())
        continue;
      OS << Twine::utohexstr(Config->ImageBase + Reg->getRVA())
         << " " << Reg->getName() << "\n";
    }
  }
}
// Merges the external symbols of an LTO-generated object file into the
// table. Bitcode definitions are replaced by the corresponding symbols from
// the object file; lazy symbols trigger loading of their archive member; any
// other clash goes through the normal compare() rules, where a tie is fatal.
void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) {
  for (SymbolBody *Body : Obj->getSymbols()) {
    if (!Body->isExternal())
      continue;

    // New undefined symbols are not expected here; should there be any,
    // reportRemainingUndefines() diagnoses them later.
    StringRef SymName = Body->getName();
    Symbol *Sym = insert(Body);
    SymbolBody *Prev = Sym->Body;
    if (Prev == Body)
      continue;

    if (isa<DefinedBitcode>(Prev)) {
      Sym->Body = Body;
      continue;
    }

    // Code generation can introduce references to runtime library symbols
    // such as __chkstk. These must be wholly defined in non-bitcode files.
    if (auto *L = dyn_cast<Lazy>(Prev)) {
      addMemberFile(L);
      continue;
    }

    int Order = Prev->compare(Body);
    if (Order == 0)
      fatal("LTO: unexpected duplicate symbol: " + SymName);
    else if (Order < 0)
      Sym->Body = Body;
  }
}
void SymbolTable::addCombinedLTOObjects() {
if (BitcodeFiles.empty())
return;
// Diagnose any undefined symbols early, but do not resolve weak externals,
// as resolution breaks the invariant that each Symbol points to a unique
// SymbolBody, which we rely on to replace DefinedBitcode symbols correctly.
reportRemainingUndefines(/*Resolve=*/false);
// Create an object file and add it to the symbol table by replacing any
// DefinedBitcode symbols with the definitions in the object file.
LTOCodeGenerator CG(BitcodeFile::Context);
CG.setOptLevel(Config->LTOOptLevel);
std::vector<ObjectFile *> Objs = createLTOObjects(&CG);
for (ObjectFile *Obj : Objs)
addCombinedLTOObject(Obj);
size_t NumBitcodeFiles = BitcodeFiles.size();
run();
if (BitcodeFiles.size() != NumBitcodeFiles)
fatal("LTO: late loaded symbol created new bitcode reference");
}
// Combine and compile bitcode files and then return the result
// as a vector of regular COFF object files.
//
// CG is the code generator to drive. The caller must make sure that
// BitcodeFiles is not empty, because BitcodeFiles[0] is used unconditionally.
// The object files created here are owned by Files and are also appended to
// ObjectFiles. Optimization or code generation failure is fatal.
std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) {
// All symbols referenced by non-bitcode objects must be preserved.
for (ObjectFile *File : ObjectFiles)
for (SymbolBody *Body : File->getSymbols())
if (auto *S = dyn_cast<DefinedBitcode>(Body->repl()))
CG->addMustPreserveSymbol(S->getName());
// Likewise for bitcode symbols which we initially resolved to non-bitcode.
for (BitcodeFile *File : BitcodeFiles)
for (SymbolBody *Body : File->getSymbols())
if (isa<DefinedBitcode>(Body) && !isa<DefinedBitcode>(Body->repl()))
CG->addMustPreserveSymbol(Body->getName());
// Likewise for other symbols that must be preserved.
// (GC roots that resolve to bitcode, directly or through a weak alias.)
for (Undefined *U : Config->GCRoot) {
if (auto *S = dyn_cast<DefinedBitcode>(U->repl()))
CG->addMustPreserveSymbol(S->getName());
else if (auto *S = dyn_cast_or_null<DefinedBitcode>(U->getWeakAlias()))
CG->addMustPreserveSymbol(S->getName());
}
// Hand the first module to the code generator and add all the others to it.
CG->setModule(BitcodeFiles[0]->takeModule());
for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I)
CG->addModule(BitcodeFiles[I]->takeModule().get());
// NOTE(review): verification ends up disabled when NDEBUG is NOT defined and
// enabled when it is, which looks inverted -- confirm the intent.
bool DisableVerify = true;
#ifdef NDEBUG
DisableVerify = false;
#endif
if (!CG->optimize(DisableVerify, false, false, false))
fatal(""); // optimize() should have emitted any error message.
// Objs is not declared in this function (presumably a member). It provides
// one output buffer per LTO job.
Objs.resize(Config->LTOJobs);
// Use std::list to avoid invalidation of pointers in OSPtrs.
std::list<raw_svector_ostream> OSs;
std::vector<raw_pwrite_stream *> OSPtrs;
for (SmallString<0> &Obj : Objs) {
OSs.emplace_back(Obj);
OSPtrs.push_back(&OSs.back());
}
if (!CG->compileOptimized(OSPtrs))
fatal(""); // compileOptimized() should have emitted any error message.
// Wrap every generated buffer in an ObjectFile and parse it right away.
std::vector<ObjectFile *> ObjFiles;
for (SmallString<0> &Obj : Objs) {
auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "<LTO object>"));
Files.emplace_back(ObjFile);
ObjectFiles.push_back(ObjFile);
ObjFile->parse();
ObjFiles.push_back(ObjFile);
}
return ObjFiles;
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,125 @@
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_SYMBOL_TABLE_H
#define LLD_COFF_SYMBOL_TABLE_H
#include "InputFiles.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
#ifdef _MSC_VER
// <future> depends on <eh.h> for __uncaught_exception.
#include <eh.h>
#endif
#include <future>
namespace llvm {
struct LTOCodeGenerator;
}
namespace lld {
namespace coff {
class Chunk;
class Defined;
class Lazy;
class SymbolBody;
struct Symbol;
// SymbolTable is a bucket of all known symbols, including defined,
// undefined, or lazy symbols (the last one is symbols in archive
// files whose archive members are not yet loaded).
//
// We put all symbols of all files into a SymbolTable, and the
// SymbolTable selects the "best" symbols if there are name
// conflicts. For example, a defined symbol is better than
// an undefined symbol. Or, if there's a conflict between a lazy and an
// undefined symbol, it'll read an archive member to get a real definition
// to replace the lazy symbol.
// NOTE(review): the original comment said this logic lives in resolve(), but
// this class declares no resolve(); the precedence rules visible here are in
// SymbolBody::compare(). Confirm and update.
class SymbolTable {
public:
void addFile(std::unique_ptr<InputFile> File);
std::vector<std::unique_ptr<InputFile>> &getFiles() { return Files; }
void step();
void run();
bool queueEmpty();
// Print an error message on undefined symbols. If Resolve is true, try to
// resolve any undefined symbols and update the symbol table accordingly.
void reportRemainingUndefines(bool Resolve);
// Returns a list of chunks of selected symbols.
std::vector<Chunk *> getChunks();
// Returns a symbol for a given name. Returns a nullptr if not found.
Symbol *find(StringRef Name);
Symbol *findUnderscore(StringRef Name);
// Occasionally we have to resolve an undefined symbol to its
// mangled symbol. This function tries to find a mangled name
// for U from the symbol table, and if found, sets the symbol as
// a weak alias for U.
void mangleMaybe(Undefined *U);
StringRef findMangle(StringRef Name);
// Print a layout map to OS.
void printMap(llvm::raw_ostream &OS);
// Build a set of COFF objects representing the combined contents of
// BitcodeFiles and add them to the symbol table. Called after all files are
// added and before the writer writes results to a file.
void addCombinedLTOObjects();
// The writer needs to handle DLL import libraries specially in
// order to create the import descriptor table.
std::vector<ImportFile *> ImportFiles;
// The writer needs to infer the machine type from the object files.
std::vector<ObjectFile *> ObjectFiles;
// Creates an Undefined symbol for a given name.
Undefined *addUndefined(StringRef Name);
DefinedRelative *addRelative(StringRef Name, uint64_t VA);
DefinedAbsolute *addAbsolute(StringRef Name, uint64_t VA);
// A list of chunks to be added to .rdata.
std::vector<Chunk *> LocalImportChunks;
private:
void readArchives();
void readObjects();
void addSymbol(SymbolBody *New);
void addLazy(Lazy *New, std::vector<Symbol *> *Accum);
Symbol *insert(SymbolBody *New);
StringRef findByPrefix(StringRef Prefix);
void addMemberFile(Lazy *Body);
// Merges the external symbols of one LTO-generated object into the table.
void addCombinedLTOObject(ObjectFile *Obj);
// Compiles BitcodeFiles with CG and returns the resulting object files.
std::vector<ObjectFile *> createLTOObjects(llvm::LTOCodeGenerator *CG);
llvm::DenseMap<StringRef, Symbol *> Symtab;
// Owns the input files; LTO-generated object files are appended here too.
std::vector<std::unique_ptr<InputFile>> Files;
std::vector<std::future<ArchiveFile *>> ArchiveQueue;
std::vector<std::future<InputFile *>> ObjectQueue;
std::vector<BitcodeFile *> BitcodeFiles;
// Buffers holding the object code emitted by LTO; the LTO-generated
// ObjectFiles are built from MemoryBufferRefs over these buffers.
std::vector<SmallString<0>> Objs;
llvm::BumpPtrAllocator Alloc;
};
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,217 @@
//===- Symbols.cpp --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::object;
using llvm::sys::fs::identify_magic;
using llvm::sys::fs::file_magic;
namespace lld {
namespace coff {
// Returns the symbol name, reading it from the COFF object on first use if
// it has not been set yet.
StringRef SymbolBody::getName() {
  if (!Name.empty())
    return Name;

  // DefinedCOFF names are read lazily for a performance reason.
  // Non-external symbol names are never used by the linker except for logging
  // or debugging. Their internal references are resolved not by name but by
  // symbol index. And because they are not external, no one can refer to
  // them by name. Object files contain lots of non-external symbols, and
  // creating StringRefs for them (which involves lots of strlen() on the
  // string table) is a waste of time.
  auto *COFFSym = cast<DefinedCOFF>(this);
  COFFSym->File->getCOFFObj()->getSymbolName(COFFSym->Sym, Name);
  return Name;
}
// Returns 1, 0 or -1 if this symbol should take precedence
// over the Other, tie or lose, respectively.
//
// The comparison is antisymmetric by construction: when this symbol's kind
// is greater than Other's, the result is the negation of
// Other->compare(this).
int SymbolBody::compare(SymbolBody *Other) {
  Kind LK = kind(), RK = Other->kind();

  // Normalize so that the smaller kind is on the left.
  if (LK > RK)
    return -Other->compare(this);

  // First handle comparisons between two different kinds.
  if (LK != RK) {
    if (RK > LastDefinedKind) {
      // RK is UndefinedKind or LazyKind. UndefinedKind sorts before LazyKind
      // and LK < RK here, so an Undefined/Lazy pair always arrives with the
      // Undefined on the left. (The previous check tested LK == LazyKind,
      // which can never hold at this point, and the assert below fired for
      // this pair.)
      if (LK == UndefinedKind) {
        // An undefined symbol with a weak alias wins over a lazy symbol;
        // otherwise the lazy symbol wins.
        return cast<Undefined>(this)->WeakAlias ? 1 : -1;
      }

      // The LHS is defined and so it wins.
      assert(LK <= LastDefinedKind && "Bad kind!");
      return 1;
    }

    // Bitcode has special complexities.
    if (RK == DefinedBitcodeKind) {
      auto *RHS = cast<DefinedBitcode>(Other);

      switch (LK) {
      case DefinedCommonKind:
        return 1;

      case DefinedRegularKind:
        // As an approximation, regular symbols win over bitcode symbols,
        // but we definitely have a conflict if the regular symbol is not
        // replaceable and neither is the bitcode symbol. We do not
        // replicate the rest of the symbol resolution logic here; symbol
        // resolution will be done accurately after lowering bitcode symbols
        // to regular symbols in addCombinedLTOObject().
        if (cast<DefinedRegular>(this)->isCOMDAT() || RHS->IsReplaceable)
          return 1;

        // Fallthrough to the default of a tie otherwise.
      default:
        return 0;
      }
    }

    // Either of the object file kind will trump a higher kind.
    if (LK <= LastDefinedCOFFKind)
      return 1;

    // The remaining kind pairs are ties amongst defined symbols.
    return 0;
  }

  // Now handle the case where the kinds are the same.
  switch (LK) {
  case DefinedRegularKind: {
    auto *LHS = cast<DefinedRegular>(this);
    auto *RHS = cast<DefinedRegular>(Other);
    // Two COMDAT symbols never tie; the one from the earlier file wins.
    if (LHS->isCOMDAT() && RHS->isCOMDAT())
      return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1;
    return 0;
  }

  case DefinedCommonKind: {
    auto *LHS = cast<DefinedCommon>(this);
    auto *RHS = cast<DefinedCommon>(Other);
    // The larger common symbol wins; equal sizes fall back to file order.
    if (LHS->getSize() == RHS->getSize())
      return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1;
    return LHS->getSize() > RHS->getSize() ? 1 : -1;
  }

  case DefinedBitcodeKind: {
    auto *LHS = cast<DefinedBitcode>(this);
    auto *RHS = cast<DefinedBitcode>(Other);

    // If both are non-replaceable, we have a tie.
    if (!LHS->IsReplaceable && !RHS->IsReplaceable)
      return 0;

    // Non-replaceable symbols win, but even two replaceable symbols don't
    // tie. If both symbols are replaceable, choice is arbitrary.
    if (RHS->IsReplaceable && LHS->IsReplaceable)
      return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1;
    return LHS->IsReplaceable ? -1 : 1;
  }

  case LazyKind: {
    // Don't tie, pick the earliest.
    auto *LHS = cast<Lazy>(this);
    auto *RHS = cast<Lazy>(Other);
    return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1;
  }

  case UndefinedKind: {
    auto *LHS = cast<Undefined>(this);
    auto *RHS = cast<Undefined>(Other);
    // Tie if both undefined symbols have different weak aliases.
    if (LHS->WeakAlias && RHS->WeakAlias) {
      if (LHS->WeakAlias->getName() != RHS->WeakAlias->getName())
        return 0;
      return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1;
    }
    return LHS->WeakAlias ? 1 : -1;
  }

  case DefinedLocalImportKind:
  case DefinedImportThunkKind:
  case DefinedImportDataKind:
  case DefinedAbsoluteKind:
  case DefinedRelativeKind:
    // These all simply tie.
    return 0;
  }
  llvm_unreachable("unknown symbol kind");
}
// Returns the symbol name, followed by a space and the short name of the
// defining file when the symbol comes from a COFF object or a bitcode file.
std::string SymbolBody::getDebugName() {
  std::string Result = getName().str();

  if (auto *COFFSym = dyn_cast<DefinedCOFF>(this)) {
    Result += " ";
    Result += COFFSym->File->getShortName();
    return Result;
  }

  if (auto *BitcodeSym = dyn_cast<DefinedBitcode>(this)) {
    Result += " ";
    Result += BitcodeSym->File->getShortName();
  }
  return Result;
}
// Wraps the raw symbol table entry in a COFFSymbolRef, choosing the entry
// layout from the symbol table entry size reported by the object file.
COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
  size_t SymSize = File->getCOFFObj()->getSymbolTableEntrySize();
  if (SymSize != sizeof(coff_symbol16)) {
    // Anything that is not the 16-variant layout must be the 32-variant.
    assert(SymSize == sizeof(coff_symbol32));
    return COFFSymbolRef(reinterpret_cast<const coff_symbol32 *>(Sym));
  }
  return COFFSymbolRef(reinterpret_cast<const coff_symbol16 *>(Sym));
}
// Creates the import thunk chunk matching the target machine. Any machine
// other than AMD64, I386 or ARMNT is treated as unreachable.
DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S,
                                       uint16_t Machine)
    : Defined(DefinedImportThunkKind, Name) {
  switch (Machine) {
  case AMD64:
    Data.reset(new ImportThunkChunkX64(S));
    break;
  case I386:
    Data.reset(new ImportThunkChunkX86(S));
    break;
  case ARMNT:
    Data.reset(new ImportThunkChunkARM(S));
    break;
  default:
    llvm_unreachable("unknown machine type");
  }
}
std::unique_ptr<InputFile> Lazy::getMember() {
MemoryBufferRef MBRef = File->getMember(&Sym);
// getMember returns an empty buffer if the member was already
// read from the library.
if (MBRef.getBuffer().empty())
return std::unique_ptr<InputFile>(nullptr);
file_magic Magic = identify_magic(MBRef.getBuffer());
if (Magic == file_magic::coff_import_library)
return std::unique_ptr<InputFile>(new ImportFile(MBRef));
std::unique_ptr<InputFile> Obj;
if (Magic == file_magic::coff_object)
Obj.reset(new ObjectFile(MBRef));
else if (Magic == file_magic::bitcode)
Obj.reset(new BitcodeFile(MBRef));
else
fatal("unknown file type: " + File->getName());
Obj->setParentName(File->getName());
return Obj;
}
// Follows the chain of weak aliases and returns the first one whose resolved
// body is a Defined symbol, or null if the chain ends without one.
Defined *Undefined::getWeakAlias() {
  // A weak alias may be a weak alias to another symbol, so check recursively.
  SymbolBody *Alias = WeakAlias;
  while (Alias) {
    if (auto *Def = dyn_cast<Defined>(Alias->repl()))
      return Def;
    Alias = cast<Undefined>(Alias)->WeakAlias;
  }
  return nullptr;
}
} // namespace coff
} // namespace lld

View File

@ -0,0 +1,403 @@
//===- Symbols.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_SYMBOLS_H
#define LLD_COFF_SYMBOLS_H
#include "Chunks.h"
#include "Config.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include <atomic>
#include <memory>
#include <vector>
namespace lld {
namespace coff {
using llvm::object::Archive;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_import_header;
using llvm::object::coff_symbol_generic;
class ArchiveFile;
class BitcodeFile;
class InputFile;
class ObjectFile;
class SymbolBody;
// A real symbol object, SymbolBody, is usually accessed indirectly
// through a Symbol. There's always one Symbol for each symbol name.
// The resolver updates SymbolBody pointers as it resolves symbols.
struct Symbol {
explicit Symbol(SymbolBody *P) : Body(P) {}
// The body currently selected for this name. SymbolBody::repl() reads this
// through the body's back-reference.
SymbolBody *Body;
};
// The base class for real symbol classes.
class SymbolBody {
public:
enum Kind {
// The order of these is significant. We start with the regular defined
// symbols as those are the most prevalent and the zero tag is the cheapest
// to set. Among the defined kinds, the lower kind is preferred over
// the higher kind when testing whether one symbol should take precedence
// over another.
DefinedRegularKind = 0,
DefinedCommonKind,
DefinedLocalImportKind,
DefinedImportThunkKind,
DefinedImportDataKind,
DefinedAbsoluteKind,
DefinedRelativeKind,
DefinedBitcodeKind,
UndefinedKind,
LazyKind,
// Range markers used by the classof() checks of DefinedCOFF and Defined.
LastDefinedCOFFKind = DefinedCommonKind,
LastDefinedKind = DefinedBitcodeKind,
};
Kind kind() const { return static_cast<Kind>(SymbolKind); }
// Returns true if this is an external symbol.
bool isExternal() { return IsExternal; }
// Returns the symbol name.
StringRef getName();
// A SymbolBody has a backreference to a Symbol. Originally they are
// doubly-linked. A backreference will never change. But the pointer
// in the Symbol may be mutated by the resolver. If you have a
// pointer P to a SymbolBody and are not sure whether the resolver
// has chosen the object among other objects having the same name,
// you can access P->Backref->Body to get the resolver's result.
void setBackref(Symbol *P) { Backref = P; }
// Returns the resolver's choice, or this body if no backreference is set.
SymbolBody *repl() { return Backref ? Backref->Body : this; }
// Decides which symbol should "win" in the symbol table, this or
// the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if
// they are duplicate (conflicting) symbols.
int compare(SymbolBody *Other);
// Returns a name of this symbol including source file name.
// Used only for debugging and logging.
std::string getDebugName();
protected:
// Symbols are external by default; subclasses built from a COFF symbol
// overwrite IsExternal in their constructors.
explicit SymbolBody(Kind K, StringRef N = "")
: SymbolKind(K), IsExternal(true), IsCOMDAT(false),
IsReplaceable(false), Name(N) {}
// Holds a Kind value; read back through kind().
const unsigned SymbolKind : 8;
unsigned IsExternal : 1;
// This bit is used by the \c DefinedRegular subclass.
unsigned IsCOMDAT : 1;
// This bit is used by the \c DefinedBitcode subclass.
unsigned IsReplaceable : 1;
// May be empty for DefinedCOFF symbols until getName() reads it lazily.
StringRef Name;
Symbol *Backref = nullptr;
};
// The base class for any defined symbols, including absolute symbols,
// etc.
class Defined : public SymbolBody {
public:
Defined(Kind K, StringRef N = "") : SymbolBody(K, N) {}
// Every kind up to and including LastDefinedKind is a defined symbol.
static bool classof(const SymbolBody *S) {
return S->kind() <= LastDefinedKind;
}
// Returns the RVA (relative virtual address) of this symbol. The
// writer sets and uses RVAs. Defined inline at the end of this header; it
// dispatches on kind() to the subclass's own getRVA().
uint64_t getRVA();
// Returns the RVA relative to the beginning of the output section.
// Used to implement SECREL relocation type.
uint64_t getSecrel();
// Returns the output section index.
// Used to implement SECTION relocation type.
uint64_t getSectionIndex();
// Returns true if this symbol points to an executable (e.g. .text) section.
// Used to implement ARM relocations.
bool isExecutable();
};
// Symbols defined via a COFF object file.
class DefinedCOFF : public Defined {
// SymbolBody::getName() reads File and Sym to fetch the name lazily.
friend SymbolBody;
public:
// No name is passed to the base class here; it is read on demand.
DefinedCOFF(Kind K, ObjectFile *F, COFFSymbolRef S)
: Defined(K), File(F), Sym(S.getGeneric()) {}
static bool classof(const SymbolBody *S) {
return S->kind() <= LastDefinedCOFFKind;
}
// Index of the defining file; used by compare() to break ties.
int getFileIndex() { return File->Index; }
COFFSymbolRef getCOFFSymbol();
protected:
ObjectFile *File;
// Generic view of the symbol table entry, taken from S.getGeneric().
const coff_symbol_generic *Sym;
};
// Regular defined symbols read from object file symbol tables.
class DefinedRegular : public DefinedCOFF {
public:
// Keeps the address of C's Repl pointer rather than C itself, so getChunk()
// returns whatever chunk C->Repl points to at the time of the call.
DefinedRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C)
: DefinedCOFF(DefinedRegularKind, F, S), Data(&C->Repl) {
IsExternal = S.isExternal();
IsCOMDAT = C->isCOMDAT();
}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedRegularKind;
}
// Chunk RVA plus the symbol's value.
uint64_t getRVA() { return (*Data)->getRVA() + Sym->Value; }
bool isCOMDAT() { return IsCOMDAT; }
SectionChunk *getChunk() { return *Data; }
uint32_t getValue() { return Sym->Value; }
private:
SectionChunk **Data;
};
// Common symbols read from object file symbol tables. When two common
// symbols share a name, compare() prefers the one with the larger size.
class DefinedCommon : public DefinedCOFF {
public:
DefinedCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C)
: DefinedCOFF(DefinedCommonKind, F, S), Data(C) {
IsExternal = S.isExternal();
}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedCommonKind;
}
uint64_t getRVA() { return Data->getRVA(); }
private:
// SymbolBody::compare() calls getSize().
friend SymbolBody;
// The size is taken from the symbol's Value field.
uint64_t getSize() { return Sym->Value; }
CommonChunk *Data;
};
// Absolute symbols.
class DefinedAbsolute : public Defined {
public:
// Builds an absolute symbol from a COFF symbol, taking its value as the VA.
DefinedAbsolute(StringRef N, COFFSymbolRef S)
: Defined(DefinedAbsoluteKind, N), VA(S.getValue()) {
IsExternal = S.isExternal();
}
// Builds an absolute symbol with an explicit VA.
DefinedAbsolute(StringRef N, uint64_t V)
: Defined(DefinedAbsoluteKind, N), VA(V) {}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedAbsoluteKind;
}
// The stored value is a VA, so the image base is subtracted to get an RVA.
uint64_t getRVA() { return VA - Config->ImageBase; }
void setVA(uint64_t V) { VA = V; }
private:
uint64_t VA;
};
// This is a kind of absolute symbol but relative to the image base.
// Unlike absolute symbols, relocations referring to this kind of symbol
// are subject to base relocation. This type is used rarely --
// mainly for __ImageBase.
class DefinedRelative : public Defined {
public:
// V is stored as the RVA directly and defaults to 0.
explicit DefinedRelative(StringRef Name, uint64_t V = 0)
: Defined(DefinedRelativeKind, Name), RVA(V) {}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedRelativeKind;
}
uint64_t getRVA() { return RVA; }
void setRVA(uint64_t V) { RVA = V; }
private:
uint64_t RVA;
};
// This class represents a symbol defined in an archive file. It is
// created from an archive file header, and it knows how to load an
// object file from an archive to replace itself with a defined
// symbol. If the resolver finds both Undefined and Lazy for
// the same name, it will ask the Lazy to load a file.
class Lazy : public SymbolBody {
public:
// The symbol name is taken from the archive symbol S.
Lazy(ArchiveFile *F, const Archive::Symbol S)
: SymbolBody(LazyKind, S.getName()), File(F), Sym(S) {}
static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; }
// Returns an object file for this symbol, or a nullptr if the file
// was already returned.
std::unique_ptr<InputFile> getMember();
// Index of the archive file; compare() prefers the lower index.
int getFileIndex() { return File->Index; }
private:
ArchiveFile *File;
const Archive::Symbol Sym;
};
// Undefined symbols.
class Undefined : public SymbolBody {
public:
explicit Undefined(StringRef N) : SymbolBody(UndefinedKind, N) {}
static bool classof(const SymbolBody *S) {
return S->kind() == UndefinedKind;
}
// An undefined symbol can have a fallback symbol which gives an
// undefined symbol a second chance if it would remain undefined.
// If it remains undefined, it'll be replaced with whatever the
// WeakAlias pointer points to. Null when there is no fallback.
SymbolBody *WeakAlias = nullptr;
// If this symbol is external weak, try to resolve it to a defined
// symbol by searching the chain of fallback symbols. Returns the symbol if
// successful, otherwise returns null.
Defined *getWeakAlias();
};
// Windows-specific classes.
// This class represents a symbol imported from a DLL. This has two
// names for internal use and external use. The former is used for
// name resolution, and the latter is used for the import descriptor
// table in an output. The former has "__imp_" prefix.
class DefinedImportData : public Defined {
public:
// D is the DLL name, N the internal name, E the external name, and H the
// import header from which the ordinal is read.
DefinedImportData(StringRef D, StringRef N, StringRef E,
const coff_import_header *H)
: Defined(DefinedImportDataKind, N), DLLName(D), ExternalName(E), Hdr(H) {
}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedImportDataKind;
}
// Location is null until setLocation() is called, so getRVA() must not be
// called before that.
uint64_t getRVA() { return Location->getRVA(); }
StringRef getDLLName() { return DLLName; }
StringRef getExternalName() { return ExternalName; }
void setLocation(Chunk *AddressTable) { Location = AddressTable; }
uint16_t getOrdinal() { return Hdr->OrdinalHint; }
private:
StringRef DLLName;
StringRef ExternalName;
const coff_import_header *Hdr;
Chunk *Location = nullptr;
};
// This class represents a symbol for a jump table entry which jumps
// to a function in a DLL. Linkers are supposed to create such symbols
// without "__imp_" prefix for all function symbols exported from
// DLLs, so that you can call DLL functions as regular functions with
// a regular name. A function pointer is given as a DefinedImportData.
class DefinedImportThunk : public Defined {
public:
// Creates the thunk chunk for the given machine type (defined out of line).
DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine);
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedImportThunkKind;
}
uint64_t getRVA() { return Data->getRVA(); }
Chunk *getChunk() { return Data.get(); }
private:
// The thunk chunk; owned by this symbol.
std::unique_ptr<Chunk> Data;
};
// If you have a symbol "__imp_foo" in your object file, a symbol name
// "foo" becomes automatically available as a pointer to "__imp_foo".
// This class is for such automatically-created symbols.
// Yes, this is an odd feature. We didn't intend to implement that.
// This is here just for compatibility with MSVC.
class DefinedLocalImport : public Defined {
public:
// The embedded chunk is constructed from the symbol S.
DefinedLocalImport(StringRef N, Defined *S)
: Defined(DefinedLocalImportKind, N), Data(S) {}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedLocalImportKind;
}
uint64_t getRVA() { return Data.getRVA(); }
Chunk *getChunk() { return &Data; }
private:
// Stored by value, so the chunk lives exactly as long as this symbol.
LocalImportChunk Data;
};
// Symbols defined by a bitcode file. They have no address:
// Defined::getRVA() treats this kind as unreachable.
class DefinedBitcode : public Defined {
// SymbolBody::getDebugName() reads File.
friend SymbolBody;
public:
// IsReplaceable is stored in the bit-field declared by SymbolBody and is
// consulted by SymbolBody::compare().
DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable)
: Defined(DefinedBitcodeKind, N), File(F) {
this->IsReplaceable = IsReplaceable;
}
static bool classof(const SymbolBody *S) {
return S->kind() == DefinedBitcodeKind;
}
private:
BitcodeFile *File;
};
// Dispatches to the getRVA() of the concrete symbol class. Bitcode, lazy and
// undefined symbols have no address, so those kinds are unreachable here.
inline uint64_t Defined::getRVA() {
  switch (kind()) {
  case DefinedRegularKind:
    return cast<DefinedRegular>(this)->getRVA();
  case DefinedCommonKind:
    return cast<DefinedCommon>(this)->getRVA();
  case DefinedLocalImportKind:
    return cast<DefinedLocalImport>(this)->getRVA();
  case DefinedImportThunkKind:
    return cast<DefinedImportThunk>(this)->getRVA();
  case DefinedImportDataKind:
    return cast<DefinedImportData>(this)->getRVA();
  case DefinedAbsoluteKind:
    return cast<DefinedAbsolute>(this)->getRVA();
  case DefinedRelativeKind:
    return cast<DefinedRelative>(this)->getRVA();
  case DefinedBitcodeKind:
    llvm_unreachable("There is no address for a bitcode symbol.");
  case UndefinedKind:
  case LazyKind:
    llvm_unreachable("Cannot get the address for an undefined symbol.");
  }
  llvm_unreachable("unknown symbol kind");
}
} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,796 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "DLL.h"
#include "Error.h"
#include "InputFiles.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Core/Parallel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <utility>
using namespace llvm;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::coff;
// Size constants used by the writer, in bytes unless noted.
// NOTE(review): PageSize is presumably the in-memory section alignment;
// its use sites are outside this view -- confirm.
static const int PageSize = 4096;
// The raw (on-disk) size of a section is rounded up to a multiple of this;
// see OutputSection::addChunk().
static const int SectorSize = 512;
// NOTE(review): presumably the size of the DOS stub at the start of the
// output file; use sites are outside this view -- confirm.
static const int DOSStubSize = 64;
// NOTE(review): the identifier is misspelled ("Numberf"); it is left as is
// because it may be referenced elsewhere in the file. Presumably the number
// of data directory entries in the PE header -- confirm.
static const int NumberfOfDataDirectory = 16;
namespace {
// The writer writes a SymbolTable result to a file.
class Writer {
public:
Writer(SymbolTable *T) : Symtab(T) {}
// Runs the whole output pipeline; see Writer::run() for the step order.
void run();
private:
// The steps below are invoked by run() in a fixed order.
void createSections();
void createMiscChunks();
void createImportTables();
void createExportTable();
void assignAddresses();
void removeEmptySections();
void createSymbolAndStringTable();
void openFile(StringRef OutputPath);
// Instantiated by run() with pe32plus_header or pe32_header depending on
// Config->is64().
template <typename PEHeaderTy> void writeHeader();
void fixSafeSEHSymbols();
void setSectionPermissions();
void writeSections();
void sortExceptionTable();
void applyRelocations();
llvm::Optional<coff_symbol16> createSymbol(Defined *D);
size_t addEntryToStringTable(StringRef Str);
OutputSection *findSection(StringRef Name);
OutputSection *createSection(StringRef Name);
void addBaserels(OutputSection *Dest);
void addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V);
uint32_t getSizeOfInitializedData();
std::map<StringRef, std::vector<DefinedImportData *>> binImports();
// The symbol table being written out; not owned.
SymbolTable *Symtab;
// The output file buffer; committed at the end of run().
std::unique_ptr<llvm::FileOutputBuffer> Buffer;
// Allocator for OutputSection objects (see createSections()).
llvm::SpecificBumpPtrAllocator<OutputSection> CAlloc;
llvm::SpecificBumpPtrAllocator<BaserelChunk> BAlloc;
std::vector<OutputSection *> OutputSections;
std::vector<char> Strtab;
std::vector<llvm::object::coff_symbol16> OutputSymtab;
IdataContents Idata;
DelayLoadContents DelayIdata;
EdataContents Edata;
std::unique_ptr<SEHTableChunk> SEHTable;
// NOTE(review): FileSize, SizeOfImage and SizeOfHeaders have no default
// initializer, unlike PointerToSymbolTable; confirm they are always
// assigned before being read.
uint64_t FileSize;
uint32_t PointerToSymbolTable = 0;
uint64_t SizeOfImage;
uint64_t SizeOfHeaders;
std::vector<std::unique_ptr<Chunk>> Chunks;
};
} // anonymous namespace
namespace lld {
namespace coff {
// Entry point of the writer: writes the result held by T to the output file.
void writeResult(SymbolTable *T) {
  Writer W(T);
  W.run();
}
// OutputSection represents a section in an output file. It's a
// container of chunks. OutputSection and Chunk are in a 1:N relationship.
// A chunk cannot belong to more than one OutputSection. The writer
// creates multiple OutputSections and assigns them unique,
// non-overlapping file offsets and RVAs.
class OutputSection {
public:
// The section header starts out value-initialized.
OutputSection(StringRef N) : Name(N), Header({}) {}
// Sets the section's RVA and adds it to the RVA of every chunk.
void setRVA(uint64_t);
// Sets the file offset; ignored when the section has no raw data.
void setFileOffset(uint64_t);
// Appends C, assigning its section-relative offset and growing the section.
void addChunk(Chunk *C);
StringRef getName() { return Name; }
std::vector<Chunk *> &getChunks() { return Chunks; }
// Both permission setters keep only the bits covered by PermMask.
void addPermissions(uint32_t C);
void setPermissions(uint32_t C);
uint32_t getPermissions() { return Header.Characteristics & PermMask; }
uint32_t getCharacteristics() { return Header.Characteristics; }
uint64_t getRVA() { return Header.VirtualAddress; }
uint64_t getFileOff() { return Header.PointerToRawData; }
void writeHeaderTo(uint8_t *Buf);
// Returns the size of this section in an executable memory image.
// This may be smaller than the raw size (the raw size is a multiple
// of the disk sector size, so there may be padding at the end), or may be
// larger (if that's the case, the loader reserves space after the end
// of raw data).
uint64_t getVirtualSize() { return Header.VirtualSize; }
// Returns the size of the section in the output file.
uint64_t getRawSize() { return Header.SizeOfRawData; }
// Set offset into the string table storing this section name.
// Used only when the name is longer than 8 bytes.
void setStringTableOff(uint32_t V) { StringTableOff = V; }
// N.B. The section index is one based.
uint32_t SectionIndex = 0;
private:
StringRef Name;
coff_section Header;
// Zero means the name is written inline in the header (see writeHeaderTo).
uint32_t StringTableOff = 0;
std::vector<Chunk *> Chunks;
};
void OutputSection::setRVA(uint64_t RVA) {
Header.VirtualAddress = RVA;
for (Chunk *C : Chunks)
C->setRVA(C->getRVA() + RVA);
}
// Records where the section's raw data starts in the output file.
void OutputSection::setFileOffset(uint64_t Off) {
  // If a section has no actual data (i.e. BSS section), we want to
  // set 0 to its PointerToRawData. Otherwise the output is rejected
  // by the loader.
  const bool HasRawData = Header.SizeOfRawData != 0;
  if (HasRawData)
    Header.PointerToRawData = Off;
}
// Appends a chunk to this section: places it at the next offset satisfying
// its alignment and grows the section's virtual size (and raw size, if the
// chunk has data) to cover it.
void OutputSection::addChunk(Chunk *C) {
  Chunks.push_back(C);
  C->setOutputSection(this);

  // Start of the chunk, relative to the beginning of the section.
  uint64_t CurrentEnd = Header.VirtualSize;
  const uint64_t Start = alignTo(CurrentEnd, C->getAlign());
  C->setRVA(Start);
  C->setOutputSectionOff(Start);

  const uint64_t End = Start + C->getSize();
  Header.VirtualSize = End;
  // Raw data is padded to a whole number of disk sectors.
  if (C->hasData())
    Header.SizeOfRawData = alignTo(End, SectorSize);
}
void OutputSection::addPermissions(uint32_t C) {
Header.Characteristics |= C & PermMask;
}
void OutputSection::setPermissions(uint32_t C) {
Header.Characteristics = C & PermMask;
}
// Write the section header to a given buffer.
void OutputSection::writeHeaderTo(uint8_t *Buf) {
auto *Hdr = reinterpret_cast<coff_section *>(Buf);
*Hdr = Header;
if (StringTableOff) {
// If name is too long, write offset into the string table as a name.
sprintf(Hdr->Name, "/%d", StringTableOff);
} else {
assert(!Config->Debug || Name.size() <= COFF::NameSize);
strncpy(Hdr->Name, Name.data(),
std::min(Name.size(), (size_t)COFF::NameSize));
}
}
// Returns this symbol's RVA relative to the start of its output section.
// Only regular symbols are supported; anything else is a fatal error.
uint64_t Defined::getSecrel() {
  auto *Regular = dyn_cast<DefinedRegular>(this);
  if (!Regular)
    fatal("SECREL relocation points to a non-regular symbol");
  uint64_t SectionRVA = Regular->getChunk()->getOutputSection()->getRVA();
  return getRVA() - SectionRVA;
}
// Returns the index of the output section containing this symbol.
// Only regular symbols are supported; anything else is a fatal error.
uint64_t Defined::getSectionIndex() {
  auto *Regular = dyn_cast<DefinedRegular>(this);
  if (!Regular)
    fatal("SECTION relocation points to a non-regular symbol");
  return Regular->getChunk()->getOutputSection()->SectionIndex;
}
// Returns true if this symbol is a regular symbol whose output section has
// the execute permission, or if it is an import thunk.
bool Defined::isExecutable() {
  auto *Regular = dyn_cast<DefinedRegular>(this);
  if (!Regular)
    return isa<DefinedImportThunk>(this);

  uint32_t Perms = Regular->getChunk()->getOutputSection()->getPermissions();
  return (Perms & IMAGE_SCN_MEM_EXECUTE) != 0;
}
} // namespace coff
} // namespace lld
// The main function of the writer. Runs every output step in order and
// commits the output buffer; a failed commit is a fatal error.
void Writer::run() {
  createSections();
  createMiscChunks();
  createImportTables();
  createExportTable();
  if (Config->Relocatable)
    createSection(".reloc");

  assignAddresses();
  removeEmptySections();
  setSectionPermissions();
  createSymbolAndStringTable();

  openFile(Config->OutputFile);
  // The PE header layout depends on whether the target is 64-bit.
  if (Config->is64())
    writeHeader<pe32plus_header>();
  else
    writeHeader<pe32_header>();

  fixSafeSEHSymbols();
  writeSections();
  sortExceptionTable();

  if (auto Err = Buffer->commit())
    fatal(Err, "failed to write the output file");
}
// Maps an input section name to its output section name: everything from the
// first '$' on is dropped, then Config->Merge is consulted for a replacement.
static StringRef getOutputSection(StringRef Name) {
  StringRef Base = Name.split('$').first;
  auto Found = Config->Merge.find(Base);
  if (Found != Config->Merge.end())
    return Found->second;
  return Base;
}
// Create output section objects and add them to OutputSections.
//
// Live chunks from the symbol table are grouped by input section name, and
// each group is appended to the output section its name maps to. Section
// chunks that are not live are skipped (and reported when Config->Verbose).
void Writer::createSections() {
  // First, bin chunks by name.
  std::map<StringRef, std::vector<Chunk *>> Map;
  for (Chunk *C : Symtab->getChunks()) {
    auto *SC = dyn_cast<SectionChunk>(C);
    if (SC && !SC->isLive()) {
      if (Config->Verbose)
        SC->printDiscardedMessage();
      continue;
    }
    Map[C->getSectionName()].push_back(C);
  }

  // Then create an OutputSection for each section.
  // '$' and all following characters in input section names are
  // discarded when determining output section. So, .text$foo
  // contributes to .text, for example. See PE/COFF spec 3.2.
  SmallDenseMap<StringRef, OutputSection *> Sections;
  // Iterate by reference: iterating by value copied every chunk vector.
  for (auto &Pair : Map) {
    StringRef Name = getOutputSection(Pair.first);
    OutputSection *&Sec = Sections[Name];
    if (!Sec) {
      Sec = new (CAlloc.Allocate()) OutputSection(Name);
      OutputSections.push_back(Sec);
    }
    std::vector<Chunk *> &Chunks = Pair.second;
    for (Chunk *C : Chunks) {
      Sec->addChunk(C);
      Sec->addPermissions(C->getPermissions());
    }
  }
}
void Writer::createMiscChunks() {
// Create thunks for locally-dllimported symbols.
if (!Symtab->LocalImportChunks.empty()) {
OutputSection *Sec = createSection(".rdata");
for (Chunk *C : Symtab->LocalImportChunks)
Sec->addChunk(C);
}
// Create SEH table. x86-only.
if (Config->Machine != I386)
return;
std::set<Defined *> Handlers;
for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) {
if (!File->SEHCompat)
return;
for (SymbolBody *B : File->SEHandlers)
Handlers.insert(cast<Defined>(B->repl()));
}
SEHTable.reset(new SEHTableChunk(Handlers));
createSection(".rdata")->addChunk(SEHTable.get());
}
// Create .idata section for the DLL-imported symbol table.
// The format of this section is inherently Windows-specific.
// IdataContents class abstracted away the details for us,
// so we just let it create chunks and add them to the section.
void Writer::createImportTables() {
  if (Symtab->ImportFiles.empty())
    return;

  // Initialize DLLOrder so that import entries are ordered in
  // the same order as in the command line. (That affects DLL
  // initialization order, and this ordering is MSVC-compatible.)
  for (ImportFile *File : Symtab->ImportFiles) {
    std::string DLL = StringRef(File->DLLName).lower();
    if (Config->DLLOrder.count(DLL) != 0)
      continue;
    // Read the size before operator[] inserts the new key. Written as a
    // single statement (`DLLOrder[DLL] = DLLOrder.size()`), the order in which
    // the two sides are evaluated is unspecified before C++17, so the stored
    // index could differ from one compiler to another.
    const auto Order = Config->DLLOrder.size();
    Config->DLLOrder[DLL] = Order;
  }

  // Import thunks are code and go into .text. Each imported symbol goes into
  // either the delay-load table or the regular import table, depending on
  // whether its DLL was requested to be delay-loaded.
  OutputSection *Text = createSection(".text");
  for (ImportFile *File : Symtab->ImportFiles) {
    if (DefinedImportThunk *Thunk = File->ThunkSym)
      Text->addChunk(Thunk->getChunk());
    if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) {
      DelayIdata.add(File->ImpSym);
    } else {
      Idata.add(File->ImpSym);
    }
  }

  if (!Idata.empty()) {
    OutputSection *Sec = createSection(".idata");
    for (Chunk *C : Idata.getChunks())
      Sec->addChunk(C);
  }

  if (!DelayIdata.empty()) {
    // The delay-load tables are created around the delay-load helper symbol.
    Defined *Helper = cast<Defined>(Config->DelayLoadHelper->repl());
    DelayIdata.create(Helper);
    // Their chunks are spread over three sections: .didat, .data and .text.
    OutputSection *Sec = createSection(".didat");
    for (Chunk *C : DelayIdata.getChunks())
      Sec->addChunk(C);
    Sec = createSection(".data");
    for (Chunk *C : DelayIdata.getDataChunks())
      Sec->addChunk(C);
    Sec = createSection(".text");
    for (std::unique_ptr<Chunk> &C : DelayIdata.getCodeChunks())
      Sec->addChunk(C.get());
  }
}
void Writer::createExportTable() {
if (Config->Exports.empty())
return;
OutputSection *Sec = createSection(".edata");
for (std::unique_ptr<Chunk> &C : Edata.Chunks)
Sec->addChunk(C.get());
}
// The Windows loader doesn't seem to like empty sections,
// so we remove them if any.
void Writer::removeEmptySections() {
auto IsEmpty = [](OutputSection *S) { return S->getVirtualSize() == 0; };
OutputSections.erase(
std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty),
OutputSections.end());
uint32_t Idx = 1;
for (OutputSection *Sec : OutputSections)
Sec->SectionIndex = Idx++;
}
// Appends Str, followed by a NUL terminator, to the string table and returns
// the offset at which the entry starts. The offset includes the 4-byte size
// field that precedes the string data. Only names longer than COFF::NameSize
// may be added.
size_t Writer::addEntryToStringTable(StringRef Str) {
  assert(Str.size() > COFF::NameSize);
  const size_t EntryOffset = Strtab.size() + 4; // +4 for the size field
  for (char Ch : Str)
    Strtab.push_back(Ch);
  Strtab.push_back('\0');
  return EntryOffset;
}
// Builds the COFF symbol table record for Def.
// Returns None when Def is a regular symbol whose chunk is not live;
// otherwise returns the filled-in record. Names longer than COFF::NameSize
// are added to the string table as a side effect.
Optional<coff_symbol16> Writer::createSymbol(Defined *Def) {
if (auto *D = dyn_cast<DefinedRegular>(Def))
if (!D->getChunk()->isLive())
return None;
coff_symbol16 Sym;
StringRef Name = Def->getName();
// A long name is referenced by its string table offset (Zeroes == 0);
// a short name is stored inline, padded with NUL bytes.
if (Name.size() > COFF::NameSize) {
Sym.Name.Offset.Zeroes = 0;
Sym.Name.Offset.Offset = addEntryToStringTable(Name);
} else {
memset(Sym.Name.ShortName, 0, COFF::NameSize);
memcpy(Sym.Name.ShortName, Name.data(), Name.size());
}
// Symbols that come from a COFF file keep their original type and storage
// class; all other symbols are emitted as untyped external symbols.
if (auto *D = dyn_cast<DefinedCOFF>(Def)) {
COFFSymbolRef Ref = D->getCOFFSymbol();
Sym.Type = Ref.getType();
Sym.StorageClass = Ref.getStorageClass();
} else {
Sym.Type = IMAGE_SYM_TYPE_NULL;
Sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL;
}
Sym.NumberOfAuxSymbols = 0;
switch (Def->kind()) {
// Absolute and relative symbols are not tied to a section: the value is
// getRVA() as is and the section number is IMAGE_SYM_ABSOLUTE.
case SymbolBody::DefinedAbsoluteKind:
case SymbolBody::DefinedRelativeKind:
Sym.Value = Def->getRVA();
Sym.SectionNumber = IMAGE_SYM_ABSOLUTE;
break;
default: {
// Pick the last output section whose RVA is not greater than the symbol's
// RVA and express the value as an offset into that section.
// NOTE(review): the early break presumably relies on OutputSections being
// ordered by ascending RVA - confirm.
// NOTE(review): Sec stays null if the very first section already starts
// beyond RVA (or there are no sections); it is dereferenced unconditionally
// below.
uint64_t RVA = Def->getRVA();
OutputSection *Sec = nullptr;
for (OutputSection *S : OutputSections) {
if (S->getRVA() > RVA)
break;
Sec = S;
}
Sym.Value = RVA - Sec->getRVA();
Sym.SectionNumber = Sec->SectionIndex;
break;
}
}
return Sym;
}
// Builds the output symbol table (OutputSymtab) and string table (Strtab) and
// extends FileSize so that both fit after the last section. Does nothing
// unless both Config->Debug and Config->WriteSymtab are set.
void Writer::createSymbolAndStringTable() {
if (!Config->Debug || !Config->WriteSymtab)
return;
// Name field in the section table is 8 byte long. Longer names need
// to be written to the string table. First, construct string table.
for (OutputSection *Sec : OutputSections) {
StringRef Name = Sec->getName();
if (Name.size() <= COFF::NameSize)
continue;
Sec->setStringTableOff(addEntryToStringTable(Name));
}
// Emit a record for every defined symbol of every object file ...
for (lld::coff::ObjectFile *File : Symtab->ObjectFiles)
for (SymbolBody *B : File->getSymbols())
if (auto *D = dyn_cast<Defined>(B))
if (Optional<coff_symbol16> Sym = createSymbol(D))
OutputSymtab.push_back(*Sym);
// ... and for every symbol of every import file. The unconditional cast
// requires all of these symbols to be Defined.
for (ImportFile *File : Symtab->ImportFiles)
for (SymbolBody *B : File->getSymbols())
if (Optional<coff_symbol16> Sym = createSymbol(cast<Defined>(B)))
OutputSymtab.push_back(*Sym);
// NOTE(review): back() requires OutputSections to be non-empty; nothing in
// this function checks that.
OutputSection *LastSection = OutputSections.back();
// We position the symbol table to be adjacent to the end of the last section.
uint64_t FileOff = LastSection->getFileOff() +
alignTo(LastSection->getRawSize(), SectorSize);
if (!OutputSymtab.empty()) {
PointerToSymbolTable = FileOff;
FileOff += OutputSymtab.size() * sizeof(coff_symbol16);
}
// The string table is preceded by a 4-byte size field.
if (!Strtab.empty())
FileOff += Strtab.size() + 4;
FileSize = alignTo(FileOff, SectorSize);
}
// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
// Also computes SizeOfHeaders, FileSize and SizeOfImage, and fills the .reloc
// section (if present) when the loop reaches it.
void Writer::assignAddresses() {
// The headers consist of the DOS stub, the PE magic, the COFF file header,
// the data directories, one section header per output section and the
// 32-bit or 64-bit PE header, rounded up to a whole sector.
SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
sizeof(data_directory) * NumberfOfDataDirectory +
sizeof(coff_section) * OutputSections.size();
SizeOfHeaders +=
Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize);
uint64_t RVA = 0x1000; // The first page is kept unmapped.
FileSize = SizeOfHeaders;
// Move DISCARDABLE (or non-memory-mapped) sections to the end of file because
// the loader cannot handle holes.
std::stable_partition(
OutputSections.begin(), OutputSections.end(), [](OutputSection *S) {
return (S->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0;
});
for (OutputSection *Sec : OutputSections) {
// The contents of .reloc are generated right before its address is
// assigned, i.e. after all sections preceding it have been placed.
if (Sec->getName() == ".reloc")
addBaserels(Sec);
Sec->setRVA(RVA);
Sec->setFileOffset(FileSize);
// RVAs advance in units of PageSize, file offsets in units of SectorSize.
RVA += alignTo(Sec->getVirtualSize(), PageSize);
FileSize += alignTo(Sec->getRawSize(), SectorSize);
}
SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize);
}
// Writes all file headers into the output buffer, in file order: DOS stub,
// PE magic, COFF file header, PE (optional) header, data directories and
// section table. If OutputSymtab is not empty, the symbol table and the
// string table are written as well, at PointerToSymbolTable.
// PEHeaderTy is the PE header layout to emit; Writer::run() instantiates this
// with pe32plus_header for 64-bit targets and pe32_header otherwise.
// NOTE(review): only selected fields are assigned (and DLLCharacteristics is
// only OR-ed into), so this presumably relies on the buffer being zero-filled
// initially - confirm.
template <typename PEHeaderTy> void Writer::writeHeader() {
// Write DOS stub
uint8_t *Buf = Buffer->getBufferStart();
auto *DOS = reinterpret_cast<dos_header *>(Buf);
Buf += DOSStubSize;
DOS->Magic[0] = 'M';
DOS->Magic[1] = 'Z';
DOS->AddressOfRelocationTable = sizeof(dos_header);
DOS->AddressOfNewExeHeader = DOSStubSize;
// Write PE magic
memcpy(Buf, PEMagic, sizeof(PEMagic));
Buf += sizeof(PEMagic);
// Write COFF header
auto *COFF = reinterpret_cast<coff_file_header *>(Buf);
Buf += sizeof(*COFF);
COFF->Machine = Config->Machine;
COFF->NumberOfSections = OutputSections.size();
COFF->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE;
if (Config->LargeAddressAware)
COFF->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE;
if (!Config->is64())
COFF->Characteristics |= IMAGE_FILE_32BIT_MACHINE;
if (Config->DLL)
COFF->Characteristics |= IMAGE_FILE_DLL;
if (!Config->Relocatable)
COFF->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED;
// The "optional header" size covers the PE header plus the data directories.
COFF->SizeOfOptionalHeader =
sizeof(PEHeaderTy) + sizeof(data_directory) * NumberfOfDataDirectory;
// Write PE header
auto *PE = reinterpret_cast<PEHeaderTy *>(Buf);
Buf += sizeof(*PE);
PE->Magic = Config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32;
PE->ImageBase = Config->ImageBase;
PE->SectionAlignment = PageSize;
PE->FileAlignment = SectorSize;
PE->MajorImageVersion = Config->MajorImageVersion;
PE->MinorImageVersion = Config->MinorImageVersion;
PE->MajorOperatingSystemVersion = Config->MajorOSVersion;
PE->MinorOperatingSystemVersion = Config->MinorOSVersion;
// NOTE(review): the subsystem version fields are filled from the OS version
// settings as well - confirm this is intended.
PE->MajorSubsystemVersion = Config->MajorOSVersion;
PE->MinorSubsystemVersion = Config->MinorOSVersion;
PE->Subsystem = Config->Subsystem;
PE->SizeOfImage = SizeOfImage;
PE->SizeOfHeaders = SizeOfHeaders;
if (!Config->NoEntry) {
Defined *Entry = cast<Defined>(Config->Entry->repl());
PE->AddressOfEntryPoint = Entry->getRVA();
// Pointer to thumb code must have the LSB set, so adjust it.
if (Config->Machine == ARMNT)
PE->AddressOfEntryPoint |= 1;
}
PE->SizeOfStackReserve = Config->StackReserve;
PE->SizeOfStackCommit = Config->StackCommit;
PE->SizeOfHeapReserve = Config->HeapReserve;
PE->SizeOfHeapCommit = Config->HeapCommit;
if (Config->DynamicBase)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE;
if (Config->HighEntropyVA)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA;
if (!Config->AllowBind)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND;
if (Config->NxCompat)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT;
if (!Config->AllowIsolation)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION;
if (Config->TerminalServerAware)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
PE->NumberOfRvaAndSize = NumberfOfDataDirectory;
// Only the section named ".text" contributes to BaseOfCode/SizeOfCode.
if (OutputSection *Text = findSection(".text")) {
PE->BaseOfCode = Text->getRVA();
PE->SizeOfCode = Text->getRawSize();
}
PE->SizeOfInitializedData = getSizeOfInitializedData();
// Write data directory
// Each entry is filled only if the corresponding section, table or symbol
// exists; all other entries are left untouched.
auto *Dir = reinterpret_cast<data_directory *>(Buf);
Buf += sizeof(*Dir) * NumberfOfDataDirectory;
if (OutputSection *Sec = findSection(".edata")) {
Dir[EXPORT_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[EXPORT_TABLE].Size = Sec->getVirtualSize();
}
if (!Idata.empty()) {
Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA();
Dir[IMPORT_TABLE].Size = Idata.getDirSize();
Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA();
Dir[IAT].Size = Idata.getIATSize();
}
if (!DelayIdata.empty()) {
Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress =
DelayIdata.getDirRVA();
Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize();
}
if (OutputSection *Sec = findSection(".rsrc")) {
Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize();
}
if (OutputSection *Sec = findSection(".reloc")) {
Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize();
}
if (OutputSection *Sec = findSection(".pdata")) {
Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize();
}
// The TLS directory is located through the _tls_used symbol; its size is
// the size of the 32-bit or 64-bit TLS directory structure.
if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) {
if (Defined *B = dyn_cast<Defined>(Sym->Body)) {
Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[TLS_TABLE].Size = Config->is64()
? sizeof(object::coff_tls_directory64)
: sizeof(object::coff_tls_directory32);
}
}
// The load configuration is located through the _load_config_used symbol.
// Its size is read from the first 4 bytes (little-endian) at the symbol's
// position inside its chunk and validated against the chunk size.
if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) {
if (auto *B = dyn_cast<DefinedRegular>(Sym->Body)) {
SectionChunk *SC = B->getChunk();
assert(B->getRVA() >= SC->getRVA());
uint64_t OffsetInChunk = B->getRVA() - SC->getRVA();
if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize())
fatal("_load_config_used is malformed");
ArrayRef<uint8_t> SecContents = SC->getContents();
uint32_t LoadConfigSize =
*reinterpret_cast<const ulittle32_t *>(&SecContents[OffsetInChunk]);
if (OffsetInChunk + LoadConfigSize > SC->getSize())
fatal("_load_config_used is too large");
Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize;
}
}
// Write section table
for (OutputSection *Sec : OutputSections) {
Sec->writeHeaderTo(Buf);
Buf += sizeof(coff_section);
}
// Without symbols there is neither a symbol table nor a string table.
if (OutputSymtab.empty())
return;
COFF->PointerToSymbolTable = PointerToSymbolTable;
uint32_t NumberOfSymbols = OutputSymtab.size();
COFF->NumberOfSymbols = NumberOfSymbols;
auto *SymbolTable = reinterpret_cast<coff_symbol16 *>(
Buffer->getBufferStart() + COFF->PointerToSymbolTable);
for (size_t I = 0; I != NumberOfSymbols; ++I)
SymbolTable[I] = OutputSymtab[I];
// Create the string table, it follows immediately after the symbol table.
// The first 4 bytes is length including itself.
Buf = reinterpret_cast<uint8_t *>(&SymbolTable[NumberOfSymbols]);
write32le(Buf, Strtab.size() + 4);
if (!Strtab.empty())
memcpy(Buf + 4, Strtab.data(), Strtab.size());
}
// Creates the output buffer of FileSize bytes for the executable at Path.
// Failure to create it is reported through check().
void Writer::openFile(StringRef Path) {
  auto BufferOrErr =
      FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable);
  Buffer = check(std::move(BufferOrErr), "failed to open " + Path);
}
// Points the SEH table symbols in Config at the final SEH table: SEHTable gets
// the table's RVA, SEHCount its size divided by 4. Does nothing when no SEH
// table was created.
void Writer::fixSafeSEHSymbols() {
  if (SEHTable) {
    Config->SEHTable->setRVA(SEHTable->getRVA());
    Config->SEHCount->setVA(SEHTable->getSize() / 4);
  }
}
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
for (auto &P : Config->Section) {
StringRef Name = P.first;
uint32_t Perm = P.second;
if (auto *Sec = findSection(Name))
Sec->setPermissions(Perm);
}
}
// Write section contents to a mmap'ed file.
void Writer::writeSections() {
  uint8_t *FileStart = Buffer->getBufferStart();
  for (OutputSection *Sec : OutputSections) {
    uint8_t *SecBuf = FileStart + Sec->getFileOff();

    // Code sections are pre-filled with INT3 (0xCC) so that the gaps between
    // functions do not stay NUL bytes, which can be interpreted as ADD
    // instructions.
    const bool IsCode = Sec->getPermissions() & IMAGE_SCN_CNT_CODE;
    if (IsCode)
      memset(SecBuf, 0xCC, Sec->getRawSize());

    // Every chunk is handed the start of its section's buffer.
    auto &&Chunks = Sec->getChunks();
    parallel_for_each(Chunks.begin(), Chunks.end(),
                      [&](Chunk *C) { C->writeTo(SecBuf); });
  }
}
// Sort .pdata section contents according to PE/COFF spec 5.5.
void Writer::sortExceptionTable() {
OutputSection *Sec = findSection(".pdata");
if (!Sec)
return;
// We assume .pdata contains function table entries only.
uint8_t *Begin = Buffer->getBufferStart() + Sec->getFileOff();
uint8_t *End = Begin + Sec->getVirtualSize();
if (Config->Machine == AMD64) {
struct Entry { ulittle32_t Begin, End, Unwind; };
parallel_sort(
(Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
if (Config->Machine == ARMNT) {
struct Entry { ulittle32_t Begin, Unwind; };
parallel_sort(
(Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
errs() << "warning: don't know how to handle .pdata.\n";
}
// Returns the first output section whose name equals Name, or nullptr if
// there is no such section.
OutputSection *Writer::findSection(StringRef Name) {
  auto Found = std::find_if(
      OutputSections.begin(), OutputSections.end(),
      [&](OutputSection *Sec) { return Sec->getName() == Name; });
  if (Found == OutputSections.end())
    return nullptr;
  return *Found;
}
uint32_t Writer::getSizeOfInitializedData() {
uint32_t Res = 0;
for (OutputSection *S : OutputSections)
if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA)
Res += S->getRawSize();
return Res;
}
// Returns an existing section or create a new one if not found.
// A new section gets the default characteristics of its well-known name;
// asking for a name that is not in the table below is a programming error.
OutputSection *Writer::createSection(StringRef Name) {
  if (OutputSection *Existing = findSection(Name))
    return Existing;

  uint32_t Perms =
      StringSwitch<uint32_t>(Name)
          .Case(".bss", IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
                            IMAGE_SCN_MEM_WRITE)
          .Case(".data", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
                             IMAGE_SCN_MEM_WRITE)
          .Case(".didat", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ)
          .Case(".edata", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ)
          .Case(".idata", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ)
          .Case(".rdata", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ)
          .Case(".reloc", IMAGE_SCN_CNT_INITIALIZED_DATA |
                              IMAGE_SCN_MEM_DISCARDABLE | IMAGE_SCN_MEM_READ)
          .Case(".text", IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_READ |
                             IMAGE_SCN_MEM_EXECUTE)
          .Default(0);
  if (!Perms)
    llvm_unreachable("unknown section name");

  auto NewSec = new (CAlloc.Allocate()) OutputSection(Name);
  NewSec->addPermissions(Perms);
  OutputSections.push_back(NewSec);
  return NewSec;
}
// Dest is .reloc section. Add contents to that section.
// The base relocation locations are gathered and emitted one output section
// at a time; Dest itself is skipped.
void Writer::addBaserels(OutputSection *Dest) {
  std::vector<Baserel> Locations;
  for (OutputSection *Sec : OutputSections) {
    if (Sec != Dest) {
      // Collect all locations for base relocations.
      for (Chunk *C : Sec->getChunks())
        C->getBaserels(&Locations);
      // Add the addresses to .reloc section.
      if (!Locations.empty())
        addBaserelBlocks(Dest, Locations);
      Locations.clear();
    }
  }
}
// Add addresses to .reloc section. Note that addresses are grouped by page.
// V must not be empty (V[0] is read unconditionally); the caller,
// addBaserels(), only calls this with a non-empty vector. One BaserelChunk is
// added to Dest for every run of consecutive entries that lie on the same
// page.
// NOTE(review): only consecutive entries are grouped, so this presumably
// expects V to be ordered by RVA - confirm.
void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) {
// Clearing the low bits of an RVA yields the RVA of its page.
const uint32_t Mask = ~uint32_t(PageSize - 1);
uint32_t Page = V[0].RVA & Mask;
// [I, J) is the run of entries collected so far for the current page.
size_t I = 0, J = 1;
for (size_t E = V.size(); J < E; ++J) {
uint32_t P = V[J].RVA & Mask;
if (P == Page)
continue;
// V[J] lies on a different page: emit the finished run and start a new one.
BaserelChunk *Buf = BAlloc.Allocate();
Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J));
I = J;
Page = P;
}
if (I == J)
return;
// Emit the last run.
BaserelChunk *Buf = BAlloc.Allocate();
Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J));
}

View File

@ -0,0 +1,26 @@
//===- Writer.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_WRITER_H
#define LLD_COFF_WRITER_H
#include <vector>
namespace lld {
namespace coff {

class Chunk;
class OutputSection;
// Forward declaration so that this header can be included on its own:
// writeResult() below takes a SymbolTable pointer, and no other declaration
// of that type is visible in this header.
class SymbolTable;

// Entry point of the COFF writer; takes the symbol table of the link.
void writeResult(SymbolTable *T);

} // namespace coff
} // namespace lld
#endif

View File

@ -0,0 +1,50 @@
# Generate Options.inc, the option-parser definitions, from Options.td.
set(LLVM_TARGET_DEFINITIONS Options.td)
tablegen(LLVM Options.inc -gen-opt-parser-defs)
add_public_tablegen_target(ELFOptionsTableGen)

# The ELF linker library: its sources, the LLVM components it links against,
# and the additional libraries it needs.
add_lld_library(lldELF
  Driver.cpp
  DriverUtils.cpp
  EhFrame.cpp
  Error.cpp
  ICF.cpp
  InputFiles.cpp
  InputSection.cpp
  LTO.cpp
  LinkerScript.cpp
  MarkLive.cpp
  OutputSections.cpp
  Relocations.cpp
  ScriptParser.cpp
  Strings.cpp
  SymbolListFile.cpp
  SymbolTable.cpp
  Symbols.cpp
  Target.cpp
  Thunks.cpp
  Writer.cpp

  LINK_COMPONENTS
    ${LLVM_TARGETS_TO_BUILD}
    Analysis
    BitReader
    BitWriter
    Codegen
    Core
    IPO
    Linker
    LTO
    Object
    Option
    Passes
    MC
    Support
    Target
    TransformUtils

  LINK_LIBS
    lldConfig
    ${PTHREAD_LIB}
  )

# Build the intrinsics_gen and ELFOptionsTableGen targets before lldELF.
add_dependencies(lldELF intrinsics_gen ELFOptionsTableGen)

View File

@ -0,0 +1,134 @@
//===- Config.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_CONFIG_H
#define LLD_ELF_CONFIG_H
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ELF.h"
#include <vector>
namespace lld {
namespace elf {
class InputFile;
struct Symbol;
// The flavor of ELF file being linked. The -m emulation option maps to one of
// these together with a machine type (e.g. "elf_x86_64" selects ELF64LEKind,
// "elf32ppc" selects ELF32BEKind). ELFNoneKind is the initial value of
// Configuration::EKind.
enum ELFKind {
ELFNoneKind,
ELF32LEKind,
ELF32BEKind,
ELF64LEKind,
ELF64BEKind
};
// The style of build ID requested with --build-id. A plain --build-id selects
// Fnv1; --build-id=md5, =sha1 and =none select Md5, Sha1 and None; a value
// starting with "0x" selects Hexstring.
enum class BuildIdKind { None, Fnv1, Md5, Sha1, Hexstring };
// The policy for unresolved symbols, derived from the command line by the
// driver: --noinhibit-exec selects Warn, --no-undefined or -z defs selects
// NoUndef, a relocatable link selects Ignore, and --unresolved-symbols selects
// Ignore or Error depending on its value. Error is the default.
enum class UnresolvedPolicy { NoUndef, Error, Warn, Ignore };
// A single symbol entry of a version script (see
// Configuration::VersionScriptGlobals and VersionDefinition::Globals).
struct SymbolVersion {
// The symbol name (presumably a pattern) as written in the script - TODO
// confirm whether wildcards are allowed.
llvm::StringRef Name;
// NOTE(review): presumably set for entries listed inside an extern "C++"
// block of the version script - confirm against the parser.
bool IsExternCpp;
};
// This struct contains symbols version definition that
// can be found in version script if it is used for link.
struct VersionDefinition {
  // Creates a definition with the given name and identifier and no symbols.
  VersionDefinition(llvm::StringRef Name, size_t Id) : Name(Name), Id(Id) {}

  llvm::StringRef Name;               // Name of the version definition.
  size_t Id;                          // Identifier given at construction.
  std::vector<SymbolVersion> Globals; // Symbols listed for this version.
  // Offset in string table. It is not set by the constructor, so it is
  // zero-initialized here; otherwise reading it before it has been assigned
  // would read an indeterminate value.
  size_t NameOff = 0;
};
// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
// Most fields are initialized by the driver.
// NOTE(review): many members below have no default initializer, so they hold
// indeterminate values until the driver assigns them. Fields not assigned in
// LinkerDriver::readConfigs (e.g. Rela, ImageBase) are presumably set
// elsewhere before use - confirm.
struct Configuration {
Symbol *EntrySym = nullptr;
InputFile *FirstElf = nullptr;
// String-valued options. These are non-owning references.
llvm::StringRef DynamicLinker;
llvm::StringRef Entry;
llvm::StringRef Emulation;
llvm::StringRef Fini;
llvm::StringRef Init;
llvm::StringRef LtoAAPipeline;
llvm::StringRef LtoNewPmPasses;
llvm::StringRef OutputFile;
llvm::StringRef SoName;
llvm::StringRef Sysroot;
// All -rpath values joined with ':'.
std::string RPath;
std::vector<VersionDefinition> VersionDefinitions;
std::vector<llvm::StringRef> DynamicList;
// Values of the -L options, in command line order.
std::vector<llvm::StringRef> SearchPaths;
std::vector<llvm::StringRef> Undefined;
std::vector<SymbolVersion> VersionScriptGlobals;
// The bytes given with --build-id=0x<hex>.
std::vector<uint8_t> BuildIdVector;
// Boolean options.
bool AllowMultipleDefinition;
bool AsNeeded = false;
bool Bsymbolic;
bool BsymbolicFunctions;
bool Demangle = true;
bool DisableVerify;
bool DiscardAll;
bool DiscardLocals;
bool DiscardNone;
bool EhFrameHdr;
bool EnableNewDtags;
bool ExportDynamic;
bool FatalWarnings;
bool GcSections;
bool GnuHash = false;
bool ICF;
bool Mips64EL = false;
bool NoGnuUnique;
bool NoUndefinedVersion;
// True if we are generating position-independent code (-pie or -shared).
bool Pic;
bool Pie;
bool PrintGcSections;
bool Rela;
bool Relocatable;
bool SaveTemps;
bool Shared;
bool Static = false;
bool StripAll;
// Also set when StripAll is set: --strip-all implies --strip-debug.
bool StripDebug;
bool SysvHash = true;
bool Threads;
bool Trace;
bool Verbose;
bool WarnCommon;
// -z <keyword> options.
bool ZCombreloc;
bool ZExecStack;
bool ZNodelete;
bool ZNow;
bool ZOrigin;
bool ZRelro;
UnresolvedPolicy UnresolvedSymbols;
BuildIdKind BuildId = BuildIdKind::None;
ELFKind EKind = ELFNoneKind;
uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL;
uint16_t EMachine = llvm::ELF::EM_NONE;
// -1 converts to the largest uint64_t value.
// NOTE(review): presumably a "not set" marker - confirm.
uint64_t EntryAddr = -1;
uint64_t ImageBase;
// Numeric options; the driver rejects LtoO > 3 and LtoJobs == 0.
unsigned LtoJobs;
unsigned LtoO;
unsigned Optimize;
};
// The only instance of Configuration struct.
// The pointer itself is defined in Driver.cpp and is set by elf::link() for
// the duration of a link.
extern Configuration *Config;
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,588 @@
//===- Driver.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Driver.h"
#include "Config.h"
#include "Error.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "Strings.h"
#include "SymbolListFile.h"
#include "SymbolTable.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Driver/Driver.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace lld;
using namespace lld::elf;
// Global pointers to the configuration and the driver of the current link.
// Both are set by elf::link() to objects living on its stack, so they are
// only valid while elf::link() is running.
Configuration *elf::Config;
LinkerDriver *elf::Driver;
// Entry point of the ELF linker. Runs a complete link for the command line
// Args; diagnostics go to the stream Error. Returns true if no error was
// reported.
bool elf::link(ArrayRef<const char *> Args, raw_ostream &Error) {
// Reset the error state for this link.
HasError = false;
ErrorOS = &Error;
// The configuration, the driver and the linker script configuration live on
// this stack frame; the global pointers refer to them during the link.
Configuration C;
LinkerDriver D;
ScriptConfiguration SC;
Config = &C;
Driver = &D;
ScriptConfig = &SC;
Driver->main(Args);
return !HasError;
}
// Parses a linker -m option.
// Translates an emulation name into the (ELF kind, machine type) pair it
// stands for; a trailing "_fbsd" is ignored. For an unknown name an error is
// reported and {ELFNoneKind, EM_NONE} is returned.
static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) {
  const StringRef FreeBSDSuffix = "_fbsd";
  if (S.endswith(FreeBSDSuffix))
    S = S.drop_back(FreeBSDSuffix.size());

  using KindAndMachine = std::pair<ELFKind, uint16_t>;
  KindAndMachine Ret = StringSwitch<KindAndMachine>(S)
                           .Case("aarch64linux", {ELF64LEKind, EM_AARCH64})
                           .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM})
                           .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
                           .Case("elf32btsmip", {ELF32BEKind, EM_MIPS})
                           .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS})
                           .Case("elf32ppc", {ELF32BEKind, EM_PPC})
                           .Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
                           .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
                           .Case("elf64ppc", {ELF64BEKind, EM_PPC64})
                           .Case("elf_i386", {ELF32LEKind, EM_386})
                           .Case("elf_x86_64", {ELF64LEKind, EM_X86_64})
                           .Default({ELFNoneKind, EM_NONE});
  if (Ret.first != ELFNoneKind)
    return Ret;

  // Windows emulation names get a dedicated message.
  const bool IsWindowsTarget =
      S == "i386pe" || S == "i386pep" || S == "thumb2pe";
  if (IsWindowsTarget)
    error("Windows targets are not supported on the ELF frontend: " + S);
  else
    error("unknown emulation: " + S);
  return Ret;
}
// Returns slices of MB by parsing MB as an archive file.
// Each slice consists of a member file in the archive.
// A failure to parse the archive or to access one of its members is reported
// through check(); a failure while iterating over the members is reported
// through error().
std::vector<MemoryBufferRef>
LinkerDriver::getArchiveMembers(MemoryBufferRef MB) {
  std::unique_ptr<Archive> File =
      check(Archive::create(MB), "failed to parse archive");

  std::vector<MemoryBufferRef> V;
  // Err receives any failure that stops the iteration below early.
  Error Err;
  for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
    Archive::Child C = check(COrErr, "could not get the child of the archive " +
                                         File->getFileName());
    MemoryBufferRef MBRef =
        check(C.getMemoryBufferRef(),
              "could not get the buffer for a child of the archive " +
                  File->getFileName());
    V.push_back(MBRef);
  }
  // Report an iteration failure explicitly. The statement `Error(Err);` would
  // not do that: it is parsed as the declaration of a new, shadowing variable
  // named Err, so the failure would be dropped silently.
  if (Err)
    error(errorToErrorCode(std::move(Err)),
          "could not iterate over the archive " + File->getFileName());

  // Take ownership of memory buffers created for members of thin archives.
  for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers())
    OwningMBs.push_back(std::move(MB));

  return V;
}
// Opens and parses a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
// What happens to the file depends on its magic: unrecognized contents are
// read as a linker script, archives and shared objects get their own file
// types, and anything else is added as an object file.
void LinkerDriver::addFile(StringRef Path) {
  using namespace sys::fs;
  if (Config->Verbose)
    outs() << Path << "\n";

  Optional<MemoryBufferRef> Buffer = readFile(Path);
  if (!Buffer.hasValue())
    return;
  MemoryBufferRef MBRef = *Buffer;

  switch (identify_magic(MBRef.getBuffer())) {
  case file_magic::unknown:
    readLinkerScript(MBRef);
    break;
  case file_magic::archive:
    if (!WholeArchive) {
      Files.push_back(make_unique<ArchiveFile>(MBRef));
      break;
    }
    // With WholeArchive set, every member is added as an object file.
    for (MemoryBufferRef Member : getArchiveMembers(MBRef))
      Files.push_back(createObjectFile(Member, Path));
    break;
  case file_magic::elf_shared_object:
    if (Config->Relocatable)
      error("attempted static link of dynamic object " + Path);
    else
      Files.push_back(createSharedFile(MBRef));
    break;
  default:
    if (InLib)
      Files.push_back(make_unique<LazyObjectFile>(MBRef));
    else
      Files.push_back(createObjectFile(MBRef));
    break;
  }
}
// Reads the file at Path and returns a reference to its contents; the buffer
// itself is owned by this driver. If the file cannot be opened, an error is
// reported and None is returned. When a reproduce archive (Cpio) is active,
// the contents are also appended to it.
Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) {
  auto MBOrErr = MemoryBuffer::getFile(Path);
  auto EC = MBOrErr.getError();
  if (EC) {
    error(EC, "cannot open " + Path);
    return None;
  }

  // Grab the reference before ownership of the buffer is handed over.
  MemoryBufferRef MBRef = (*MBOrErr)->getMemBufferRef();
  OwningMBs.push_back(std::move(*MBOrErr)); // take MB ownership

  if (Cpio)
    Cpio->append(relativeToRoot(Path), MBRef.getBuffer());
  return MBRef;
}
// Add a given library by searching it from input search paths.
// An error is reported if the library cannot be found.
void LinkerDriver::addLibrary(StringRef Name) {
  std::string Path = searchLibrary(Name);
  if (!Path.empty()) {
    addFile(Path);
    return;
  }
  error("unable to find library -l" + Name);
}
// This function is called on startup. We need this for LTO since
// LTO calls LLVM functions to compile bitcode files to native code.
// Technically this can be delayed until we read bitcode files, but
// we don't bother to do lazily because the initialization is fast.
// Args is the parsed command line; only its -mllvm options are used here.
static void initLLVM(opt::InputArgList &Args) {
// Register all targets, MC layers, assembly printers and assembly parsers.
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
// This is a flag to discard all but GlobalValue names.
// We want to enable it by default because it saves memory.
// Disable it only when a developer option (-save-temps) is given.
Driver->Context.setDiscardValueNames(!Config->SaveTemps);
Driver->Context.enableDebugTypeODRUniquing();
// Parse and evaluate -mllvm options.
// The first element stands in for the program name (argv[0]).
std::vector<const char *> V;
V.push_back("lld (LLVM option parsing)");
for (auto *Arg : Args.filtered(OPT_mllvm))
V.push_back(Arg->getValue());
cl::ParseCommandLineOptions(V.size(), V.data());
}
// Some command line options or some combinations of them are not allowed.
// This function checks for such errors and reports every violation it finds.
static void checkOptions(opt::InputArgList &Args) {
  // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
  // table which is a relatively new feature.
  const bool IsMips = Config->EMachine == EM_MIPS;
  if (IsMips && Config->GnuHash)
    error("the .gnu.hash section is not compatible with the MIPS target.");

  const bool IsAmdgpu = Config->EMachine == EM_AMDGPU;
  if (IsAmdgpu && !Config->Entry.empty())
    error("-e option is not valid for AMDGPU.");

  if (Config->Pie && Config->Shared)
    error("-shared and -pie may not be used together");

  // The remaining checks only concern relocatable (-r) links.
  if (!Config->Relocatable)
    return;
  if (Config->Shared)
    error("-r and -shared may not be used together");
  if (Config->GcSections)
    error("-r and --gc-sections may not be used together");
  if (Config->ICF)
    error("-r and --icf may not be used together");
  if (Config->Pie)
    error("-r and -pie may not be used together");
}
// Returns the value of the last occurrence of option Key, or Default if the
// option was not given.
static StringRef
getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") {
  auto *Arg = Args.getLastArg(Key);
  if (!Arg)
    return Default;
  return Arg->getValue();
}
// Returns the value of the last occurrence of option Key parsed as a decimal
// integer, or Default if the option was not given. A value that is not a
// number is reported as an error.
static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) {
  auto *Arg = Args.getLastArg(Key);
  if (!Arg)
    return Default;

  int Value = Default;
  StringRef Text = Arg->getValue();
  if (Text.getAsInteger(10, Value))
    error(Arg->getSpelling() + ": number expected, but got " + Text);
  return Value;
}
// Returns the value of the last --reproduce option. If the option was not
// given, the LLD_REPRODUCE environment variable is consulted instead; the
// result is null when neither is set.
static const char *getReproduceOption(opt::InputArgList &Args) {
  auto *Arg = Args.getLastArg(OPT_reproduce);
  if (!Arg)
    return getenv("LLD_REPRODUCE");
  return Arg->getValue();
}
// Returns true if one of the -z options on the command line has exactly the
// value Key.
static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
  for (auto *Arg : Args.filtered(OPT_z)) {
    StringRef Value = Arg->getValue();
    if (Key == Value)
      return true;
  }
  return false;
}
// Drives a whole link: parses the command line, handles --help and
// --version, reads the configuration and the input files, validates the
// options and finally dispatches to the link<ELFT>() instantiation matching
// the detected ELF kind.
// ArgsArr is the full command line; element 0 is the program name.
void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
ELFOptTable Parser;
opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
if (Args.hasArg(OPT_help)) {
printHelp(ArgsArr[0]);
return;
}
if (Args.hasArg(OPT_version)) {
outs() << getVersionString();
return;
}
if (const char *Path = getReproduceOption(Args)) {
// Note that --reproduce is a debug option so you can ignore it
// if you are trying to understand the whole picture of the code.
Cpio.reset(CpioFile::create(Path));
if (Cpio) {
Cpio->append("response.txt", createResponseFile(Args));
Cpio->append("version.txt", getVersionString());
}
}
readConfigs(Args);
initLLVM(Args);
createFiles(Args);
checkOptions(Args);
// Do not start linking if any of the steps above reported an error.
if (HasError)
return;
switch (Config->EKind) {
case ELF32LEKind:
link<ELF32LE>(Args);
return;
case ELF32BEKind:
link<ELF32BE>(Args);
return;
case ELF64LEKind:
link<ELF64LE>(Args);
return;
case ELF64BEKind:
link<ELF64BE>(Args);
return;
default:
// The ELF kind is still unknown at this point.
error("-m or at least a .o file required");
}
}
// Determines how unresolved symbols are handled. The checks are ordered by
// precedence: --noinhibit-exec, then --no-undefined / -z defs, then a
// relocatable link, then --unresolved-symbols. Without any of these,
// unresolved symbols are errors.
static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_noinhibit_exec))
    return UnresolvedPolicy::Warn;
  if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs"))
    return UnresolvedPolicy::NoUndef;
  if (Config->Relocatable)
    return UnresolvedPolicy::Ignore;

  auto *Arg = Args.getLastArg(OPT_unresolved_symbols);
  if (!Arg)
    return UnresolvedPolicy::Error;

  StringRef Value = Arg->getValue();
  if (Value == "ignore-all" || Value == "ignore-in-object-files")
    return UnresolvedPolicy::Ignore;
  // Any other value maps to Error; values that are not known are reported.
  const bool IsKnownErrorValue =
      Value == "ignore-in-shared-libs" || Value == "report-all";
  if (!IsKnownErrorValue)
    error("unknown --unresolved-symbols value: " + Value);
  return UnresolvedPolicy::Error;
}
// Initializes Config members by the command line options.
// Invalid option values are reported through error(); parsing continues
// afterwards, so several problems can be reported in a single run.
void LinkerDriver::readConfigs(opt::InputArgList &Args) {
// Library search paths, in command line order.
for (auto *Arg : Args.filtered(OPT_L))
Config->SearchPaths.push_back(Arg->getValue());
// All -rpath values are joined into one ':'-separated string.
std::vector<StringRef> RPaths;
for (auto *Arg : Args.filtered(OPT_rpath))
RPaths.push_back(Arg->getValue());
if (!RPaths.empty())
Config->RPath = llvm::join(RPaths.begin(), RPaths.end(), ":");
if (auto *Arg = Args.getLastArg(OPT_m)) {
// Parse ELF{32,64}{LE,BE} and CPU type.
StringRef S = Arg->getValue();
std::tie(Config->EKind, Config->EMachine) = parseEmulation(S);
Config->Emulation = S;
}
// Boolean flags. Demangle and EnableNewDtags are on unless the corresponding
// negative option is given.
Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition);
Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic);
Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions);
Config->Demangle = !Args.hasArg(OPT_no_demangle);
Config->DisableVerify = Args.hasArg(OPT_disable_verify);
Config->DiscardAll = Args.hasArg(OPT_discard_all);
Config->DiscardLocals = Args.hasArg(OPT_discard_locals);
Config->DiscardNone = Args.hasArg(OPT_discard_none);
Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr);
Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags);
Config->ExportDynamic = Args.hasArg(OPT_export_dynamic);
Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings);
Config->GcSections = Args.hasArg(OPT_gc_sections);
Config->ICF = Args.hasArg(OPT_icf);
Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique);
Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version);
Config->Pie = Args.hasArg(OPT_pie);
Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections);
Config->Relocatable = Args.hasArg(OPT_relocatable);
Config->SaveTemps = Args.hasArg(OPT_save_temps);
Config->Shared = Args.hasArg(OPT_shared);
Config->StripAll = Args.hasArg(OPT_strip_all);
Config->StripDebug = Args.hasArg(OPT_strip_debug);
Config->Threads = Args.hasArg(OPT_threads);
Config->Trace = Args.hasArg(OPT_trace);
Config->Verbose = Args.hasArg(OPT_verbose);
Config->WarnCommon = Args.hasArg(OPT_warn_common);
// String-valued options. Fini and Init default to "_fini" and "_init"; all
// others default to the empty string.
Config->DynamicLinker = getString(Args, OPT_dynamic_linker);
Config->Entry = getString(Args, OPT_entry);
Config->Fini = getString(Args, OPT_fini, "_fini");
Config->Init = getString(Args, OPT_init, "_init");
Config->LtoAAPipeline = getString(Args, OPT_lto_aa_pipeline);
Config->LtoNewPmPasses = getString(Args, OPT_lto_newpm_passes);
Config->OutputFile = getString(Args, OPT_o);
Config->SoName = getString(Args, OPT_soname);
Config->Sysroot = getString(Args, OPT_sysroot);
// Numeric options. getInteger() returns int while the fields are unsigned,
// so a negative LtoO value is caught by the "> 3" check as well.
Config->Optimize = getInteger(Args, OPT_O, 1);
Config->LtoO = getInteger(Args, OPT_lto_O, 2);
if (Config->LtoO > 3)
error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1);
if (Config->LtoJobs == 0)
error("number of threads must be > 0");
// -z <keyword> options. ZCombreloc and ZRelro are on unless turned off.
Config->ZCombreloc = !hasZOption(Args, "nocombreloc");
Config->ZExecStack = hasZOption(Args, "execstack");
Config->ZNodelete = hasZOption(Args, "nodelete");
Config->ZNow = hasZOption(Args, "now");
Config->ZOrigin = hasZOption(Args, "origin");
Config->ZRelro = !hasZOption(Args, "norelro");
// --strip-all is ignored for relocatable output.
if (Config->Relocatable)
Config->StripAll = false;
// --strip-all implies --strip-debug.
if (Config->StripAll)
Config->StripDebug = true;
// Config->Pic is true if we are generating position-independent code.
Config->Pic = Config->Pie || Config->Shared;
// --hash-style: "gnu" selects only the GNU hash table, "both" selects both
// tables, and "sysv" keeps the defaults.
if (auto *Arg = Args.getLastArg(OPT_hash_style)) {
StringRef S = Arg->getValue();
if (S == "gnu") {
Config->GnuHash = true;
Config->SysvHash = false;
} else if (S == "both") {
Config->GnuHash = true;
} else if (S != "sysv")
error("unknown hash style: " + S);
}
// Parse --build-id or --build-id=<style>.
// If both forms are given, the last --build-id=<style> takes effect.
if (Args.hasArg(OPT_build_id))
Config->BuildId = BuildIdKind::Fnv1;
if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) {
StringRef S = Arg->getValue();
if (S == "md5") {
Config->BuildId = BuildIdKind::Md5;
} else if (S == "sha1") {
Config->BuildId = BuildIdKind::Sha1;
} else if (S == "none") {
Config->BuildId = BuildIdKind::None;
} else if (S.startswith("0x")) {
Config->BuildId = BuildIdKind::Hexstring;
Config->BuildIdVector = parseHex(S.substr(2));
} else {
error("unknown --build-id style: " + S);
}
}
for (auto *Arg : Args.filtered(OPT_undefined))
Config->Undefined.push_back(Arg->getValue());
// This reads Config->Relocatable, which has been set above.
Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args);
// A dynamic list file is parsed if given; --export-dynamic-symbol values are
// appended to the dynamic list afterwards.
if (auto *Arg = Args.getLastArg(OPT_dynamic_list))
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
parseDynamicList(*Buffer);
for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
Config->DynamicList.push_back(Arg->getValue());
if (auto *Arg = Args.getLastArg(OPT_version_script))
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
parseVersionScript(*Buffer);
}
// Processes the command line strictly in order: input-like options are
// handed to addLibrary()/addFile(), while the positional switches update
// state that stays in effect for the inputs that follow them.
// Afterwards, reports an error if nothing was loaded and, when no
// emulation has been chosen yet, adopts the one of the first input file
// that has a known ELF kind.
void LinkerDriver::createFiles(opt::InputArgList &Args) {
  for (auto *A : Args) {
    const unsigned Id = A->getOption().getID();
    switch (Id) {
    // Inputs.
    case OPT_l:
      addLibrary(A->getValue());
      break;
    case OPT_alias_script_T:
    case OPT_INPUT:
    case OPT_script:
      addFile(A->getValue());
      break;

    // Positional on/off pairs.
    case OPT_as_needed:
      Config->AsNeeded = true;
      break;
    case OPT_no_as_needed:
      Config->AsNeeded = false;
      break;
    case OPT_Bstatic:
      Config->Static = true;
      break;
    case OPT_Bdynamic:
      Config->Static = false;
      break;
    case OPT_whole_archive:
      WholeArchive = true;
      break;
    case OPT_no_whole_archive:
      WholeArchive = false;
      break;
    case OPT_start_lib:
      InLib = true;
      break;
    case OPT_end_lib:
      InLib = false;
      break;
    }
  }

  // Only complain about missing inputs if nothing else went wrong first.
  if (!HasError && Files.empty())
    error("no input files.");

  // An emulation was already selected; nothing to infer.
  if (Config->EKind != ELFNoneKind)
    return;

  // Otherwise take the kind and machine from the first file that has one.
  for (std::unique_ptr<InputFile> &File : Files) {
    if (File->EKind != ELFNoneKind) {
      Config->EKind = File->EKind;
      Config->EMachine = File->EMachine;
      return;
    }
  }
}
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
//
// The steps run in a fixed order: set up the per-ELFT globals, finalize
// Config defaults that depend on the target, add every input file to the
// symbol table, run the symbol-table scans, optionally run GC and ICF,
// prepare the remaining live sections, and finally write the output.
template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
// Publish the symbol table, target and linker script through their
// global pointers. All three objects live only for the duration of this call.
SymbolTable<ELFT> Symtab;
elf::Symtab<ELFT>::X = &Symtab;
std::unique_ptr<TargetInfo> TI(createTarget());
Target = TI.get();
LinkerScript<ELFT> LS;
Script<ELFT>::X = &LS;
// Rela is set for every 64-bit ELF class and whenever the machine is x86-64.
Config->Rela = ELFT::Is64Bits || Config->EMachine == EM_X86_64;
Config->Mips64EL =
(Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind);
// Add entry symbol. Note that AMDGPU binaries have no entry points.
if (Config->Entry.empty() && !Config->Shared && !Config->Relocatable &&
Config->EMachine != EM_AMDGPU)
Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start";
// Default output filename is "a.out" by the Unix tradition.
if (Config->OutputFile.empty())
Config->OutputFile = "a.out";
// Handle --trace-symbol.
for (auto *Arg : Args.filtered(OPT_trace_symbol))
Symtab.trace(Arg->getValue());
// Set either EntryAddr (if S is a number) or EntrySym (otherwise).
// getAsInteger() returns true when S could NOT be parsed as a number.
if (!Config->Entry.empty()) {
StringRef S = Config->Entry;
if (S.getAsInteger(0, Config->EntryAddr))
Config->EntrySym = Symtab.addUndefined(S);
}
// Initialize Config->ImageBase.
// An explicit value must be numeric; a value that is not a multiple of the
// target page size only produces a warning. Without the option,
// position-independent output is based at 0 and everything else at the
// target's default image base.
if (auto *Arg = Args.getLastArg(OPT_image_base)) {
StringRef S = Arg->getValue();
if (S.getAsInteger(0, Config->ImageBase))
error(Arg->getSpelling() + ": number expected, but got " + S);
else if ((Config->ImageBase % Target->PageSize) != 0)
warning(Arg->getSpelling() + ": address isn't multiple of page size");
} else {
Config->ImageBase = Config->Pic ? 0 : Target->DefaultImageBase;
}
// Hand ownership of every input file over to the symbol table.
for (std::unique_ptr<InputFile> &F : Files)
Symtab.addFile(std::move(F));
if (HasError)
return; // There were duplicate symbols or incompatible files
Symtab.scanUndefinedFlags();
Symtab.scanShlibUndefined();
Symtab.scanDynamicList();
Symtab.scanVersionScript();
Symtab.scanSymbolVersions();
Symtab.addCombinedLtoObject();
if (HasError)
return;
// Handle --wrap.
for (auto *Arg : Args.filtered(OPT_wrap))
Symtab.wrap(Arg->getValue());
// Optional section-level passes: garbage collection, then identical
// code folding.
if (Config->GcSections)
markLive<ELFT>();
if (Config->ICF)
doIcf<ELFT>();
// MergeInputSection::splitIntoPieces needs to be called before
// any call of MergeInputSection::getOffset. Do that.
// Null entries, the Discarded sentinel and sections that are not live are
// skipped; compressed sections are uncompressed first.
for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F :
Symtab.getObjectFiles())
for (InputSectionBase<ELFT> *S : F->getSections()) {
if (!S || S == &InputSection<ELFT>::Discarded || !S->Live)
continue;
if (S->Compressed)
S->uncompress();
if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S))
MS->splitIntoPieces();
}
// Write the result to the file.
writeResult<ELFT>(&Symtab);
}

View File

@ -0,0 +1,111 @@
//===- Driver.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_DRIVER_H
#define LLD_ELF_DRIVER_H
#include "SymbolTable.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/raw_ostream.h"
namespace lld {
namespace elf {
// Pointer to the driver object; only declared here.
extern class LinkerDriver *Driver;
class CpioFile;
// Owns the state of one link invocation: the input files collected from the
// command line and the positional flags that affect how they are loaded.
class LinkerDriver {
public:
void main(ArrayRef<const char *> Args);
void addFile(StringRef Path);
void addLibrary(StringRef Name);
llvm::LLVMContext Context; // to parse bitcode files
std::unique_ptr<CpioFile> Cpio; // for reproduce
private:
std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB);
llvm::Optional<MemoryBufferRef> readFile(StringRef Path);
// Fills in Config from the parsed command line.
void readConfigs(llvm::opt::InputArgList &Args);
// Walks the command line in order and loads the inputs it names.
void createFiles(llvm::opt::InputArgList &Args);
// Performs the link for one concrete ELF type.
template <class ELFT> void link(llvm::opt::InputArgList &Args);
// True while we are between --whole-archive and --no-whole-archive.
bool WholeArchive = false;
// True while we are between --start-lib and --end-lib.
bool InLib = false;
llvm::BumpPtrAllocator Alloc;
// Inputs gathered by createFiles(); link() moves them into the symbol table.
std::vector<std::unique_ptr<InputFile>> Files;
std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
};
// Parses command line options.
// parse() also expands '@file' response files before the final parse; the
// strings produced by that expansion are allocated from Alloc, so they
// live as long as this table.
class ELFOptTable : public llvm::opt::OptTable {
public:
ELFOptTable();
llvm::opt::InputArgList parse(ArrayRef<const char *> Argv);
private:
llvm::BumpPtrAllocator Alloc;
};
// Create enum with OPT_xxx values for each option in Options.td
// Options.inc is included with OPTION() defined so that each entry expands
// to a single "OPT_<ID>," enumerator; all other fields are ignored here.
enum {
OPT_INVALID = 0,
#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID,
#include "Options.inc"
#undef OPTION
};
// This is the class to create a .cpio file for --reproduce.
//
// If "--reproduce foo" is given, we create a file "foo.cpio" and
// copy all input files to the archive, along with a response file
// to re-run the same command with the same inputs.
// It is useful for reporting issues to LLD developers.
//
// Cpio as a file format is a deliberate choice. It's standardized in
// POSIX and very easy to create. cpio command is available virtually
// on all Unix systems. See
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07
// for the format details.
class CpioFile {
public:
// Opens "<OutputPath>.cpio" for writing. Reports an error and returns
// nullptr if the file cannot be opened; otherwise returns a new object
// that the caller owns.
static CpioFile *create(StringRef OutputPath);
// Adds one member to the archive. A Path that was already appended is
// silently ignored.
void append(StringRef Path, StringRef Data);
private:
CpioFile(std::unique_ptr<llvm::raw_fd_ostream> OS, StringRef Basename);
std::unique_ptr<llvm::raw_fd_ostream> OS;
// Paths that have already been written, used to skip duplicates.
llvm::StringSet<> Seen;
// Directory name that prefixes every member path inside the archive.
std::string Basename;
};
// Prints the option help text to standard output.
void printHelp(const char *Argv0);
// Returns "LLD <version>\n", with the repository string inserted before the
// newline when one is available.
std::string getVersionString();
// NOTE(review): no definition of this function is visible alongside the
// other driver utilities -- confirm that it is still defined and used.
std::vector<uint8_t> parseHexstring(StringRef S);
// Reconstructs the command line as response-file text for --reproduce.
std::string createResponseFile(const llvm::opt::InputArgList &Args);
// Makes Path absolute and then strips the leading root, e.g. "/a/b" -> "a/b".
std::string relativeToRoot(StringRef Path);
// Returns the first existing "<search dir>/<Path>", or "" if there is none.
std::string findFromSearchPaths(StringRef Path);
// Resolves a -l name against the search paths; returns "" if nothing matches.
std::string searchLibrary(StringRef Path);
// Joins Dir and File; a leading '=' in Dir is replaced by the sysroot.
std::string buildSysrootedPath(llvm::StringRef Dir, llvm::StringRef File);
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,276 @@
//===- DriverUtils.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions for the driver. Because there
// are so many small functions, we created this separate file to make
// Driver.cpp less cluttered.
//
//===----------------------------------------------------------------------===//
#include "Driver.h"
#include "Error.h"
#include "lld/Config/Version.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
using namespace llvm::sys;
using namespace lld;
using namespace lld::elf;
// Create OptTable
// Options.inc is included twice below, each time with a different macro
// defined, so that a single generated file yields both the prefix arrays
// and the option table.
// Create prefix string literals used in Options.td
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Options.inc"
#undef PREFIX
// Create table mapping all options defined in Options.td
// The OPTION() arguments are reordered into the field order of
// opt::OptTable::Info.
static const opt::OptTable::Info OptInfo[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \
{ \
X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, X8, X7, OPT_##GROUP, \
OPT_##ALIAS, X6 \
},
#include "Options.inc"
#undef OPTION
};
// Binds the option table above to the generic OptTable machinery.
ELFOptTable::ELFOptTable() : OptTable(OptInfo) {}
// Picks the tokenizer used to split response files.
// An explicit --rsp-quoting value wins ("windows" or "posix"; anything else
// is reported as an error and then treated like "posix"). Without the
// option, the choice follows the OS of the process triple.
static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) {
  auto *Arg = Args.getLastArg(OPT_rsp_quoting);
  if (!Arg) {
    // No explicit request: follow the host convention.
    const bool OnWindows =
        Triple(sys::getProcessTriple()).getOS() == Triple::Win32;
    if (OnWindows)
      return cl::TokenizeWindowsCommandLine;
    return cl::TokenizeGNUCommandLine;
  }

  StringRef Style = Arg->getValue();
  if (Style == "windows")
    return cl::TokenizeWindowsCommandLine;
  if (Style != "posix")
    error("invalid response file quoting: " + Style);
  return cl::TokenizeGNUCommandLine;
}
// Parses the given argument vector and returns the resulting argument list.
//
// Parsing happens twice. The first pass exists only to discover
// --rsp-quoting, which decides how response files are tokenized. After
// '@<filename>' arguments have been replaced by the contents of the named
// files, the second pass produces the list that is checked for missing
// values and unknown options and then returned.
opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) {
  unsigned MissingIndex;
  unsigned MissingCount;
  SmallVector<const char *, 256> Expanded(Argv.begin(), Argv.end());

  // Pass 1: only the quoting style is taken from this result.
  opt::InputArgList Args =
      this->ParseArgs(Expanded, MissingIndex, MissingCount);

  // Splice the contents of response files into the vector.
  StringSaver Saver(Alloc);
  cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Expanded);

  // Pass 2: the real parse, followed by diagnostics.
  Args = this->ParseArgs(Expanded, MissingIndex, MissingCount);
  if (MissingCount != 0) {
    const char *Suffix = MissingCount == 1 ? " argument.\n" : " arguments";
    error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) +
          "\", expected " + Twine(MissingCount) + Suffix);
  }

  for (auto *Unknown : Args.filtered(OPT_UNKNOWN))
    error("unknown argument: " + Unknown->getSpelling());
  return Args;
}
void elf::printHelp(const char *Argv0) {
ELFOptTable Table;
Table.PrintHelp(outs(), Argv0, "lld", false);
}
// Builds the version banner: "LLD <version>" followed, when the repository
// string is non-empty, by a space and that string, and always terminated
// by a newline.
std::string elf::getVersionString() {
  std::string Version = getLLDVersion();
  std::string Repo = getLLDRepositoryVersion();

  std::string Banner = "LLD " + Version;
  if (!Repo.empty())
    Banner += " " + Repo;
  Banner += "\n";
  return Banner;
}
// Converts Path to an absolute path, normalizes away "." and ".."
// components, and then drops the leading root so that the result can be
// used as a relative path. For example, "../foo.o" becomes
// "home/john/foo.o" when the current directory is "/home/john/bar".
// Terminates the process if the path cannot be made absolute.
std::string elf::relativeToRoot(StringRef Path) {
  SmallString<128> Absolute = Path;
  std::error_code EC = fs::make_absolute(Absolute);
  if (EC)
    fatal("make_absolute failed: " + EC.message());
  path::remove_dots(Absolute, /*remove_dot_dot=*/true);

  // This is Windows specific. root_name() yields a drive letter (e.g. "c:")
  // or a UNC name (//net). It is kept as the first component of the
  // result, minus the trailing ':' or the leading "//".
  StringRef Root = path::root_name(Absolute);
  SmallString<128> Result;
  if (Root.endswith(":"))
    Result = Root.drop_back();
  else if (Root.startswith("//"))
    Result = Root.substr(2);

  path::append(Result, path::relative_path(Absolute));
  return Result.str();
}
// Takes ownership of the already-opened output stream and remembers the
// directory name under which members are stored.
CpioFile::CpioFile(std::unique_ptr<raw_fd_ostream> Stream, StringRef Name)
    : OS(std::move(Stream)), Basename(Name) {}
// Opens "<OutputPath>.cpio" and wraps it in a newly allocated CpioFile
// whose member prefix is the filename component of OutputPath.
// On failure an error is reported and nullptr is returned.
CpioFile *CpioFile::create(StringRef OutputPath) {
  std::error_code EC;
  std::string ArchivePath = (OutputPath + ".cpio").str();
  auto Stream = llvm::make_unique<raw_fd_ostream>(ArchivePath, EC, fs::F_None);
  if (!EC)
    return new CpioFile(std::move(Stream), path::filename(OutputPath));

  error(EC, "--reproduce: failed to open " + ArchivePath);
  return nullptr;
}
// Writes one archive member to OS: a header made of fixed-width octal
// fields, then the NUL-terminated member name, then the file contents.
//
//   Path - name stored for the member.
//   Data - contents of the member.
static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) {
  // Path.size() and Data.size() are size_t, which does not match the
  // `unsigned int` that a plain "%o" conversion expects. Convert to a
  // known type and use the matching "ll" length modifier so that the
  // format string and its argument agree on every host.
  const unsigned long long NameSize = Path.size() + 1; // counts the NUL
  const unsigned long long FileSize = Data.size();

  // The c_dev/c_ino pair should be unique according to the spec,
  // but no one seems to care.
  OS << "070707";                      // c_magic
  OS << "000000";                      // c_dev
  OS << "000000";                      // c_ino
  OS << "100664";                      // c_mode: C_ISREG | rw-rw-r--
  OS << "000000";                      // c_uid
  OS << "000000";                      // c_gid
  OS << "000001";                      // c_nlink
  OS << "000000";                      // c_rdev
  OS << "00000000000";                 // c_mtime
  OS << format("%06llo", NameSize);    // c_namesize
  OS << format("%011llo", FileSize);   // c_filesize
  OS << Path << '\0';                  // c_name
  OS << Data;                          // c_filedata
}
// Adds Data to the archive under "<Basename>/<Path>". A Path that has been
// appended before is ignored.
void CpioFile::append(StringRef Path, StringRef Data) {
  const bool FirstTime = Seen.insert(Path).second;
  if (!FirstTime)
    return;

  // Build the in-archive name so that /home/foo/bar is stored as
  // baz/home/foo/bar, where baz is the basename of the output file
  // (i.e. the archive being written is baz.cpio).
  SmallString<128> MemberName;
  path::append(MemberName, Basename, Path);

  // Store unix-style separators so the archive can be extracted on both
  // unix and windows.
  for (char &C : MemberName)
    if (C == '\\')
      C = '/';

  writeMember(*OS, MemberName, Data);

  // Emit the trailer and then rewind to just before it. The file is a valid
  // archive at this point, and the next member will overwrite the trailer.
  const uint64_t TrailerPos = OS->tell();
  writeMember(*OS, "TRAILER!!!", "");
  OS->seek(TrailerPos);
}
// Wraps S in double quotes when it contains a space; otherwise returns it
// unchanged.
static std::string quote(StringRef S) {
  const bool HasSpace = S.find(' ') != StringRef::npos;
  if (HasSpace)
    return ("\"" + S + "\"").str();
  return S;
}
// Maps an existing path to its root-relative form; anything that does not
// exist on disk is passed through untouched.
static std::string rewritePath(StringRef S) {
  if (!fs::exists(S))
    return S;
  return relativeToRoot(S);
}
// Renders one parsed argument back into text: just the spelling for
// flags, spelling immediately followed by the value for joined-style
// options, and spelling, space, value for everything else. The value is
// quoted when it contains a space.
static std::string stringize(opt::Arg *Arg) {
  std::string Text = Arg->getSpelling();
  if (Arg->getNumValues() == 0)
    return Text;

  std::string Value = quote(Arg->getValue());
  const bool Joined =
      Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle;
  if (!Joined)
    Text += " ";
  Text += Value;
  return Text;
}
// Reconstructs the command line as response-file text, one argument per
// line, so that the same command can be re-run with the same inputs.
// This is for --reproduce.
//
// --reproduce itself is dropped. Input files and the options that take a
// path have existing paths rewritten to their root-relative form; every
// other argument is rendered as it was parsed.
std::string elf::createResponseFile(const opt::InputArgList &Args) {
  SmallString<0> Buffer;
  raw_svector_ostream Out(Buffer);

  for (auto *A : Args) {
    switch (A->getOption().getID()) {
    case OPT_reproduce:
      // Not carried over into the response file.
      break;

    case OPT_INPUT: {
      std::string File = quote(rewritePath(A->getValue()));
      Out << File << "\n";
      break;
    }

    case OPT_L:
    case OPT_dynamic_list:
    case OPT_rpath:
    case OPT_alias_script_T:
    case OPT_script:
    case OPT_version_script: {
      std::string Value = quote(rewritePath(A->getValue()));
      Out << A->getSpelling() << " " << Value << "\n";
      break;
    }

    default:
      Out << stringize(A) << "\n";
      break;
    }
  }
  return Buffer.str();
}
// Tries Path under each configured search directory, in order, and returns
// the first combination that exists. Returns "" when none does.
std::string elf::findFromSearchPaths(StringRef Path) {
  for (StringRef Dir : Config->SearchPaths) {
    std::string Candidate = buildSysrootedPath(Dir, Path);
    if (!fs::exists(Candidate))
      continue;
    return Candidate;
  }
  return "";
}
// Looks a library up in the search paths, which are filled from -L
// command line switches, and returns the path of an existing file or ""
// if there is none.
//
// A name of the form ":file" is looked up verbatim. Otherwise, each
// directory is probed for "lib<Path>.so" (skipped in static mode) and then
// for "lib<Path>.a" before moving on to the next directory.
std::string elf::searchLibrary(StringRef Path) {
  if (Path.startswith(":"))
    return findFromSearchPaths(Path.substr(1));

  for (StringRef Dir : Config->SearchPaths) {
    if (!Config->Static) {
      std::string Shared =
          buildSysrootedPath(Dir, ("lib" + Path + ".so").str());
      if (fs::exists(Shared))
        return Shared;
    }

    std::string Archive = buildSysrootedPath(Dir, ("lib" + Path + ".a").str());
    if (fs::exists(Archive))
      return Archive;
  }
  return "";
}
// Joins Dir and File into one path.
// A Dir that begins with '=' is taken relative to Sysroot (set with the
// --sysroot command line switch): the '=' is dropped and Sysroot is
// prepended.
std::string elf::buildSysrootedPath(StringRef Dir, StringRef File) {
  SmallString<128> Joined;
  if (!Dir.startswith("="))
    path::append(Joined, Dir, File);
  else
    path::append(Joined, Config->Sysroot, Dir.substr(1), File);
  return Joined.str();
}

View File

@ -0,0 +1,167 @@
//===- EhFrame.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// .eh_frame section contains information on how to unwind the stack when
// an exception is thrown. The section consists of a sequence of CIE and FDE
// records. The linker needs to merge CIEs and associate FDEs to CIEs.
// That means the linker has to understand the format of the section.
//
// This file contains a few utility functions to read .eh_frame contents.
//
//===----------------------------------------------------------------------===//
#include "EhFrame.h"
#include "Error.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::dwarf;
using namespace llvm::object;
using namespace llvm::support::endian;
namespace lld {
namespace elf {
// .eh_frame section is a sequence of records. Each record starts with
// a 4 byte length field. This function reads the length.
//
//   D - the bytes from the start of the record to the end of the section.
//
// Returns the total size of the record, i.e. the value of the length field
// plus the 4 bytes of the field itself. Terminates the process if D is
// shorter than the length field, if the record uses the extended length
// format, or if the record would extend past the end of D.
template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> D) {
  const endianness E = ELFT::TargetEndianness;
  if (D.size() < 4)
    fatal("CIE/FDE too small");

  // First 4 bytes of CIE/FDE is the size of the record.
  // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead,
  // but we do not support that format yet.
  uint64_t V = read32<E>(D.data());
  if (V == UINT32_MAX)
    fatal("CIE/FDE too large");

  // The sum is computed in 64 bits, so it cannot wrap around.
  uint64_t Size = V + 4;
  if (Size > D.size())
    fatal("CIE/FDE ends past the end of the section");
  return Size;
}
// Returns the first byte of D and shrinks D so that it starts at the
// following byte. Terminates the process if D is empty.
static uint8_t readByte(ArrayRef<uint8_t> &D) {
  if (D.empty())
    fatal("corrupted or unsupported CIE information");
  const uint8_t First = D[0];
  D = D.slice(1);
  return First;
}
// Advances D past one LEB128-encoded integer.
// The value itself is of no interest here because only the runtime needs
// it, but the field has to be stepped over in order to reach whatever
// follows it. Terminates the process if D ends before the last byte of the
// number (the one without the continuation bit) is seen.
static void skipLeb128(ArrayRef<uint8_t> &D) {
  for (;;) {
    if (D.empty())
      fatal("corrupted or unsupported CIE information");
    const bool More = (D.front() & 0x80) != 0;
    D = D.slice(1);
    if (!More)
      return;
  }
}
// Returns the size in bytes of a pointer stored with encoding Enc.
// Only the low four bits of Enc (the value format) are considered.
// Terminates the process for a format that is not listed below.
template <class ELFT> static size_t getAugPSize(unsigned Enc) {
  const unsigned Format = Enc & 0x0f;
  switch (Format) {
  // Fixed-width formats.
  case DW_EH_PE_udata2:
  case DW_EH_PE_sdata2:
    return 2;
  case DW_EH_PE_udata4:
  case DW_EH_PE_sdata4:
    return 4;
  case DW_EH_PE_udata8:
  case DW_EH_PE_sdata8:
    return 8;
  // Formats whose width follows the ELF class.
  case DW_EH_PE_absptr:
  case DW_EH_PE_signed:
    return ELFT::Is64Bits ? 8 : 4;
  default:
    break;
  }
  fatal("unknown FDE encoding");
}
// Advances D past the data of a 'P' augmentation: one encoding byte
// followed by a pointer stored in that encoding. Terminates the process
// for the aligned encoding, and when the pointer is not followed by at
// least one more byte in D.
template <class ELFT> static void skipAugP(ArrayRef<uint8_t> &D) {
  const uint8_t Encoding = readByte(D);
  const bool Aligned = (Encoding & 0xf0) == DW_EH_PE_aligned;
  if (Aligned)
    fatal("DW_EH_PE_aligned encoding is not supported");

  const size_t PtrSize = getAugPSize<ELFT>(Encoding);
  if (D.size() <= PtrSize)
    fatal("corrupted CIE");
  D = D.slice(PtrSize);
}
// Reads just enough of the record in D to find the value of its 'R'
// augmentation, which is returned. If the augmentation string contains no
// 'R', DW_EH_PE_absptr is returned. Any malformed or unsupported input
// terminates the process.
template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> D) {
  // The first 8 bytes (the 4-byte length field and the word after it) are
  // not needed here.
  if (D.size() < 8)
    fatal("CIE too small");
  D = D.slice(8);

  const uint8_t Version = readByte(D);
  const bool Supported = Version == 1 || Version == 3;
  if (!Supported)
    fatal("FDE version 1 or 3 expected, but got " + Twine((unsigned)Version));

  // The augmentation string is NUL-terminated.
  const unsigned char *Nul = std::find(D.begin(), D.end(), '\0');
  if (Nul == D.end())
    fatal("corrupted CIE");
  StringRef Aug(reinterpret_cast<const char *>(D.begin()), Nul - D.begin());
  D = D.slice(Aug.size() + 1);

  // Code alignment factor should always be 1 for .eh_frame.
  if (readByte(D) != 1)
    fatal("CIE code alignment must be 1");

  // Skip data alignment factor.
  skipLeb128(D);

  // Skip the return address register. In CIE version 1 this is a single
  // byte. In CIE version 3 this is an unsigned LEB128.
  if (Version == 1)
    readByte(D);
  else
    skipLeb128(D);

  // We only care about an 'R' value, but other records may precede an 'R'
  // record. Unfortunately records are not in TLV (type-length-value) format,
  // so each kind has to be stepped over in its own way.
  for (char C : Aug) {
    switch (C) {
    case 'R':
      return readByte(D);
    case 'z':
      skipLeb128(D);
      break;
    case 'P':
      skipAugP<ELFT>(D);
      break;
    case 'L':
      readByte(D);
      break;
    default:
      fatal("unknown .eh_frame augmentation string: " + Aug);
    }
  }
  return DW_EH_PE_absptr;
}
// Explicit instantiations of the two exported templates for each of the
// four ELF types (32/64-bit, little/big endian).
template size_t readEhRecordSize<ELF32LE>(ArrayRef<uint8_t>);
template size_t readEhRecordSize<ELF32BE>(ArrayRef<uint8_t>);
template size_t readEhRecordSize<ELF64LE>(ArrayRef<uint8_t>);
template size_t readEhRecordSize<ELF64BE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF32LE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF32BE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF64LE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF64BE>(ArrayRef<uint8_t>);
}
}

View File

@ -0,0 +1,22 @@
//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_EHFRAME_H
#define LLD_ELF_EHFRAME_H
#include "lld/Core/LLVM.h"
namespace lld {
namespace elf {
// Returns the full size of the .eh_frame record at the start of Data,
// including its 4-byte length field.
template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> Data);
// Returns the value of the 'R' augmentation of the record in Data, or
// DW_EH_PE_absptr when the record has no 'R' augmentation.
template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> Data);
}
}
#endif

View File

@ -0,0 +1,65 @@
//===- Error.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Error.h"
#include "Config.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace lld {
namespace elf {
bool HasError;
raw_ostream *ErrorOS;
void log(const Twine &Msg) {
if (Config->Verbose)
outs() << Msg << "\n";
}
void warning(const Twine &Msg) {
if (Config->FatalWarnings)
error(Msg);
else
*ErrorOS << Msg << "\n";
}
void error(const Twine &Msg) {
*ErrorOS << Msg << "\n";
HasError = true;
}
void error(std::error_code EC, const Twine &Prefix) {
error(Prefix + ": " + EC.message());
}
void fatal(const Twine &Msg) {
*ErrorOS << Msg << "\n";
exit(1);
}
void fatal(const Twine &Msg, const Twine &Prefix) {
fatal(Prefix + ": " + Msg);
}
void check(std::error_code EC) {
if (EC)
fatal(EC.message());
}
void check(Error Err) {
check(errorToErrorCode(std::move(Err)));
}
} // namespace elf
} // namespace lld

View File

@ -0,0 +1,61 @@
//===- Error.h --------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// The guard is named after the ELF linker. It previously reused the COFF
// linker's guard name, so including both Error.h headers in one translation
// unit would have silently dropped whichever came second.
#ifndef LLD_ELF_ERROR_H
#define LLD_ELF_ERROR_H
#include "lld/Core/LLVM.h"
namespace lld {
namespace elf {

// True once error() has been called.
extern bool HasError;
// Stream that receives warnings and errors.
extern llvm::raw_ostream *ErrorOS;

// Prints Msg to standard output in verbose mode; does nothing otherwise.
void log(const Twine &Msg);
// Prints Msg as a warning, or reports it as an error when warnings are
// configured to be fatal.
void warning(const Twine &Msg);
// Prints Msg and sets HasError. Does not stop execution.
void error(const Twine &Msg);
// Reports "<Prefix>: <message of EC>" as an error.
void error(std::error_code EC, const Twine &Prefix);

// Reports the error held by V, prefixed with Prefix.
template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) {
  error(V.getError(), Prefix);
}

// Prints Msg and terminates the process with exit status 1.
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg);
// Same, with the message formatted as "<Prefix>: <Msg>".
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg, const Twine &Prefix);

// The check() helpers unwrap a value-or-error object: they return the
// contained value on success and call fatal() with the error's message
// otherwise. The two-argument forms prepend Prefix to that message.
template <class T> T check(ErrorOr<T> E) {
  if (auto EC = E.getError())
    fatal(EC.message());
  return std::move(*E);
}

template <class T> T check(Expected<T> E) {
  if (!E)
    fatal(errorToErrorCode(E.takeError()).message());
  return std::move(*E);
}

template <class T> T check(ErrorOr<T> E, const Twine &Prefix) {
  if (auto EC = E.getError())
    fatal(EC.message(), Prefix);
  return std::move(*E);
}

template <class T> T check(Expected<T> E, const Twine &Prefix) {
  if (!E)
    fatal(errorToErrorCode(E.takeError()).message(), Prefix);
  return std::move(*E);
}

} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,345 @@
//===- ICF.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Identical Code Folding is a feature to merge sections not by name (which
// is regular comdat handling) but by contents. If two non-writable sections
// have the same data, relocations, attributes, etc., then the two
// are considered identical and merged by the linker. This optimization
// makes outputs smaller.
//
// ICF is theoretically a problem of reducing graphs by merging as many
// identical subgraphs as possible if we consider sections as vertices and
// relocations as edges. It may sound simple, but it is a bit more
// complicated than you might think. The order of processing sections
// matters because merging two sections can make other sections, whose
// relocations now point to the same section, mergeable. Graphs may contain
// cycles. We need a sophisticated algorithm to do this properly and
// efficiently.
//
// What we do in this file is this. We split sections into groups. Sections
// in the same group are considered identical.
//
// We begin by optimistically putting all sections into a single equivalence
// class. Then we apply a series of checks that split this initial
// equivalence class into more and more refined equivalence classes based on
// the properties by which a section can be distinguished.
//
// We begin by checking that the section contents and flags are the
// same. This only needs to be done once since these properties don't depend
// on the current equivalence class assignment.
//
// Then we split the equivalence classes based on checking that their
// relocations are the same, where relocation targets are compared by their
// equivalence class, not the concrete section. This may need to be done
// multiple times because as the equivalence classes are refined, two
// sections that had a relocation target in the same equivalence class may
// now target different equivalence classes, and hence these two sections
// must be put in different equivalence classes (whereas in the previous
// iteration they were not since the relocation target was the same.)
//
// Our algorithm is smart enough to merge the following mutually-recursive
// functions.
//
// void foo() { bar(); }
// void bar() { foo(); }
//
// This is the so-called "optimistic" algorithm described in
// http://research.google.com/pubs/pub36912.html. (Note that what GNU
// gold implemented is different from the optimistic algorithm.)
//
//===----------------------------------------------------------------------===//
#include "ICF.h"
#include "Config.h"
#include "OutputSections.h"
#include "SymbolTable.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace lld;
using namespace lld::elf;
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
namespace lld {
namespace elf {
// Implements Identical Code Folding for one ELF type. Sections carry a
// group ID; the helpers below repeatedly split groups of sections until
// the members of each group compare equal.
template <class ELFT> class ICF {
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
typedef Elf_Rel_Impl<ELFT, false> Elf_Rel;
// Equality predicate used to decide whether two sections stay in one group.
using Comparator = std::function<bool(const InputSection<ELFT> *,
const InputSection<ELFT> *)>;
public:
void run();
private:
// Next unused group ID; segregate() takes a new one for each group it
// splits off.
uint64_t NextId = 1;
static void setLive(SymbolTable<ELFT> *S);
static uint64_t relSize(InputSection<ELFT> *S);
// Hash over a section's flags, size and relocation section sizes.
static uint64_t getHash(InputSection<ELFT> *S);
// True if Sec may take part in ICF at all.
static bool isEligible(InputSectionBase<ELFT> *Sec);
// All eligible sections of all object files.
static std::vector<InputSection<ELFT> *> getSections();
// Splits [Begin, End), which must share one group ID, into groups of
// sections that are equal under Eq.
void segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End,
Comparator Eq);
// Calls segregate() on every run of adjacent sections with the same group ID.
void forEachGroup(std::vector<InputSection<ELFT> *> &V, Comparator Eq);
// Compares offset, type and addend of two relocation lists.
template <class RelTy>
static bool relocationEq(ArrayRef<RelTy> RA, ArrayRef<RelTy> RB);
template <class RelTy>
static bool variableEq(const InputSection<ELFT> *A,
const InputSection<ELFT> *B, ArrayRef<RelTy> RA,
ArrayRef<RelTy> RB);
// Compares everything except relocation targets.
static bool equalsConstant(const InputSection<ELFT> *A,
const InputSection<ELFT> *B);
static bool equalsVariable(const InputSection<ELFT> *A,
const InputSection<ELFT> *B);
};
}
}
// Computes a hash for S from its section flags, its size, and the size of
// each of its relocation sections. Relocation targets are deliberately
// not part of the hash.
template <class ELFT> uint64_t ICF<ELFT>::getHash(InputSection<ELFT> *S) {
  const uint64_t Flags = S->getSectionHdr()->sh_flags;
  uint64_t Hash = hash_combine(Flags, S->getSize());
  for (const Elf_Shdr *RelSec : S->RelocSections) {
    const uint64_t RelBytes = RelSec->sh_size;
    Hash = hash_combine(Hash, RelBytes);
  }
  return Hash;
}
// Returns true if Sec is a candidate for ICF: a live, non-discarded regular
// input section, other than .init/.fini, that is allocated and not
// writable.
template <class ELFT> bool ICF<ELFT>::isEligible(InputSectionBase<ELFT> *Sec) {
  const bool Usable =
      Sec && Sec != &InputSection<ELFT>::Discarded && Sec->Live;
  if (!Usable)
    return false;

  auto *S = dyn_cast<InputSection<ELFT>>(Sec);
  if (!S)
    return false;

  // .init and .fini contain instructions that must be executed to
  // initialize and finalize the process. They cannot and should not
  // be merged.
  StringRef Name = S->getSectionName();
  if (Name == ".init" || Name == ".fini")
    return false;

  const Elf_Shdr &Hdr = *S->getSectionHdr();
  const bool Allocated = (Hdr.sh_flags & SHF_ALLOC) != 0;
  const bool Writable = (Hdr.sh_flags & SHF_WRITE) != 0;
  return Allocated && !Writable;
}
// Collects every ICF-eligible section from all object files known to the
// symbol table, in file order and then section order.
template <class ELFT>
std::vector<InputSection<ELFT> *> ICF<ELFT>::getSections() {
  std::vector<InputSection<ELFT> *> Eligible;
  for (const std::unique_ptr<ObjectFile<ELFT>> &File :
       Symtab<ELFT>::X->getObjectFiles()) {
    for (InputSectionBase<ELFT> *Sec : File->getSections()) {
      if (!isEligible(Sec))
        continue;
      Eligible.push_back(cast<InputSection<ELFT>>(Sec));
    }
  }
  return Eligible;
}
// Splits the sections in [Begin, End) into groups of sections that are
// equal under Eq. All of them must carry the same group ID on entry.
// Each group that is split off is given a fresh ID; the last remaining
// group keeps the ID it came in with.
template <class ELFT>
void ICF<ELFT>::segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End,
                          Comparator Eq) {
  // Each round gathers, right behind the current leader, all sections equal
  // to it, so that equal sections end up contiguous. This is quadratic in
  // the worst case, but that is not an issue in practice because the
  // number of distinct sections in [Begin, End) is usually very small.
  InputSection<ELFT> **Cur = Begin;
  while (true) {
    InputSection<ELFT> *Leader = *Cur;
    auto SameAsLeader = [&](InputSection<ELFT> *S) { return Eq(Leader, S); };
    InputSection<ELFT> **Mid = std::stable_partition(Cur + 1, End, SameAsLeader);

    // Everything that is left equals the leader: nothing more to split.
    if (Mid == End)
      break;

    const uint64_t FreshId = NextId++;
    while (Cur != Mid) {
      (*Cur)->GroupId = FreshId;
      ++Cur;
    }
  }
}
// Walks V one run at a time, where a run is a maximal stretch of adjacent
// sections with the same group ID, and lets segregate() split each run
// using Eq.
template <class ELFT>
void ICF<ELFT>::forEachGroup(std::vector<InputSection<ELFT> *> &V,
                             Comparator Eq) {
  InputSection<ELFT> **Cur = V.data();
  InputSection<ELFT> **const Last = Cur + V.size();
  while (Cur != Last) {
    InputSection<ELFT> *Leader = *Cur;
    auto InOtherGroup = [&](InputSection<ELFT> *S) {
      return S->GroupId != Leader->GroupId;
    };
    InputSection<ELFT> **RunEnd = std::find_if(Cur + 1, Last, InOtherGroup);
    segregate(Cur, RunEnd, Eq);
    Cur = RunEnd;
  }
}
// Returns true if the two relocation lists have the same length and agree,
// entry by entry, on offset, type and addend. Relocation targets are not
// compared here.
template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) {
  if (RelsA.size() != RelsB.size())
    return false;

  for (size_t Idx = 0, Count = RelsA.size(); Idx != Count; ++Idx) {
    const RelTy &RA = RelsA[Idx];
    const RelTy &RB = RelsB[Idx];
    if (RA.r_offset != RB.r_offset)
      return false;
    if (RA.getType(Config->Mips64EL) != RB.getType(Config->Mips64EL))
      return false;
    if (getAddend<ELFT>(RA) != getAddend<ELFT>(RB))
      return false;
  }
  return true;
}
// Compares the "non-moving" part of two InputSections, i.e. everything
// except the relocation targets: relocation offsets/types/addends, section
// flags, size and data.
template <class ELFT>
bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A,
                               const InputSection<ELFT> *B) {
  size_t NumRelSecs = A->RelocSections.size();
  if (NumRelSecs != B->RelocSections.size())
    return false;

  for (size_t I = 0; I != NumRelSecs; ++I) {
    const Elf_Shdr *RelSecA = A->RelocSections[I];
    const Elf_Shdr *RelSecB = B->RelocSections[I];
    ELFFile<ELFT> &ObjA = A->File->getObj();
    ELFFile<ELFT> &ObjB = B->File->getObj();
    // The type of A's relocation section selects RELA vs. REL decoding for
    // both sides.
    bool Same = RelSecA->sh_type == SHT_RELA
                    ? relocationEq(ObjA.relas(RelSecA), ObjB.relas(RelSecB))
                    : relocationEq(ObjA.rels(RelSecA), ObjB.rels(RelSecB));
    if (!Same)
      return false;
  }

  if (A->getSectionHdr()->sh_flags != B->getSectionHdr()->sh_flags)
    return false;
  if (A->getSize() != B->getSize())
    return false;
  return A->getSectionData() == B->getSectionData();
}
// Compares the relocation targets of A and B pairwise. Two targets match if
// they are the same symbol, or if both are DefinedRegular symbols with the
// same Value whose sections are InputSections sharing a non-zero GroupId.
// RelsB is walked in lockstep with RelsA and is not length-checked here.
template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A,
                           const InputSection<ELFT> *B, ArrayRef<RelTy> RelsA,
                           ArrayRef<RelTy> RelsB) {
  const RelTy *CurB = RelsB.begin();
  for (const RelTy *CurA = RelsA.begin(), *EndA = RelsA.end(); CurA != EndA;
       ++CurA, ++CurB) {
    SymbolBody &SymA = A->File->getRelocTargetSym(*CurA);
    SymbolBody &SymB = B->File->getRelocTargetSym(*CurB);
    if (&SymA == &SymB)
      continue;

    // Different symbols: they must refer to the same place in sections that
    // currently belong to the same group.
    auto *DefA = dyn_cast<DefinedRegular<ELFT>>(&SymA);
    auto *DefB = dyn_cast<DefinedRegular<ELFT>>(&SymB);
    if (!DefA || !DefB || DefA->Value != DefB->Value)
      return false;

    InputSection<ELFT> *SecA = dyn_cast<InputSection<ELFT>>(DefA->Section);
    InputSection<ELFT> *SecB = dyn_cast<InputSection<ELFT>>(DefB->Section);
    if (!SecA || !SecB)
      return false;
    if (!SecA->GroupId || SecA->GroupId != SecB->GroupId)
      return false;
  }
  return true;
}
// Compares the "moving" part of two InputSections, i.e. the relocation
// targets, one relocation section at a time. Indexes B's relocation sections
// by A's count without checking that the counts match.
template <class ELFT>
bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A,
                               const InputSection<ELFT> *B) {
  size_t NumRelSecs = A->RelocSections.size();
  for (size_t I = 0; I != NumRelSecs; ++I) {
    const Elf_Shdr *RelSecA = A->RelocSections[I];
    const Elf_Shdr *RelSecB = B->RelocSections[I];
    ELFFile<ELFT> &ObjA = A->File->getObj();
    ELFFile<ELFT> &ObjB = B->File->getObj();
    bool Same =
        RelSecA->sh_type == SHT_RELA
            ? variableEq(A, B, ObjA.relas(RelSecA), ObjB.relas(RelSecB))
            : variableEq(A, B, ObjA.rels(RelSecA), ObjB.rels(RelSecB));
    if (!Same)
      return false;
  }
  return true;
}
// The main function of ICF.
template <class ELFT> void ICF<ELFT>::run() {
  // Seed every section's group ID with its hash. Equal IDs therefore mean
  // "likely, but not guaranteed, identical static contents". The top bit is
  // forced on so hash-based IDs cannot collide with the serial IDs handed
  // out later.
  const uint64_t HashIdBit = uint64_t(1) << 63;
  std::vector<InputSection<ELFT> *> V = getSections();
  for (InputSection<ELFT> *Sec : V)
    Sec->GroupId = getHash(Sec) | HashIdBit;

  // Sort by group ID so that, from here on, members of one group are
  // adjacent in V.
  auto ByGroupId = [](InputSection<ELFT> *A, InputSection<ELFT> *B) {
    return A->GroupId < B->GroupId;
  };
  std::stable_sort(V.begin(), V.end(), ByGroupId);

  // First pass: split groups by their static contents.
  forEachGroup(V, equalsConstant);

  // Further passes: split groups by relocation targets until a pass
  // allocates no new group ID, i.e. until nothing was split.
  int Cnt = 1;
  uint64_t IdBefore;
  do {
    ++Cnt;
    IdBefore = NextId;
    forEachGroup(V, equalsVariable);
  } while (IdBefore != NextId);
  log("ICF needed " + Twine(Cnt) + " iterations.");

  // Fold every group with more than one member into its first section.
  auto Cur = V.begin();
  auto End = V.end();
  while (Cur != End) {
    InputSection<ELFT> *Keeper = *Cur;
    auto Dup = Cur + 1;
    auto GroupEnd = std::find_if(Dup, End, [&](InputSection<ELFT> *S) {
      return Keeper->GroupId != S->GroupId;
    });
    if (Dup != GroupEnd) {
      log("selected " + Keeper->getSectionName());
      for (; Dup != GroupEnd; ++Dup) {
        InputSection<ELFT> *Removed = *Dup;
        log(" removed " + Removed->getSectionName());
        Keeper->replace(Removed);
      }
    }
    Cur = GroupEnd;
  }
}
// ICF entry point function: constructs a temporary ICF<ELFT> object and
// runs it.
template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); }
// Explicit instantiations for the four ELF class/endianness combinations,
// so that callers only need the declaration in ICF.h.
template void elf::doIcf<ELF32LE>();
template void elf::doIcf<ELF32BE>();
template void elf::doIcf<ELF64LE>();
template void elf::doIcf<ELF64BE>();

View File

@ -0,0 +1,19 @@
//===- ICF.h --------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_ICF_H
#define LLD_ELF_ICF_H
namespace lld {
namespace elf {
// Entry point of ICF for the ELF flavor selected by ELFT. Only declared
// here; the definition and its instantiations are not part of this header.
template <class ELFT> void doIcf();
}
}
#endif

View File

@ -0,0 +1,828 @@
//===- InputFiles.cpp -----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "Driver.h"
#include "Error.h"
#include "InputSection.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys::fs;
using namespace lld;
using namespace lld::elf;
// Produces a printable name for F: "(internal)" when F is null,
// "foo.a(bar.o)" for an archive member, and "baz.o" otherwise.
std::string elf::getFilename(const InputFile *F) {
  if (F == nullptr)
    return "(internal)";
  if (F->ArchiveName.empty())
    return F->getName();
  return (F->ArchiveName + "(" + F->getName() + ")").str();
}
// Builds an ELFFile over the contents of MB. A construction failure is
// reported through error(); the object is returned either way.
template <class ELFT>
static ELFFile<ELFT> createELFObj(MemoryBufferRef MB) {
  std::error_code Err;
  ELFFile<ELFT> Obj(MB.getBuffer(), Err);
  if (Err) {
    error(Err, "failed to read " + MB.getBufferIdentifier());
  }
  return Obj;
}
// Maps the compile-time ELFT parameters (endianness and word size) to the
// corresponding ELFKind enumerator.
template <class ELFT> static ELFKind getELFKind() {
  const bool IsLittle = ELFT::TargetEndianness == support::little;
  if (ELFT::Is64Bits)
    return IsLittle ? ELF64LEKind : ELF64BEKind;
  return IsLittle ? ELF32LEKind : ELF32BEKind;
}
// Initializes the InputFile base with the given kind and buffer, builds the
// ELFFile over the same buffer, and then records the ELF kind implied by
// ELFT and the e_machine value read from the file's ELF header.
template <class ELFT>
ELFFileBase<ELFT>::ELFFileBase(Kind K, MemoryBufferRef MB)
: InputFile(K, MB), ELFObj(createELFObj<ELFT>(MB)) {
EKind = getELFKind<ELFT>();
EMachine = ELFObj.getHeader()->e_machine;
}
// Returns the symbols of the table recorded in Symtab: all of them, or only
// those at index sh_info and above when OnlyGlobals is set. Returns an empty
// range if no symbol table was recorded, and reports a fatal error if
// sh_info exceeds the number of symbols.
template <class ELFT>
typename ELFT::SymRange ELFFileBase<ELFT>::getElfSymbols(bool OnlyGlobals) {
  if (!Symtab)
    return Elf_Sym_Range(nullptr, nullptr);

  Elf_Sym_Range All = ELFObj.symbols(Symtab);
  auto First = All.begin();
  auto Last = All.end();
  uint32_t Count = std::distance(First, Last);
  uint32_t NumLocals = Symtab->sh_info;
  if (NumLocals > Count)
    fatal(getFilename(this) + ": invalid sh_info in symbol table");

  if (!OnlyGlobals)
    return makeArrayRef(First, Last);
  return makeArrayRef(First + NumLocals, Last);
}
// Returns the section index a symbol refers to. SHN_XINDEX is resolved
// through the extended index table; any other value at or above
// SHN_LORESERVE yields 0.
template <class ELFT>
uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const {
  uint32_t Shndx = Sym.st_shndx;
  if (Shndx == ELF::SHN_XINDEX)
    return ELFObj.getExtendedSymbolTableIndex(&Sym, Symtab, SymtabSHNDX);
  if (Shndx < ELF::SHN_LORESERVE)
    return Shndx;
  return 0;
}
// Loads the string table associated with the recorded symbol table into
// StringTable. Does nothing when no symbol table was recorded.
template <class ELFT> void ELFFileBase<ELFT>::initStringTable() {
  if (Symtab) {
    StringTable = check(ELFObj.getStringTableForSymtab(*Symtab));
  }
}
// Constructs an object file over M, tagging it as ObjectKind. Nothing else
// happens here; sections and symbols are read by parse().
template <class ELFT>
elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M)
: ELFFileBase<ELFT>(Base::ObjectKind, M) {}
// Returns the symbol bodies from index sh_info onward, or all of
// SymbolBodies when the file has no symbol table.
template <class ELFT>
ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() {
  if (this->Symtab) {
    uint32_t NumLocals = this->Symtab->sh_info;
    return makeArrayRef(this->SymbolBodies).slice(NumLocals);
  }
  return this->SymbolBodies;
}
// Returns the symbol bodies in [1, sh_info), skipping the entry at index 0,
// or all of SymbolBodies when the file has no symbol table.
//
// A symbol table whose sh_info is 0 declares no local symbols at all. That
// case is handled explicitly because `FirstNonLocal - 1` would otherwise
// wrap around to UINT32_MAX and produce a slice far beyond the end of
// SymbolBodies.
template <class ELFT>
ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() {
  if (!this->Symtab)
    return this->SymbolBodies;
  uint32_t FirstNonLocal = this->Symtab->sh_info;
  if (FirstNonLocal == 0)
    return ArrayRef<SymbolBody *>();
  return makeArrayRef(this->SymbolBodies).slice(1, FirstNonLocal - 1);
}
// Returns every symbol body except the one at index 0, or all of
// SymbolBodies when the file has no symbol table.
template <class ELFT>
ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getSymbols() {
  if (this->Symtab) {
    return makeArrayRef(this->SymbolBodies).slice(1);
  }
  return this->SymbolBodies;
}
// Returns ri_gp_value from this file's register-info record: taken from
// MipsOptions for 64-bit ELFT and from MipsReginfo for 32-bit ELFT. Returns
// 0 when the corresponding section or its Reginfo is absent.
template <class ELFT> uint32_t elf::ObjectFile<ELFT>::getMipsGp0() const {
  if (ELFT::Is64Bits) {
    if (MipsOptions && MipsOptions->Reginfo)
      return MipsOptions->Reginfo->ri_gp_value;
    return 0;
  }
  if (MipsReginfo && MipsReginfo->Reginfo)
    return MipsReginfo->Reginfo->ri_gp_value;
  return 0;
}
// Parses the object file: sections first, then symbols. ComdatGroups is
// passed through to initializeSections(), which both reads and extends it.
template <class ELFT>
void elf::ObjectFile<ELFT>::parse(DenseSet<StringRef> &ComdatGroups) {
// Read section and symbol tables.
// Sections must come first: creating symbol bodies looks sections up in
// the Sections vector that initializeSections() fills in.
initializeSections(ComdatGroups);
initializeSymbols();
}
// Sections with SHT_GROUP and comdat bits define comdat section groups,
// which are identified and deduplicated by group name. This function returns
// that name: the name of symbol number sh_info in the symbol table that
// sh_link points to.
template <class ELFT>
StringRef elf::ObjectFile<ELFT>::getShtGroupSignature(const Elf_Shdr &Sec) {
  const ELFFile<ELFT> &Obj = this->ELFObj;
  const Elf_Shdr *GroupSymtab = check(Obj.getSection(Sec.sh_link));
  const Elf_Sym *Signature = Obj.getSymbol(GroupSymtab, Sec.sh_info);
  StringRef Names = check(Obj.getStringTableForSymtab(*GroupSymtab));
  return check(Signature->getName(Names));
}
// Returns the member section indices of an SHT_GROUP section, i.e. its
// contents without the leading flag word. Any group whose first word is not
// GRP_COMDAT (or that is empty) is a fatal error.
template <class ELFT>
ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word>
elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
  const ELFFile<ELFT> &Obj = this->ELFObj;
  ArrayRef<Elf_Word> Words =
      check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec));
  bool IsComdat = !Words.empty() && Words[0] == GRP_COMDAT;
  if (!IsComdat)
    fatal(getFilename(this) + ": unsupported SHT_GROUP format");
  return Words.slice(1);
}
// Decides whether Sec should be handled as a mergeable section.
template <class ELFT>
bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
  // Sections are not merged at -O0 (the default is -O1). This sometimes
  // makes the linker significantly faster, at the cost of a bigger output.
  if (Config->Optimize == 0)
    return false;

  const uintX_t Flags = Sec.sh_flags;
  const bool IsMerge = (Flags & SHF_MERGE) != 0;
  if (!IsMerge)
    return false;

  if ((Flags & SHF_WRITE) != 0)
    fatal(getFilename(this) + ": writable SHF_MERGE section is not supported");

  const uintX_t EntSize = Sec.sh_entsize;
  if (EntSize == 0 || Sec.sh_size % EntSize != 0)
    fatal(getFilename(this) +
          ": SHF_MERGE section size must be a multiple of sh_entsize");

  // A non-SHF_STRINGS section whose alignment exceeds sh_entsize is not
  // merged: every entity would need trailing padding, which the producer of
  // the .o could equally have expressed by choosing a larger sh_entsize.
  // String sections are always merged.
  const bool IsStrings = (Flags & SHF_STRINGS) != 0;
  return IsStrings || Sec.sh_addralign <= EntSize;
}
// Walks the section header table once and fills in the Sections vector (one
// slot per section header). Depending on the section type a slot is left
// null, set to the shared Discarded marker, or set to a newly created input
// section. Symbol table sections are remembered in Symtab / SymtabSHNDX, and
// relocation sections are attached to the section they relocate.
//
// ComdatGroups holds the comdat group signatures seen so far; signatures of
// groups found in this file are added to it.
template <class ELFT>
void elf::ObjectFile<ELFT>::initializeSections(
DenseSet<StringRef> &ComdatGroups) {
uint64_t Size = this->ELFObj.getNumSections();
Sections.resize(Size);
// I is pre-incremented at the top of the loop body, so it starts one below
// zero (unsigned wrap-around) and equals the section index inside the loop.
unsigned I = -1;
const ELFFile<ELFT> &Obj = this->ELFObj;
for (const Elf_Shdr &Sec : Obj.sections()) {
++I;
// A group processed earlier in this loop may already have discarded this
// section; leave it discarded.
if (Sections[I] == &InputSection<ELFT>::Discarded)
continue;
switch (Sec.sh_type) {
case SHT_GROUP:
// The group section itself is never kept.
Sections[I] = &InputSection<ELFT>::Discarded;
// insert().second is true when this signature was not seen before: the
// group is kept and its members are processed normally.
if (ComdatGroups.insert(getShtGroupSignature(Sec)).second)
continue;
// Duplicate group: discard every member section.
for (uint32_t SecIndex : getShtGroupEntries(Sec)) {
if (SecIndex >= Size)
fatal(getFilename(this) + ": invalid section index in group: " +
Twine(SecIndex));
Sections[SecIndex] = &InputSection<ELFT>::Discarded;
}
break;
case SHT_SYMTAB:
this->Symtab = &Sec;
break;
case SHT_SYMTAB_SHNDX:
this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec));
break;
case SHT_STRTAB:
case SHT_NULL:
// No input section is created for these; the slot stays null.
break;
case SHT_RELA:
case SHT_REL: {
// This section contains relocation information.
// If -r is given, we do not interpret or apply relocation
// but just copy relocation sections to output.
if (Config->Relocatable) {
Sections[I] = new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec);
break;
}
// Find the relocation target section and associate this
// section with it.
InputSectionBase<ELFT> *Target = getRelocTarget(Sec);
// getRelocTarget() returns null when the target was discarded.
if (!Target)
break;
if (auto *S = dyn_cast<InputSection<ELFT>>(Target)) {
S->RelocSections.push_back(&Sec);
break;
}
if (auto *S = dyn_cast<EhInputSection<ELFT>>(Target)) {
if (S->RelocSection)
fatal(
getFilename(this) +
": multiple relocation sections to .eh_frame are not supported");
S->RelocSection = &Sec;
break;
}
// NOTE(review): no break follows; this relies on fatal() not returning,
// otherwise control would fall through into the next case -- confirm.
fatal(getFilename(this) +
": relocations pointing to SHF_MERGE are not supported");
}
case SHT_ARM_ATTRIBUTES:
// FIXME: ARM meta-data section. At present attributes are ignored,
// they can be used to reason about object compatibility.
Sections[I] = &InputSection<ELFT>::Discarded;
break;
default:
Sections[I] = createInputSection(Sec);
}
}
}
// Returns the input section that the relocation section Sec applies to
// (the section at index sh_info), or nullptr if that section was discarded.
// An out-of-range index or a null slot is a fatal error.
template <class ELFT>
InputSectionBase<ELFT> *
elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
  const uint32_t TargetIdx = Sec.sh_info;
  if (TargetIdx >= Sections.size())
    fatal(getFilename(this) + ": invalid relocated section index: " +
          Twine(TargetIdx));

  InputSectionBase<ELFT> *Relocated = Sections[TargetIdx];

  // Strictly speaking, a relocation section must belong to the group of the
  // section it relocates. LLVM 3.3 and earlier failed to guarantee that, so
  // a relocation section pointing at a discarded section is tolerated.
  if (Relocated == &InputSection<ELFT>::Discarded)
    return nullptr;

  if (Relocated == nullptr)
    fatal(getFilename(this) + ": unsupported relocation reference");
  return Relocated;
}
// Creates the input section object for Sec, choosing the concrete class
// from the section's name and flags. Returns the shared Discarded marker
// for sections that are dropped.
template <class ELFT>
InputSectionBase<ELFT> *
elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
  StringRef Name = check(this->ELFObj.getSectionName(&Sec));

  // .note.GNU-stack is a marker section that controls the presence of the
  // PT_GNU_STACK segment in outputs. LLD decides that solely from the
  // command line (-z execstack), so the section is ignored.
  if (Name == ".note.GNU-stack") {
    return &InputSection<ELFT>::Discarded;
  }

  // Split-stack objects are rejected with an error; the section is dropped.
  if (Name == ".note.GNU-split-stack") {
    error("objects using splitstacks are not supported");
    return &InputSection<ELFT>::Discarded;
  }

  if (Config->StripDebug && Name.startswith(".debug")) {
    return &InputSection<ELFT>::Discarded;
  }

  // A MIPS object file has special sections that contain register usage
  // info, which the linker needs to handle specially.
  if (Config->EMachine == EM_MIPS && Name == ".reginfo") {
    MipsReginfo.reset(new MipsReginfoInputSection<ELFT>(this, &Sec));
    return MipsReginfo.get();
  }
  if (Config->EMachine == EM_MIPS && Name == ".MIPS.options") {
    MipsOptions.reset(new MipsOptionsInputSection<ELFT>(this, &Sec));
    return MipsOptions.get();
  }

  // The linker merges EH (exception handling) frames and creates a
  // .eh_frame_hdr section for the runtime, so .eh_frame gets its own class.
  // For relocatable outputs it is passed through like any other section.
  if (!Config->Relocatable && Name == ".eh_frame")
    return new (EHAlloc.Allocate()) EhInputSection<ELFT>(this, &Sec);

  if (shouldMerge(Sec))
    return new (MAlloc.Allocate()) MergeInputSection<ELFT>(this, &Sec);

  return new (IAlloc.Allocate()) InputSection<ELFT>(this, &Sec);
}
// Loads the string table and creates one SymbolBody per entry of the
// symbol table, in table order, appending them to SymbolBodies.
template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() {
  this->initStringTable();
  Elf_Sym_Range AllSyms = this->getElfSymbols(false);
  uint32_t Count = std::distance(AllSyms.begin(), AllSyms.end());
  SymbolBodies.reserve(Count);
  for (const Elf_Sym &Entry : AllSyms) {
    SymbolBodies.push_back(createSymbolBody(&Entry));
  }
}
// Returns the input section that Sym is defined in: nullptr when the
// symbol's section index is 0, the Discarded marker when the section was
// discarded, and otherwise the section's Repl pointer. An out-of-range index
// or a null slot is a fatal error.
template <class ELFT>
InputSectionBase<ELFT> *
elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const {
  const uint32_t SecIdx = this->getSectionIndex(Sym);
  if (SecIdx == 0)
    return nullptr;
  if (SecIdx >= Sections.size() || !Sections[SecIdx])
    fatal(getFilename(this) + ": invalid section index: " + Twine(SecIdx));

  InputSectionBase<ELFT> *Sec = Sections[SecIdx];
  if (Sec != &InputSectionBase<ELFT>::Discarded)
    return Sec->Repl;
  return Sec;
}
// Creates the SymbolBody for one symbol table entry. Local symbols are
// allocated directly from this file's allocator; all other symbols are
// registered with the global symbol table, whose resulting body is returned.
template <class ELFT>
SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
  int Binding = Sym->getBinding();
  InputSectionBase<ELFT> *Sec = getSection(*Sym);

  if (Binding == STB_LOCAL) {
    if (Sym->st_shndx == SHN_UNDEF)
      return new (this->Alloc)
          Undefined(Sym->st_name, Sym->st_other, Sym->getType(), this);
    return new (this->Alloc) DefinedRegular<ELFT>(*Sym, Sec);
  }

  StringRef Name = check(Sym->getName(this->StringTable));

  // Shared by the two paths below that register the symbol as undefined.
  auto AddUndefined = [&]() -> SymbolBody * {
    return elf::Symtab<ELFT>::X
        ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(),
                       /*CanOmitFromDynSym*/ false, this)
        ->body();
  };

  if (Sym->st_shndx == SHN_UNDEF)
    return AddUndefined();
  if (Sym->st_shndx == SHN_COMMON)
    return elf::Symtab<ELFT>::X
        ->addCommon(Name, Sym->st_size, Sym->st_value, Binding, Sym->st_other,
                    Sym->getType(), this)
        ->body();

  bool KnownBinding = Binding == STB_GLOBAL || Binding == STB_WEAK ||
                      Binding == STB_GNU_UNIQUE;
  if (!KnownBinding)
    fatal(getFilename(this) + ": unexpected binding: " + Twine(Binding));

  // A symbol defined in a discarded section is registered as undefined.
  if (Sec == &InputSection<ELFT>::Discarded)
    return AddUndefined();
  return elf::Symtab<ELFT>::X->addRegular(Name, *Sym, Sec)->body();
}
// Opens the archive in MB and registers every entry of its symbol table
// with the global symbol table as a lazy archive symbol.
template <class ELFT> void ArchiveFile::parse() {
  File = check(Archive::create(MB), "failed to parse archive");

  // Read the symbol table to construct Lazy objects.
  for (const Archive::Symbol &Entry : File->symbols()) {
    Symtab<ELFT>::X->addLazyArchive(this, Entry);
  }
}
// Returns a buffer for the archive member that defines Sym. An empty
// MemoryBufferRef is returned when that member was already handed out
// (tracked by its offset in Seen).
MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) {
  Archive::Child Member =
      check(Sym->getMember(),
            "could not get the member for symbol " + Sym->getName());

  bool FirstRequest = Seen.insert(Member.getChildOffset()).second;
  if (!FirstRequest)
    return MemoryBufferRef();

  MemoryBufferRef Buf =
      check(Member.getMemoryBufferRef(),
            "could not get the buffer for the member defining symbol " +
                Sym->getName());

  // For thin archives, when a Cpio writer is active, the member's contents
  // are appended to it under the member's root-relative path.
  if (Member.getParent()->isThin() && Driver->Cpio)
    Driver->Cpio->append(relativeToRoot(check(Member.getFullName())),
                         Buf.getBuffer());
  return Buf;
}
// Constructs a shared-object file over M, tagging it as SharedKind and
// capturing the value Config->AsNeeded has at construction time.
template <class ELFT>
SharedFile<ELFT>::SharedFile(MemoryBufferRef M)
: ELFFileBase<ELFT>(Base::SharedKind, M), AsNeeded(Config->AsNeeded) {}
// Returns the section header that Sym is defined in, or nullptr when the
// symbol's section index is 0.
template <class ELFT>
const typename ELFT::Shdr *
SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const {
  const uint32_t SecIdx = this->getSectionIndex(Sym);
  if (SecIdx != 0)
    return check(this->ELFObj.getSection(SecIdx));
  return nullptr;
}
// Partially parse the shared object file so that we can call
// getSoName on this object.
//
// Records the dynamic symbol table, the extended section index table and
// the version sections, loads the string table, and sets SoName to the
// DT_SONAME string if the file has one (otherwise to the file name).
template <class ELFT> void SharedFile<ELFT>::parseSoName() {
  typedef typename ELFT::Dyn Elf_Dyn;
  typedef typename ELFT::uint uintX_t;
  const Elf_Shdr *DynamicSec = nullptr;

  // Refer to the member rather than copying the whole ELFFile object.
  const ELFFile<ELFT> &Obj = this->ELFObj;
  for (const Elf_Shdr &Sec : Obj.sections()) {
    switch (Sec.sh_type) {
    default:
      continue;
    case SHT_DYNSYM:
      this->Symtab = &Sec;
      break;
    case SHT_DYNAMIC:
      DynamicSec = &Sec;
      break;
    case SHT_SYMTAB_SHNDX:
      this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec));
      break;
    case SHT_GNU_versym:
      this->VersymSec = &Sec;
      break;
    case SHT_GNU_verdef:
      this->VerdefSec = &Sec;
      break;
    }
  }

  this->initStringTable();

  // Default when there is no .dynamic section or no DT_SONAME entry.
  SoName = this->getName();

  if (!DynamicSec)
    return;

  // Read the dynamic entries through the library accessor instead of raw
  // pointer arithmetic on sh_offset/sh_size, so that a section header
  // pointing outside the file is reported as an error rather than being
  // read out of bounds.
  ArrayRef<Elf_Dyn> Entries =
      check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec));
  for (const Elf_Dyn &Dyn : Entries) {
    if (Dyn.d_tag != DT_SONAME)
      continue;
    uintX_t Val = Dyn.getVal();
    if (Val >= this->StringTable.size())
      fatal(getFilename(this) + ": invalid DT_SONAME entry");
    SoName = StringRef(this->StringTable.data() + Val);
    return;
  }
}
// Parse the version definitions in the object file if present. Returns a
// vector whose nth element contains a pointer to the Elf_Verdef for version
// identifier n. Version identifiers that are not definitions map to nullptr.
// The array always has at least length 1.
//
// On return, Versym points at the versym entry of the first global symbol,
// or is left untouched when no version information is processed.
template <class ELFT>
std::vector<const typename ELFT::Verdef *>
SharedFile<ELFT>::parseVerdefs(const Elf_Versym *&Versym) {
  std::vector<const Elf_Verdef *> Verdefs(1);

  // We only need to process symbol versions for this DSO if it has both a
  // versym and a verdef section, which indicates that the DSO contains
  // symbol version definitions. The dynamic symbol table is also required:
  // its sh_info is dereferenced below, and without it there are no symbols
  // to attach versions to anyway.
  if (!VersymSec || !VerdefSec || !this->Symtab)
    return Verdefs;

  // The location of the first global versym entry.
  Versym = reinterpret_cast<const Elf_Versym *>(this->ELFObj.base() +
                                                VersymSec->sh_offset) +
           this->Symtab->sh_info;

  // We cannot determine the largest verdef identifier without inspecting
  // every Elf_Verdef, but both bfd and gold assign verdef identifiers
  // sequentially starting from 1, so we predict that the largest identifier
  // will be VerdefCount.
  unsigned VerdefCount = VerdefSec->sh_info;
  Verdefs.resize(VerdefCount + 1);

  // Build the Verdefs array by following the chain of Elf_Verdef objects
  // from the start of the .gnu.version_d section.
  // NOTE(review): vd_next offsets are followed without checking that they
  // stay inside the section.
  const uint8_t *Verdef = this->ELFObj.base() + VerdefSec->sh_offset;
  for (unsigned I = 0; I != VerdefCount; ++I) {
    auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef);
    Verdef += CurVerdef->vd_next;
    unsigned VerdefIndex = CurVerdef->vd_ndx;
    // Grow the table if an identifier exceeds the predicted maximum.
    if (Verdefs.size() <= VerdefIndex)
      Verdefs.resize(VerdefIndex + 1);
    Verdefs[VerdefIndex] = CurVerdef;
  }

  return Verdefs;
}
// Fully parse the shared object file. This must be called after
// parseSoName().
//
// Undefined global symbols are collected in Undefs; defined ones are added
// to the global symbol table together with their version definition, if any.
template <class ELFT> void SharedFile<ELFT>::parseRest() {
  // Create mapping from version identifiers to Elf_Verdef entries.
  const Elf_Versym *Versym = nullptr;
  std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym);

  Elf_Sym_Range Syms = this->getElfSymbols(true);
  for (const Elf_Sym &Sym : Syms) {
    // Versym, when present, advances in lockstep with the symbols.
    unsigned VersymIndex = 0;
    if (Versym) {
      VersymIndex = Versym->vs_index;
      ++Versym;
    }

    StringRef Name = check(Sym.getName(this->StringTable));
    if (Sym.isUndefined()) {
      Undefs.push_back(Name);
      continue;
    }

    if (Versym) {
      // Ignore local symbols and non-default versions.
      if (VersymIndex == VER_NDX_LOCAL || (VersymIndex & VERSYM_HIDDEN))
        continue;
    }

    const Elf_Verdef *V = nullptr;
    if (VersymIndex != VER_NDX_GLOBAL) {
      // The index comes straight from the file; reject values that do not
      // name an entry of the table built by parseVerdefs() instead of
      // reading past the end of the vector.
      if (VersymIndex >= Verdefs.size())
        fatal(getFilename(this) + ": invalid version definition index: " +
              Twine(VersymIndex));
      V = Verdefs[VersymIndex];
    }
    elf::Symtab<ELFT>::X->addShared(this, Name, Sym, V);
  }
}
// Derives the ELFKind of a bitcode file from the word size and endianness
// of its target triple.
static ELFKind getELFKind(MemoryBufferRef MB) {
  std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context);
  Triple Target(TripleStr);
  const bool IsLittle = Target.isLittleEndian();
  if (Target.isArch64Bit())
    return IsLittle ? ELF64LEKind : ELF64BEKind;
  return IsLittle ? ELF32LEKind : ELF32BEKind;
}
// Maps the architecture of a bitcode file's target triple to the matching
// ELF e_machine value. Architectures not listed here are a fatal error.
//
// The return type is 16 bits wide to match the ELF e_machine field and
// InputFile::EMachine, so that no machine value can be truncated on the way
// to its only consumer.
static uint16_t getMachineKind(MemoryBufferRef MB) {
  std::string TripleStr = getBitcodeTargetTriple(MB, Driver->Context);
  switch (Triple(TripleStr).getArch()) {
  case Triple::aarch64:
    return EM_AARCH64;
  case Triple::arm:
    return EM_ARM;
  case Triple::mips:
  case Triple::mipsel:
  case Triple::mips64:
  case Triple::mips64el:
    return EM_MIPS;
  case Triple::ppc:
    return EM_PPC;
  case Triple::ppc64:
    return EM_PPC64;
  case Triple::x86:
    return EM_386;
  case Triple::x86_64:
    return EM_X86_64;
  default:
    fatal(MB.getBufferIdentifier() +
          ": could not infer e_machine from bitcode target triple " +
          TripleStr);
  }
}
// Constructs a bitcode input file over MB and derives its ELF kind and
// machine type from the bitcode's target triple (each helper reads the
// triple from MB on its own).
BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) {
EKind = getELFKind(MB);
EMachine = getMachineKind(MB);
}
static uint8_t getGvVisibility(const GlobalValue *GV) {
switch (GV->getVisibility()) {
case GlobalValue::DefaultVisibility:
return STV_DEFAULT;
case GlobalValue::HiddenVisibility:
return STV_HIDDEN;
case GlobalValue::ProtectedVisibility:
return STV_PROTECTED;
}
llvm_unreachable("unknown visibility");
}
// Registers one bitcode symbol with the global symbol table and returns the
// resulting Symbol. Depending on the symbol's flags and comdat membership it
// is added as undefined, common, or bitcode-defined.
template <class ELFT>
Symbol *BitcodeFile::createSymbol(const DenseSet<const Comdat *> &KeptComdats,
                                  const IRObjectFile &Obj,
                                  const BasicSymbolRef &Sym) {
  const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl());

  // Print the symbol name into a scratch buffer and keep a saved copy.
  SmallString<64> NameBuf;
  raw_svector_ostream NameOS(NameBuf);
  Sym.printName(NameOS);
  StringRef NameRef = Saver.save(StringRef(NameBuf));

  const uint32_t Flags = Sym.getFlags();
  const bool IsWeak = Flags & BasicSymbolRef::SF_Weak;
  const uint32_t Binding = IsWeak ? STB_WEAK : STB_GLOBAL;

  // Attributes that can only be derived when a GlobalValue is available;
  // symbols without one keep these defaults.
  // FIXME: Expose a thread-local flag for module asm symbols.
  // FIXME: Set SF_Hidden flag correctly for module asm symbols, and expose
  // protected visibility.
  uint8_t Type = STT_NOTYPE;
  uint8_t Visibility = STV_DEFAULT;
  bool CanOmitFromDynSym = false;
  bool InDroppedComdat = false;
  if (GV) {
    if (GV->isThreadLocal())
      Type = STT_TLS;
    CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV);
    Visibility = getGvVisibility(GV);
    if (const Comdat *C = GV->getComdat())
      InDroppedComdat = !KeptComdats.count(C);
  }

  // Members of a comdat that was not kept, and symbols flagged undefined,
  // are both registered as undefined.
  if (InDroppedComdat || (Flags & BasicSymbolRef::SF_Undefined))
    return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
                                         CanOmitFromDynSym, this);

  if (Flags & BasicSymbolRef::SF_Common) {
    // FIXME: Set SF_Common flag correctly for module asm symbols, and expose
    // size and alignment.
    assert(GV);
    const Module &M = Obj.getModule();
    const DataLayout &DL = M.getDataLayout();
    uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
    return Symtab<ELFT>::X->addCommon(NameRef, Size, GV->getAlignment(),
                                      Binding, Visibility, STT_OBJECT, this);
  }

  return Symtab<ELFT>::X->addBitcode(NameRef, IsWeak, Visibility, Type,
                                     CanOmitFromDynSym, this);
}
// Returns true for symbols that are ignored: those lacking the SF_Global
// flag and those carrying SF_FormatSpecific.
bool BitcodeFile::shouldSkip(uint32_t Flags) {
  const bool IsGlobal = (Flags & BasicSymbolRef::SF_Global) != 0;
  const bool IsFormatSpecific =
      (Flags & BasicSymbolRef::SF_FormatSpecific) != 0;
  return !IsGlobal || IsFormatSpecific;
}
// Loads the bitcode module, decides which of its comdats are kept (those
// whose name was not already present in ComdatGroups), and creates a Symbol
// for every symbol that shouldSkip() does not reject.
template <class ELFT>
void BitcodeFile::parse(DenseSet<StringRef> &ComdatGroups) {
  Obj = check(IRObjectFile::create(MB, Driver->Context));
  const Module &M = Obj->getModule();

  DenseSet<const Comdat *> KeptComdats;
  for (const auto &Entry : M.getComdatSymbolTable()) {
    StringRef ComdatName = Saver.save(Entry.first());
    bool IsNew = ComdatGroups.insert(ComdatName).second;
    if (IsNew)
      KeptComdats.insert(&Entry.second);
  }

  for (const BasicSymbolRef &Sym : Obj->symbols()) {
    if (shouldSkip(Sym.getFlags()))
      continue;
    Symbols.push_back(createSymbol<ELFT>(KeptComdats, *Obj, Sym));
  }
}
// Instantiates T<ELFT> for the ELF class and data encoding found in MB's
// header and returns it as an InputFile. An unknown encoding or class is a
// fatal error. The first file created this way is recorded in
// Config->FirstElf.
template <template <class> class T>
static std::unique_ptr<InputFile> createELFFile(MemoryBufferRef MB) {
  unsigned char Size;
  unsigned char Endian;
  std::tie(Size, Endian) = getElfArchType(MB.getBuffer());

  const bool IsLittle = Endian == ELFDATA2LSB;
  if (!IsLittle && Endian != ELFDATA2MSB)
    fatal("invalid data encoding: " + MB.getBufferIdentifier());

  // From here on the encoding is known to be either LSB or MSB.
  std::unique_ptr<InputFile> File;
  if (Size == ELFCLASS32) {
    if (IsLittle)
      File.reset(new T<ELF32LE>(MB));
    else
      File.reset(new T<ELF32BE>(MB));
  } else if (Size == ELFCLASS64) {
    if (IsLittle)
      File.reset(new T<ELF64LE>(MB));
    else
      File.reset(new T<ELF64BE>(MB));
  } else {
    fatal("invalid file class: " + MB.getBufferIdentifier());
  }

  if (!Config->FirstElf)
    Config->FirstElf = File.get();
  return File;
}
// Returns true if the magic number of MB's contents identifies it as
// bitcode.
static bool isBitcode(MemoryBufferRef MB) {
  using namespace sys::fs;
  file_magic Magic = identify_magic(MB.getBuffer());
  return Magic == file_magic::bitcode;
}
// Creates the InputFile for a relocatable input: a BitcodeFile when MB
// holds bitcode, otherwise an ELF ObjectFile. ArchiveName is stored on the
// result.
std::unique_ptr<InputFile> elf::createObjectFile(MemoryBufferRef MB,
                                                 StringRef ArchiveName) {
  std::unique_ptr<InputFile> File;
  if (!isBitcode(MB))
    File = createELFFile<ObjectFile>(MB);
  else
    File.reset(new BitcodeFile(MB));
  File->ArchiveName = ArchiveName;
  return File;
}
// Creates a SharedFile of the ELF class/endianness found in MB's header.
std::unique_ptr<InputFile> elf::createSharedFile(MemoryBufferRef MB) {
return createELFFile<SharedFile>(MB);
}
// Hands out the file's buffer exactly once: the first call returns MB and
// every later call returns an empty MemoryBufferRef.
MemoryBufferRef LazyObjectFile::getBuffer() {
  if (!Seen) {
    Seen = true;
    return MB;
  }
  return MemoryBufferRef();
}
// Registers every name returned by getSymbols() with the global symbol
// table as a lazy object symbol that refers back to this file.
template <class ELFT>
void LazyObjectFile::parse() {
  for (StringRef Name : getSymbols()) {
    Symtab<ELFT>::X->addLazyObject(Name, *this);
  }
}
template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() {
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::SymRange Elf_Sym_Range;
const ELFFile<ELFT> Obj = createELFObj<ELFT>(this->MB);
for (const Elf_Shdr &Sec : Obj.sections()) {
if (Sec.sh_type != SHT_SYMTAB)
continue;
Elf_Sym_Range Syms = Obj.symbols(&Sec);
uint32_t FirstNonLocal = Sec.sh_info;
StringRef StringTable = check(Obj.getStringTableForSymtab(Sec));
std::vector<StringRef> V;
for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal))
if (Sym.st_shndx != SHN_UNDEF)
V.push_back(check(Sym.getName(StringTable)));
return V;
}
return {};
}
// Returns the names of the defined symbols of this bitcode file that
// BitcodeFile::shouldSkip() does not reject. The module is loaded into a
// function-local LLVMContext; the returned names are copies made by Saver.
std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() {
  LLVMContext Context;
  std::unique_ptr<IRObjectFile> Obj =
      check(IRObjectFile::create(this->MB, Context));

  std::vector<StringRef> Names;
  for (const BasicSymbolRef &Sym : Obj->symbols()) {
    const uint32_t Flags = Sym.getFlags();
    if (BitcodeFile::shouldSkip(Flags) ||
        (Flags & BasicSymbolRef::SF_Undefined))
      continue;

    SmallString<64> NameBuf;
    raw_svector_ostream NameOS(NameBuf);
    Sym.printName(NameOS);
    Names.push_back(Saver.save(StringRef(NameBuf)));
  }
  return Names;
}
// Returns a vector of globally-visible defined symbol names. Bitcode files
// are handled by getBitcodeSymbols(); ELF files are dispatched on the class
// and data encoding from their header (anything that is not ELFCLASS32 is
// treated as 64-bit, anything that is not ELFDATA2LSB as big-endian).
std::vector<StringRef> LazyObjectFile::getSymbols() {
  if (isBitcode(this->MB))
    return getBitcodeSymbols();

  unsigned char Size;
  unsigned char Endian;
  std::tie(Size, Endian) = getElfArchType(this->MB.getBuffer());

  const bool IsLittle = Endian == ELFDATA2LSB;
  if (Size == ELFCLASS32)
    return IsLittle ? getElfSymbols<ELF32LE>() : getElfSymbols<ELF32BE>();
  return IsLittle ? getElfSymbols<ELF64LE>() : getElfSymbols<ELF64BE>();
}
// Explicit instantiations. The templates above are defined in this file, so
// every ELF class/endianness combination is instantiated here.
// Member function templates of non-template classes:
template void ArchiveFile::parse<ELF32LE>();
template void ArchiveFile::parse<ELF32BE>();
template void ArchiveFile::parse<ELF64LE>();
template void ArchiveFile::parse<ELF64BE>();
template void BitcodeFile::parse<ELF32LE>(DenseSet<StringRef> &);
template void BitcodeFile::parse<ELF32BE>(DenseSet<StringRef> &);
template void BitcodeFile::parse<ELF64LE>(DenseSet<StringRef> &);
template void BitcodeFile::parse<ELF64BE>(DenseSet<StringRef> &);
template void LazyObjectFile::parse<ELF32LE>();
template void LazyObjectFile::parse<ELF32BE>();
template void LazyObjectFile::parse<ELF64LE>();
template void LazyObjectFile::parse<ELF64BE>();
// Class templates:
template class elf::ELFFileBase<ELF32LE>;
template class elf::ELFFileBase<ELF32BE>;
template class elf::ELFFileBase<ELF64LE>;
template class elf::ELFFileBase<ELF64BE>;
template class elf::ObjectFile<ELF32LE>;
template class elf::ObjectFile<ELF32BE>;
template class elf::ObjectFile<ELF64LE>;
template class elf::ObjectFile<ELF64BE>;
template class elf::SharedFile<ELF32LE>;
template class elf::SharedFile<ELF32BE>;
template class elf::SharedFile<ELF64LE>;
template class elf::SharedFile<ELF64BE>;

View File

@ -0,0 +1,304 @@
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_INPUT_FILES_H
#define LLD_ELF_INPUT_FILES_H
#include "Config.h"
#include "InputSection.h"
#include "Error.h"
#include "Symbols.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Comdat.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/StringSaver.h"
#include <map>
namespace lld {
namespace elf {
using llvm::object::Archive;
class InputFile;
class Lazy;
class SymbolBody;
// The root class of input files. It stores the file's memory buffer and a
// kind tag that subclasses set through the protected constructor and that
// their classof() functions test.
class InputFile {
public:
// Discriminator identifying the concrete kind of input file.
enum Kind {
ObjectKind,
SharedKind,
LazyObjectKind,
ArchiveKind,
BitcodeKind,
};
Kind kind() const { return FileKind; }
// The file's name, taken from the identifier of its memory buffer.
StringRef getName() const { return MB.getBufferIdentifier(); }
// The buffer holding the file's contents.
MemoryBufferRef MB;
// Filename of .a which contained this file. If this file was
// not in an archive file, it is the empty string. We use this
// string for creating error messages.
StringRef ArchiveName;
// If this is an architecture-specific file, the following members
// have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
ELFKind EKind = ELFNoneKind;
uint16_t EMachine = llvm::ELF::EM_NONE;
protected:
// Only subclasses can construct an InputFile; K becomes the permanent kind.
InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
private:
const Kind FileKind;
};
// Returns "(internal)", "foo.a(bar.o)" or "baz.o".
// The first form is used for a null F, the second for a file whose
// ArchiveName is non-empty, and the third otherwise.
std::string getFilename(const InputFile *F);
// Common base of the ELF-backed input files (ObjectKind and SharedKind, per
// classof below). It owns the ELFFile object built over the input buffer
// and the symbol-table state shared by its subclasses.
template <typename ELFT> class ELFFileBase : public InputFile {
public:
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Word Elf_Word;
typedef typename ELFT::SymRange Elf_Sym_Range;
ELFFileBase(Kind K, MemoryBufferRef M);
// Support for isa<>/cast<>/dyn_cast<>: true for object and shared files.
static bool classof(const InputFile *F) {
Kind K = F->kind();
return K == ObjectKind || K == SharedKind;
}
const llvm::object::ELFFile<ELFT> &getObj() const { return ELFObj; }
llvm::object::ELFFile<ELFT> &getObj() { return ELFObj; }
// The EI_OSABI byte of the ELF header's e_ident array.
uint8_t getOSABI() const {
return getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI];
}
StringRef getStringTable() const { return StringTable; }
// Section index a symbol refers to; see the definition for how reserved
// and extended (SHN_XINDEX) indices are treated.
uint32_t getSectionIndex(const Elf_Sym &Sym) const;
// Symbols of the table recorded in Symtab; with OnlyGlobals set, only
// those at index sh_info and above.
Elf_Sym_Range getElfSymbols(bool OnlyGlobals);
protected:
llvm::object::ELFFile<ELFT> ELFObj;
// Header of the symbol table section, set by subclasses while they scan
// the section headers; null until then.
const Elf_Shdr *Symtab = nullptr;
// Contents of the SHT_SYMTAB_SHNDX section, if the file has one.
ArrayRef<Elf_Word> SymtabSHNDX;
// String table belonging to Symtab; filled in by initStringTable().
StringRef StringTable;
void initStringTable();
};
// .o file.
//
// Owns the input sections and symbol bodies created while parsing a
// relocatable object file.
template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> {
  using Base = ELFFileBase<ELFT>;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym_Range = typename ELFT::SymRange;
  using Elf_Word = typename ELFT::Word;
  using uintX_t = typename ELFT::uint;

  // Helpers for SHT_GROUP sections.
  StringRef getShtGroupSignature(const Elf_Shdr &Sec);
  ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec);

public:
  // LLVM-style RTTI support.
  static bool classof(const InputFile *F) {
    return F->kind() == Base::ObjectKind;
  }

  ArrayRef<SymbolBody *> getSymbols();
  ArrayRef<SymbolBody *> getLocalSymbols();
  ArrayRef<SymbolBody *> getNonLocalSymbols();

  explicit ObjectFile(MemoryBufferRef M);
  void parse(llvm::DenseSet<StringRef> &ComdatGroups);

  ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; }
  InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const;

  // Returns the symbol body stored at the given symbol table index.
  // The index is not range-checked.
  SymbolBody &getSymbolBody(uint32_t SymbolIndex) const {
    return *SymbolBodies[SymbolIndex];
  }

  // Returns the symbol body a relocation record refers to.
  template <typename RelT> SymbolBody &getRelocTargetSym(const RelT &Rel) const {
    return getSymbolBody(Rel.getSymbol(Config->Mips64EL));
  }

  const Elf_Shdr *getSymbolTable() const { return this->Symtab; }

  // Get MIPS GP0 value defined by this file. This value represents the gp value
  // used to create the relocatable object and required to support
  // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
  uint32_t getMipsGp0() const;

  // The number is the offset in the string table. It will be used as the
  // st_name of the symbol.
  std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms;

  // SymbolBodies and Thunks for sections in this file are allocated
  // using this buffer.
  llvm::BumpPtrAllocator Alloc;

private:
  void initializeSections(llvm::DenseSet<StringRef> &ComdatGroups);
  void initializeSymbols();
  InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec);
  InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec);

  bool shouldMerge(const Elf_Shdr &Sec);
  SymbolBody *createSymbolBody(const Elf_Sym *Sym);

  // List of all sections defined by this file.
  std::vector<InputSectionBase<ELFT> *> Sections;

  // List of all symbols referenced or defined by this file.
  std::vector<SymbolBody *> SymbolBodies;

  // MIPS .reginfo section defined by this file.
  std::unique_ptr<MipsReginfoInputSection<ELFT>> MipsReginfo;
  // MIPS .MIPS.options section defined by this file.
  std::unique_ptr<MipsOptionsInputSection<ELFT>> MipsOptions;

  // Typed allocators, one per input section class.
  llvm::SpecificBumpPtrAllocator<InputSection<ELFT>> IAlloc;
  llvm::SpecificBumpPtrAllocator<MergeInputSection<ELFT>> MAlloc;
  llvm::SpecificBumpPtrAllocator<EhInputSection<ELFT>> EHAlloc;
};
// LazyObjectFile is analogous to ArchiveFile in the sense that
// the file contains lazy symbols. The difference is that
// LazyObjectFile wraps a single file instead of multiple files.
//
// This class is used for --start-lib and --end-lib options which
// instruct the linker to link object files between them with the
// archive file semantics.
class LazyObjectFile : public InputFile {
public:
explicit LazyObjectFile(MemoryBufferRef M) : InputFile(LazyObjectKind, M) {}
static bool classof(const InputFile *F) {
return F->kind() == LazyObjectKind;
}
template <class ELFT> void parse();
MemoryBufferRef getBuffer();
private:
std::vector<StringRef> getSymbols();
template <class ELFT> std::vector<StringRef> getElfSymbols();
std::vector<StringRef> getBitcodeSymbols();
llvm::BumpPtrAllocator Alloc;
llvm::StringSaver Saver{Alloc};
bool Seen = false;
};
// An ArchiveFile object represents a .a file.
class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
template <class ELFT> void parse();
// Returns a memory buffer for a given symbol. An empty memory buffer
// is returned if we have already returned the same memory buffer.
// (So that we don't instantiate same members more than once.)
MemoryBufferRef getMember(const Archive::Symbol *Sym);
private:
std::unique_ptr<Archive> File;
llvm::DenseSet<uint64_t> Seen;
};
class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(MemoryBufferRef M);
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
template <class ELFT>
void parse(llvm::DenseSet<StringRef> &ComdatGroups);
ArrayRef<Symbol *> getSymbols() { return Symbols; }
static bool shouldSkip(uint32_t Flags);
std::unique_ptr<llvm::object::IRObjectFile> Obj;
private:
std::vector<Symbol *> Symbols;
llvm::BumpPtrAllocator Alloc;
llvm::StringSaver Saver{Alloc};
template <class ELFT>
Symbol *createSymbol(const llvm::DenseSet<const llvm::Comdat *> &KeptComdats,
const llvm::object::IRObjectFile &Obj,
const llvm::object::BasicSymbolRef &Sym);
};
// .so file.
template <class ELFT> class SharedFile : public ELFFileBase<ELFT> {
  using Base = ELFFileBase<ELFT>;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Word = typename ELFT::Word;
  using Elf_Sym_Range = typename ELFT::SymRange;
  using Elf_Versym = typename ELFT::Versym;
  using Elf_Verdef = typename ELFT::Verdef;

  std::vector<StringRef> Undefs;
  StringRef SoName;
  // Symbol-versioning section headers; null until they are found.
  const Elf_Shdr *VersymSec = nullptr;
  const Elf_Shdr *VerdefSec = nullptr;

public:
  StringRef getSoName() const { return SoName; }
  const Elf_Shdr *getSection(const Elf_Sym &Sym) const;
  llvm::ArrayRef<StringRef> getUndefinedSymbols() { return Undefs; }

  // LLVM-style RTTI support.
  static bool classof(const InputFile *F) {
    return F->kind() == Base::SharedKind;
  }

  explicit SharedFile(MemoryBufferRef M);

  // Parsing is split into two steps: the soname first, then the rest.
  void parseSoName();
  void parseRest();
  std::vector<const Elf_Verdef *> parseVerdefs(const Elf_Versym *&Versym);

  struct NeededVer {
    // The string table offset of the version name in the output file.
    size_t StrTab;

    // The version identifier for this version name.
    uint16_t Index;
  };

  // Mapping from Elf_Verdef data structures to information about Elf_Vernaux
  // data structures in the output file.
  std::map<const Elf_Verdef *, NeededVer> VerdefMap;

  // Used for --as-needed
  bool AsNeeded = false;
  bool IsUsed = false;

  // The library is needed unless it is marked as-needed and is unused.
  bool isNeeded() const { return !(AsNeeded && !IsUsed); }
};
// Factory functions returning a newly allocated input file for the given
// memory buffer. Both are declared here only; the definitions live out of
// line. ArchiveName defaults to the empty string.
std::unique_ptr<InputFile> createObjectFile(MemoryBufferRef MB,
StringRef ArchiveName = "");
std::unique_ptr<InputFile> createSharedFile(MemoryBufferRef MB);
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,691 @@
//===- InputSection.cpp ---------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "InputSection.h"
#include "Config.h"
#include "EhFrame.h"
#include "Error.h"
#include "InputFiles.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "Target.h"
#include "Thunks.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;
// Returns true if the section must not appear in the output: it is null,
// it is the Discarded sentinel, it is not live, or the linker script
// discards it. The checks run in that order.
template <class ELFT> bool elf::isDiscarded(InputSectionBase<ELFT> *S) {
  if (!S)
    return true;
  if (S == &InputSection<ELFT>::Discarded)
    return true;
  if (!S->Live)
    return true;
  return Script<ELFT>::X->isDiscarded(S);
}
// Builds a section descriptor for the section header Header of File.
// The Compressed flag is taken from SHF_COMPRESSED in the section flags.
template <class ELFT>
InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File,
                                         const Elf_Shdr *Header,
                                         Kind SectionKind)
    : Header(Header), File(File), SectionKind(SectionKind), Repl(this),
      Compressed(Header->sh_flags & SHF_COMPRESSED) {
  // The ELF spec states that a value of 0 means the section has
  // no alignment constraints, so treat it as an alignment of 1.
  uintX_t AddrAlign = Header->sh_addralign;
  Alignment = std::max<uintX_t>(AddrAlign, 1);

  // The garbage collector sets sections' Live bits.
  // If GC is disabled, all sections are considered live by default.
  Live = !Config->GcSections;
}
// Returns the size of this section. For a regular section that has thunks
// attached, the size extends to the end of the thunks; otherwise it is the
// sh_size of the section header.
template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const {
  const auto *IS = dyn_cast<InputSection<ELFT>>(this);
  if (!IS || IS->getThunksSize() == 0)
    return Header->sh_size;
  return IS->getThunkOff() + IS->getThunksSize();
}
// Looks up the name of this section in the owning object file. A lookup
// failure is passed to check().
template <class ELFT> StringRef InputSectionBase<ELFT>::getSectionName() const {
  const Elf_Shdr *Hdr = this->Header;
  return check(this->File->getObj().getSectionName(Hdr));
}
// Returns the contents of this section. For a compressed section this is
// the Uncompressed buffer; otherwise the bytes are read from the object
// file.
template <class ELFT>
ArrayRef<uint8_t> InputSectionBase<ELFT>::getSectionData() const {
  if (!Compressed)
    return check(this->File->getObj().getSectionContents(this->Header));

  const auto *Begin = reinterpret_cast<const uint8_t *>(Uncompressed.data());
  return ArrayRef<uint8_t>(Begin, Uncompressed.size());
}
// Translates an offset in this input section to an offset in the output
// section, dispatching on the concrete section kind.
template <class ELFT>
typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const {
  switch (SectionKind) {
  case Regular: {
    const auto *IS = cast<InputSection<ELFT>>(this);
    return IS->OutSecOff + Offset;
  }
  case EHFrame: {
    const auto *EH = cast<EhInputSection<ELFT>>(this);
    return EH->getOffset(Offset);
  }
  case Merge: {
    const auto *MS = cast<MergeInputSection<ELFT>>(this);
    return MS->getOffset(Offset);
  }
  case MipsReginfo:
  case MipsOptions:
    // MIPS .reginfo and .MIPS.options sections are consumed by the linker,
    // and the linker produces a single output section. It is possible that
    // input files contain section symbol points to the corresponding input
    // section. Redirect it to the produced output section.
    if (Offset != 0)
      fatal("Unsupported reference to the middle of '" + getSectionName() +
            "' section");
    return this->OutSec->getVA();
  }
  llvm_unreachable("invalid section kind");
}
// Decompresses the section contents into the Uncompressed buffer.
// Every failure (no zlib, truncated header, unknown compression type,
// decompression error) is reported through fatal().
template <class ELFT> void InputSectionBase<ELFT>::uncompress() {
  if (!zlib::isAvailable())
    fatal("build lld with zlib to enable compressed sections support");

  // A compressed section consists of a header of Elf_Chdr type
  // followed by compressed data.
  ArrayRef<uint8_t> Raw =
      check(this->File->getObj().getSectionContents(this->Header));
  const size_t HdrSize = sizeof(Elf_Chdr);
  if (Raw.size() < HdrSize)
    fatal("corrupt compressed section");

  const auto *Chdr = reinterpret_cast<const Elf_Chdr *>(Raw.data());
  if (Chdr->ch_type != ELFCOMPRESS_ZLIB)
    fatal("unsupported compression type");

  // Everything after the header is the zlib stream; ch_size is passed to
  // zlib::uncompress() as the uncompressed size.
  ArrayRef<uint8_t> Payload = Raw.slice(HdrSize);
  StringRef Input(reinterpret_cast<const char *>(Payload.data()),
                  Payload.size());
  if (zlib::uncompress(Input, Uncompressed, Chdr->ch_size) != zlib::StatusOK)
    fatal("error uncompressing section");
}
// Convenience overload: translates the value of a defined symbol, taken
// as an offset in this input section, to an output section offset.
template <class ELFT>
typename ELFT::uint
InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const {
  return this->getOffset(Sym.Value);
}
// Constructs a section of kind Regular for the given header of file F.
template <class ELFT>
InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F,
                                 const Elf_Shdr *Header)
    : InputSectionBase<ELFT>(F, Header, InputSectionBase<ELFT>::Regular) {}
// LLVM-style RTTI support: true for sections of kind Regular.
template <class ELFT>
bool InputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto K = S->SectionKind;
  return K == InputSectionBase<ELFT>::Regular;
}
// For a relocation section (SHT_REL / SHT_RELA), returns the section the
// relocations apply to, which is the file section indexed by sh_info.
template <class ELFT>
InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() {
  assert(this->Header->sh_type == SHT_RELA || this->Header->sh_type == SHT_REL);
  return this->File->getSections()[this->Header->sh_info];
}
// Appends a thunk to the list of thunks attached to this section.
template <class ELFT>
void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) {
  this->Thunks.push_back(T);
}
// Returns the offset of the thunks within this section, which is the
// sh_size of the section header.
template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const {
  const auto *Hdr = this->Header;
  return Hdr->sh_size;
}
// Returns the combined size of all thunks attached to this section
// (zero if there are none).
template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const {
  uint64_t Sum = 0;
  for (auto It = Thunks.begin(), End = Thunks.end(); It != End; ++It)
    Sum += (*It)->size();
  return Sum;
}
// Copies the explicit addend of a RELA record from Src to Dst.
template <class ELFT>
static void copyRelocAddend(typename ELFT::Rela &Dst,
                            const typename ELFT::Rela &Src) {
  Dst.r_addend = Src.r_addend;
}

// REL records have no explicit addend field, so there is nothing to copy.
template <class ELFT>
static void copyRelocAddend(typename ELFT::Rel &, const typename ELFT::Rel &) {}

// This is used for -r. We can't use memcpy to copy relocations because we need
// to update symbol table offset and section index for each relocation. So we
// copy relocations one by one.
//
// Buf points to the place where the output records are written; one record
// of type RelTy is emitted for every entry of Rels. Each output record gets
// the translated offset, the target symbol's DynsymIndex and the original
// relocation type. For RELA records the input addend is carried over as
// well; previously it was never written, so the addend was lost in the
// output.
template <class ELFT>
template <class RelTy>
void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
  InputSectionBase<ELFT> *RelocatedSection = getRelocatedSection();

  for (const RelTy &Rel : Rels) {
    uint32_t Type = Rel.getType(Config->Mips64EL);
    SymbolBody &Body = this->File->getRelocTargetSym(Rel);

    RelTy *P = reinterpret_cast<RelTy *>(Buf);
    Buf += sizeof(RelTy);

    // Overload resolution picks the no-op variant for REL records.
    copyRelocAddend<ELFT>(*P, Rel);
    P->r_offset = RelocatedSection->getOffset(Rel.r_offset);
    P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL);
  }
}
// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
static uint64_t getAArch64Page(uint64_t Expr) {
  const uint64_t PageOffsetMask = 0xFFF;
  return Expr & ~PageOffsetMask;
}
template <class ELFT>
static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A,
typename ELFT::uint P,
const SymbolBody &Body, RelExpr Expr) {
typedef typename ELFT::uint uintX_t;
switch (Expr) {
case R_HINT:
llvm_unreachable("cannot relocate hint relocs");
case R_TLSLD:
return Out<ELFT>::Got->getTlsIndexOff() + A -
Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t);
case R_TLSLD_PC:
return Out<ELFT>::Got->getTlsIndexVA() + A - P;
case R_THUNK_ABS:
return Body.getThunkVA<ELFT>() + A;
case R_THUNK_PC:
case R_THUNK_PLT_PC:
return Body.getThunkVA<ELFT>() + A - P;
case R_PPC_TOC:
return getPPC64TocBase() + A;
case R_TLSGD:
return Out<ELFT>::Got->getGlobalDynOffset(Body) + A -
Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t);
case R_TLSGD_PC:
return Out<ELFT>::Got->getGlobalDynAddr(Body) + A - P;
case R_TLSDESC:
return Out<ELFT>::Got->getGlobalDynAddr(Body) + A;
case R_TLSDESC_PAGE:
return getAArch64Page(Out<ELFT>::Got->getGlobalDynAddr(Body) + A) -
getAArch64Page(P);
case R_PLT:
return Body.getPltVA<ELFT>() + A;
case R_PLT_PC:
case R_PPC_PLT_OPD:
return Body.getPltVA<ELFT>() + A - P;
case R_SIZE:
return Body.getSize<ELFT>() + A;
case R_GOTREL:
return Body.getVA<ELFT>(A) - Out<ELFT>::Got->getVA();
case R_RELAX_TLS_GD_TO_IE_END:
case R_GOT_FROM_END:
return Body.getGotOffset<ELFT>() + A -
Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t);
case R_RELAX_TLS_GD_TO_IE_ABS:
case R_GOT:
return Body.getGotVA<ELFT>() + A;
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
case R_GOT_PAGE_PC:
return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P);
case R_RELAX_TLS_GD_TO_IE:
case R_GOT_PC:
return Body.getGotVA<ELFT>() + A - P;
case R_GOTONLY_PC:
return Out<ELFT>::Got->getVA() + A - P;
case R_RELAX_TLS_LD_TO_LE:
case R_RELAX_TLS_IE_TO_LE:
case R_RELAX_TLS_GD_TO_LE:
case R_TLS:
if (Target->TcbSize)
return Body.getVA<ELFT>(A) +
alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align);
return Body.getVA<ELFT>(A) - Out<ELFT>::TlsPhdr->p_memsz;
case R_RELAX_TLS_GD_TO_LE_NEG:
case R_NEG_TLS:
return Out<ELF32LE>::TlsPhdr->p_memsz - Body.getVA<ELFT>(A);
case R_ABS:
case R_RELAX_GOT_PC_NOPIC:
return Body.getVA<ELFT>(A);
case R_GOT_OFF:
return Body.getGotOffset<ELFT>() + A;
case R_MIPS_GOT_LOCAL_PAGE:
// If relocation against MIPS local symbol requires GOT entry, this entry
// should be initialized by 'page address'. This address is high 16-bits
// of sum the symbol's value and the addend.
return Out<ELFT>::Got->getMipsLocalPageOffset(Body.getVA<ELFT>(A));
case R_MIPS_GOT_OFF:
// In case of MIPS if a GOT relocation has non-zero addend this addend
// should be applied to the GOT entry content not to the GOT entry offset.
// That is why we use separate expression type.
return Out<ELFT>::Got->getMipsGotOffset(Body, A);
case R_MIPS_TLSGD:
return Out<ELFT>::Got->getGlobalDynOffset(Body) +
Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset;
case R_MIPS_TLSLD:
return Out<ELFT>::Got->getTlsIndexOff() +
Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset;
case R_PPC_OPD: {
uint64_t SymVA = Body.getVA<ELFT>(A);
// If we have an undefined weak symbol, we might get here with a symbol
// address of zero. That could overflow, but the code must be unreachable,
// so don't bother doing anything at all.
if (!SymVA)
return 0;
if (Out<ELF64BE>::Opd) {
// If this is a local call, and we currently have the address of a
// function-descriptor, get the underlying code address instead.
uint64_t OpdStart = Out<ELF64BE>::Opd->getVA();
uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->getSize();
bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd;
if (InOpd)
SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]);
}
return SymVA - P;
}
case R_PC:
case R_RELAX_GOT_PC:
return Body.getVA<ELFT>(A) - P;
case R_PLT_PAGE_PC:
case R_PAGE_PC:
return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P);
}
llvm_unreachable("Invalid expression");
}
// This function applies relocations to sections without SHF_ALLOC bit.
// Such sections are never mapped to memory at runtime. Debug sections are
// an example. Relocations in non-alloc sections are much easier to
// handle than in allocated sections because they never need complex
// treatment such as GOT or PLT (because at runtime no one refers to them).
// So, we handle relocations for non-alloc sections directly in this
// function as a performance optimization.
//
// Only relocations whose expression is R_ABS are accepted; the first other
// one reports an error and stops processing.
template <class ELFT>
template <class RelTy>
void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) {
  const unsigned Bits = sizeof(uintX_t) * 8;

  for (const RelTy &R : Rels) {
    uint32_t RelType = R.getType(Config->Mips64EL);
    uintX_t OutOff = this->getOffset(R.r_offset);
    uint8_t *Loc = Buf + OutOff;

    // REL records keep their addend in the section contents.
    uintX_t A = getAddend<ELFT>(R);
    if (!RelTy::IsRela)
      A += Target->getImplicitAddend(Loc, RelType);

    SymbolBody &Body = this->File->getRelocTargetSym(R);
    if (Target->getRelExpr(RelType, Body) != R_ABS) {
      error(this->getSectionName() + " has non-ABS reloc");
      return;
    }

    uintX_t Place = this->OutSec->getVA() + OutOff;
    uintX_t Raw = getSymVA<ELFT>(RelType, A, Place, Body, R_ABS);
    uint64_t Value = SignExtend64<Bits>(Raw);
    Target->relocateOne(Loc, RelType, Value);
  }
}
// Applies relocations to the section contents that were copied into Buf.
// BufEnd marks the end of the copied contents and is used only to bound
// the PPC64 nop-patching check below.
template <class ELFT>
void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) {
  // scanReloc function in Writer.cpp constructs Relocations
  // vector only for SHF_ALLOC'ed sections. For other sections,
  // we handle relocations directly here.
  if (auto *IS = dyn_cast<InputSection<ELFT>>(this)) {
    if (!(IS->Header->sh_flags & SHF_ALLOC)) {
      for (const Elf_Shdr *RelSec : IS->RelocSections) {
        if (RelSec->sh_type != SHT_RELA)
          IS->relocateNonAlloc(Buf, IS->File->getObj().rels(RelSec));
        else
          IS->relocateNonAlloc(Buf, IS->File->getObj().relas(RelSec));
      }
      return;
    }
  }

  const unsigned Bits = sizeof(uintX_t) * 8;
  for (const Relocation<ELFT> &R : Relocations) {
    uintX_t OutOff = R.InputSec->getOffset(R.Offset);
    uint8_t *Loc = Buf + OutOff;
    uint32_t RelType = R.Type;
    uintX_t Place = OutSec->getVA() + OutOff;
    RelExpr E = R.Expr;
    uintX_t Raw = getSymVA<ELFT>(RelType, R.Addend, Place, *R.Sym, E);
    uint64_t Value = SignExtend64<Bits>(Raw);

    switch (E) {
    case R_RELAX_GOT_PC:
    case R_RELAX_GOT_PC_NOPIC:
      Target->relaxGot(Loc, Value);
      break;
    case R_RELAX_TLS_IE_TO_LE:
      Target->relaxTlsIeToLe(Loc, RelType, Value);
      break;
    case R_RELAX_TLS_LD_TO_LE:
      Target->relaxTlsLdToLe(Loc, RelType, Value);
      break;
    case R_RELAX_TLS_GD_TO_LE:
    case R_RELAX_TLS_GD_TO_LE_NEG:
      Target->relaxTlsGdToLe(Loc, RelType, Value);
      break;
    case R_RELAX_TLS_GD_TO_IE:
    case R_RELAX_TLS_GD_TO_IE_ABS:
    case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
    case R_RELAX_TLS_GD_TO_IE_END:
      Target->relaxTlsGdToIe(Loc, RelType, Value);
      break;
    case R_PPC_PLT_OPD:
      // Patch a nop (0x60000000) to a ld, then relocate as usual.
      if (Loc + 8 <= BufEnd && read32be(Loc + 4) == 0x60000000)
        write32be(Loc + 4, 0xe8410028); // ld %r2, 40(%r1)
      Target->relocateOne(Loc, RelType, Value);
      break;
    default:
      Target->relocateOne(Loc, RelType, Value);
      break;
    }
  }
}
// Writes this section into the output buffer Buf at offset OutSecOff.
// SHT_NOBITS sections write nothing, relocation sections (present when -r
// is given) are copied record by record, and all other sections are copied
// verbatim, relocated, and followed by their thunks.
template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) {
  uint32_t ShType = this->Header->sh_type;
  switch (ShType) {
  case SHT_NOBITS:
    return;
  // If -r is given, then an InputSection may be a relocation section.
  case SHT_RELA:
    copyRelocations(Buf + OutSecOff,
                    this->File->getObj().relas(this->Header));
    return;
  case SHT_REL:
    copyRelocations(Buf + OutSecOff, this->File->getObj().rels(this->Header));
    return;
  default:
    break;
  }

  // Copy section contents from source object file to output file.
  ArrayRef<uint8_t> Contents = this->getSectionData();
  uint8_t *Dst = Buf + OutSecOff;
  memcpy(Dst, Contents.data(), Contents.size());

  // Iterate over all relocation sections that apply to this section.
  this->relocate(Buf, Dst + Contents.size());

  // The section might have a data/code generated by the linker and need
  // to be written after the section. Usually these are thunks - small piece
  // of code used to jump between "incompatible" functions like PIC and non-PIC
  // or if the jump target too far and its address does not fit to the short
  // jump instruction.
  if (Thunks.empty())
    return;
  uint8_t *ThunkBuf = Buf + OutSecOff + getThunkOff();
  for (const Thunk<ELFT> *T : Thunks) {
    T->writeTo(ThunkBuf);
    ThunkBuf += T->size();
  }
}
// Makes Other a replica of this section: Other's Repl pointer is set to
// this section's Repl, Other is marked not live, and this section takes
// the larger of the two alignments.
template <class ELFT>
void InputSection<ELFT>::replace(InputSection<ELFT> *Other) {
  if (this->Alignment < Other->Alignment)
    this->Alignment = Other->Alignment;
  Other->Live = false;
  Other->Repl = this->Repl;
}
// Forwards all arguments to the InputSectionBase constructor; the list of
// pieces starts out empty.
template <class ELFT>
SplitInputSection<ELFT>::SplitInputSection(
    elf::ObjectFile<ELFT> *File, const Elf_Shdr *Header,
    typename InputSectionBase<ELFT>::Kind SectionKind)
    : InputSectionBase<ELFT>(File, Header, SectionKind) {
}
// Constructs a section of kind EHFrame for the given header of file F.
template <class ELFT>
EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F,
                                     const Elf_Shdr *Header)
    : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::EHFrame) {
  // .eh_frame sections are always live, whatever the base constructor
  // chose: there are usually no relocations that point to .eh_frames, so
  // the garbage collector would otherwise drop all of them.
  this->Live = true;
}
// LLVM-style RTTI support: true for sections of kind EHFrame.
template <class ELFT>
bool EhInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto K = S->SectionKind;
  return K == InputSectionBase<ELFT>::EHFrame;
}
// .eh_frame is a sequence of CIE or FDE records.
// This function splits the input section into records and appends one
// piece per record to this->Pieces.
template <class ELFT>
void EhInputSection<ELFT>::split() {
  ArrayRef<uint8_t> Data = this->getSectionData();
  const size_t End = Data.size();
  size_t Off = 0;
  while (Off != End) {
    size_t RecordSize = readEhRecordSize<ELFT>(Data.slice(Off));
    this->Pieces.emplace_back(Off, Data.slice(Off, RecordSize));
    // The empty record is the end marker.
    if (RecordSize == 4)
      break;
    Off += RecordSize;
  }
}
// Translates an offset in this .eh_frame input section to an offset in the
// output section. Returns -1 (all bits set) if the piece containing the
// offset has no output offset assigned.
template <class ELFT>
typename ELFT::uint EhInputSection<ELFT>::getOffset(uintX_t Offset) const {
  // The file crtbeginT.o has relocations pointing to the start of an empty
  // .eh_frame that is known to be the first in the link. It does that to
  // identify the start of the output .eh_frame. Handle this special case.
  if (this->getSectionHdr()->sh_size == 0)
    return Offset;

  const SectionPiece *Piece = this->getSectionPiece(Offset);
  if (Piece->OutputOff == size_t(-1))
    return uintX_t(-1); // Not in the output

  // Keep the distance from the start of the piece.
  uintX_t Delta = Offset - Piece->InputOff;
  return Piece->OutputOff + Delta;
}
// Returns the byte offset of the first null terminator in A, where a
// terminator is EntSize consecutive zero bytes starting at a multiple of
// EntSize. Returns StringRef::npos if there is none.
//
// An EntSize of zero never matches. A trailing partial entry (when A.size()
// is not a multiple of EntSize) is ignored instead of being read past the
// end of the buffer.
static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) {
  if (EntSize == 0)
    return StringRef::npos;

  // Optimize the common case.
  StringRef S((const char *)A.data(), A.size());
  if (EntSize == 1)
    return S.find(0);

  // Use size_t so that large buffers are not truncated, and stop as soon
  // as a full entry no longer fits.
  for (size_t I = 0, N = S.size(); N - I >= EntSize; I += EntSize) {
    const char *B = S.begin() + I;
    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
      return I;
  }
  return StringRef::npos;
}
// Split SHF_STRINGS section. Such section is a sequence of
// null-terminated strings. Each returned piece covers one string including
// its terminator; a string without a terminator is a fatal error.
static std::vector<SectionPiece> splitStrings(ArrayRef<uint8_t> Data,
                                              size_t EntSize) {
  std::vector<SectionPiece> Pieces;
  for (size_t Off = 0; !Data.empty();) {
    size_t NullPos = findNull(Data, EntSize);
    if (NullPos == StringRef::npos)
      fatal("string is not null terminated");

    // The piece extends to the end of the terminator.
    size_t Len = NullPos + EntSize;
    Pieces.emplace_back(Off, Data.slice(0, Len));
    Data = Data.slice(Len);
    Off += Len;
  }
  return Pieces;
}
// Split non-SHF_STRINGS section. Such section is a sequence of
// fixed size records; one piece of EntSize bytes is returned per record.
//
// The section size is expected to be a multiple of EntSize (asserted).
// NOTE(review): EntSize is assumed to be non-zero here - presumably the
// caller validates sh_entsize before splitting; confirm.
static std::vector<SectionPiece> splitNonStrings(ArrayRef<uint8_t> Data,
                                                 size_t EntSize) {
  std::vector<SectionPiece> V;
  size_t Size = Data.size();
  assert((Size % EntSize) == 0);
  // Index with size_t so that the section size is not truncated, and stop
  // when a whole record no longer fits so that builds without assertions
  // never slice past the end of the data.
  for (size_t I = 0; Size - I >= EntSize; I += EntSize)
    V.emplace_back(I, Data.slice(I, EntSize));
  return V;
}
// Constructs a section of kind Merge for the given header of file F.
template <class ELFT>
MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F,
                                           const Elf_Shdr *Header)
    : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {
}
// Splits the section contents into pieces: null-terminated strings for
// SHF_STRINGS sections, fixed-size records of sh_entsize bytes otherwise.
// When garbage collection is enabled, the pieces at the offsets recorded in
// LiveOffsets are then marked live.
template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() {
  const bool IsStrings = this->Header->sh_flags & SHF_STRINGS;
  ArrayRef<uint8_t> Data = this->getSectionData();
  uintX_t EntSize = this->Header->sh_entsize;

  this->Pieces = IsStrings ? splitStrings(Data, EntSize)
                           : splitNonStrings(Data, EntSize);

  if (!Config->GcSections)
    return;
  for (uintX_t Off : LiveOffsets)
    this->getSectionPiece(Off)->Live = true;
}
// LLVM-style RTTI support: true for sections of kind Merge.
template <class ELFT>
bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto K = S->SectionKind;
  return K == InputSectionBase<ELFT>::Merge;
}
// Do binary search to get a section piece at a given input offset.
// This mutable overload delegates to the const overload and strips the
// constness from the result.
template <class ELFT>
SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) {
  const SplitInputSection<ELFT> *ConstThis = this;
  const SectionPiece *Piece = ConstThis->getSectionPiece(Offset);
  return const_cast<SectionPiece *>(Piece);
}
// Returns the piece that contains the given input offset: the last piece
// whose InputOff is not greater than Offset. An offset at or past the end
// of the section data is a fatal error.
template <class ELFT>
const SectionPiece *
SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) const {
  uintX_t Size = this->getSectionData().size();
  if (Offset >= Size)
    fatal("entry is past the end of the section");

  // Find the first piece that starts after Offset; the piece we want is
  // the one right before it.
  auto StartsAfter = [](const uintX_t &Off, const SectionPiece &P) {
    return Off < P.InputOff;
  };
  auto Next = std::upper_bound(Pieces.begin(), Pieces.end(), Offset,
                               StartsAfter);
  --Next;
  return &*Next;
}
// Returns the offset in an output section for a given input offset.
// Because contents of a mergeable section is not contiguous in output,
// it is not just an addition to a base output offset.
template <class ELFT>
typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const {
  auto Cached = OffsetMap.find(Offset);
  if (Cached == OffsetMap.end()) {
    // If Offset is not at beginning of a section piece, it is not in the map.
    // In that case we need to search from the original section piece vector.
    const SectionPiece &Piece = *this->getSectionPiece(Offset);
    assert(Piece.Live);
    uintX_t Delta = Offset - Piece.InputOff;
    return Piece.OutputOff + Delta;
  }
  return Cached->second;
}
// Create a map from input offsets to output offsets for all section pieces.
// It is called after finalize(). Pieces that are not live are left out of
// the map.
template <class ELFT> void MergeInputSection<ELFT>::finalizePieces() {
  OffsetMap.grow(this->Pieces.size());
  for (SectionPiece &P : this->Pieces) {
    if (P.Live) {
      if (P.OutputOff == size_t(-1)) {
        // Offsets of tail-merged strings are computed lazily.
        auto *MergeSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
        ArrayRef<uint8_t> Bytes = P.data();
        StringRef Str(reinterpret_cast<const char *>(Bytes.data()),
                      Bytes.size());
        P.OutputOff = MergeSec->getOffset(Str);
      }
      OffsetMap[P.InputOff] = P.OutputOff;
    }
  }
}
// Constructs a section of kind MipsReginfo and points this->Reginfo at the
// section contents. If the contents do not have exactly the size of
// Elf_Mips_RegInfo, an error is reported and Reginfo is left untouched.
template <class ELFT>
MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(elf::ObjectFile<ELFT> *F,
                                                       const Elf_Shdr *Hdr)
    : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsReginfo) {
  ArrayRef<uint8_t> Contents = this->getSectionData();
  if (Contents.size() == sizeof(Elf_Mips_RegInfo<ELFT>)) {
    Reginfo =
        reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(Contents.data());
    return;
  }
  error("invalid size of .reginfo section");
}
// LLVM-style RTTI support: true for sections of kind MipsReginfo.
template <class ELFT>
bool MipsReginfoInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto K = S->SectionKind;
  return K == InputSectionBase<ELFT>::MipsReginfo;
}
// Constructs a section of kind MipsOptions and scans the section contents,
// a sequence of option descriptors, for the ODK_REGINFO descriptor. When it
// is found, this->Reginfo is pointed at its register info.
//
// Malformed contents are reported with error() and end the scan: this
// covers data too short to hold a descriptor header, and a descriptor whose
// size field is zero (which would otherwise never advance and loop forever)
// or larger than the remaining data.
template <class ELFT>
MipsOptionsInputSection<ELFT>::MipsOptionsInputSection(elf::ObjectFile<ELFT> *F,
                                                       const Elf_Shdr *Hdr)
    : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsOptions) {
  // Find ODK_REGINFO option in the section's content.
  ArrayRef<uint8_t> D = this->getSectionData();
  while (!D.empty()) {
    if (D.size() < sizeof(Elf_Mips_Options<ELFT>)) {
      error("invalid size of .MIPS.options section");
      break;
    }
    auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(D.data());
    if (O->kind == ODK_REGINFO) {
      Reginfo = &O->getRegInfo();
      break;
    }
    // Skip to the next descriptor, but only if that makes progress and
    // stays inside the section.
    size_t DescSize = O->size;
    if (DescSize == 0 || DescSize > D.size()) {
      error("invalid option descriptor size in .MIPS.options section");
      break;
    }
    D = D.slice(DescSize);
  }
}
// LLVM-style RTTI support: true for sections of kind MipsOptions.
template <class ELFT>
bool MipsOptionsInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto K = S->SectionKind;
  return K == InputSectionBase<ELFT>::MipsOptions;
}
// Explicit instantiations. The templates in this file are defined here
// rather than in the header, so they are instantiated for each of the four
// ELF types (32/64-bit, little/big-endian).
template bool elf::isDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *);
template bool elf::isDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *);
template bool elf::isDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *);
template bool elf::isDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *);
template class elf::InputSectionBase<ELF32LE>;
template class elf::InputSectionBase<ELF32BE>;
template class elf::InputSectionBase<ELF64LE>;
template class elf::InputSectionBase<ELF64BE>;
template class elf::InputSection<ELF32LE>;
template class elf::InputSection<ELF32BE>;
template class elf::InputSection<ELF64LE>;
template class elf::InputSection<ELF64BE>;
template class elf::SplitInputSection<ELF32LE>;
template class elf::SplitInputSection<ELF32BE>;
template class elf::SplitInputSection<ELF64LE>;
template class elf::SplitInputSection<ELF64BE>;
template class elf::EhInputSection<ELF32LE>;
template class elf::EhInputSection<ELF32BE>;
template class elf::EhInputSection<ELF64LE>;
template class elf::EhInputSection<ELF64BE>;
template class elf::MergeInputSection<ELF32LE>;
template class elf::MergeInputSection<ELF32BE>;
template class elf::MergeInputSection<ELF64LE>;
template class elf::MergeInputSection<ELF64BE>;
template class elf::MipsReginfoInputSection<ELF32LE>;
template class elf::MipsReginfoInputSection<ELF32BE>;
template class elf::MipsReginfoInputSection<ELF64LE>;
template class elf::MipsReginfoInputSection<ELF64BE>;
template class elf::MipsOptionsInputSection<ELF32LE>;
template class elf::MipsOptionsInputSection<ELF32BE>;
template class elf::MipsOptionsInputSection<ELF64LE>;
template class elf::MipsOptionsInputSection<ELF64BE>;

View File

@ -0,0 +1,270 @@
//===- InputSection.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_INPUT_SECTION_H
#define LLD_ELF_INPUT_SECTION_H
#include "Config.h"
#include "Relocations.h"
#include "Thunks.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Object/ELF.h"
namespace lld {
namespace elf {
template <class ELFT> bool isDiscarded(InputSectionBase<ELFT> *S);
class SymbolBody;
template <class ELFT> class ICF;
template <class ELFT> class DefinedRegular;
template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection;
template <class ELFT> class OutputSectionBase;
// This corresponds to a section of an input file.
//
// It is the common base of all input section classes; the concrete class
// is identified by SectionKind.
template <class ELFT> class InputSectionBase {
protected:
  using Elf_Chdr = typename ELFT::Chdr;
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;
  using uintX_t = typename ELFT::uint;

  // Section header of this section in the input file.
  const Elf_Shdr *Header;

  // The file this section is from.
  ObjectFile<ELFT> *File;

  // If a section is compressed, this vector has uncompressed section data.
  SmallVector<char, 0> Uncompressed;

public:
  enum Kind { Regular, EHFrame, Merge, MipsReginfo, MipsOptions };
  Kind SectionKind;

  // The default constructor initializes only Repl. It is what constructs
  // the Discarded instance declared below.
  InputSectionBase() : Repl(this) {}

  InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header,
                   Kind SectionKind);

  // Output section this section is assigned to; null until assigned.
  OutputSectionBase<ELFT> *OutSec = nullptr;
  uint32_t Alignment;

  // Used for garbage collection.
  bool Live;

  // This pointer points to the "real" instance of this instance.
  // Usually Repl == this. However, if ICF merges two sections,
  // Repl pointer of one section points to another section. So,
  // if you need to get a pointer to this instance, do not use
  // this but instead this->Repl.
  InputSectionBase<ELFT> *Repl;

  // Returns the size of this section (even if this is a common or BSS.)
  size_t getSize() const;

  // Sentinel instance representing a discarded section.
  static InputSectionBase<ELFT> Discarded;

  StringRef getSectionName() const;
  const Elf_Shdr *getSectionHdr() const { return Header; }
  ObjectFile<ELFT> *getFile() const { return File; }
  uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const;

  // Translate an offset in the input section to an offset in the output
  // section.
  uintX_t getOffset(uintX_t Offset) const;

  ArrayRef<uint8_t> getSectionData() const;

  void uncompress();

  void relocate(uint8_t *Buf, uint8_t *BufEnd);

  // Relocations that relocate() applies to this section.
  std::vector<Relocation<ELFT>> Relocations;

  // True if the section contents are compressed; getSectionData() then
  // returns the Uncompressed buffer.
  bool Compressed;
};
template <class ELFT> InputSectionBase<ELFT> InputSectionBase<ELFT>::Discarded;
// SectionPiece represents a piece of splittable section contents.
struct SectionPiece {
  // Records the piece's offset within its input section and the bytes it
  // covers. A piece starts out live unless Config->GcSections is set.
  SectionPiece(size_t Off, ArrayRef<uint8_t> Data)
      : InputOff(Off), Data(Data.data()), Size(Data.size()),
        Live(!Config->GcSections) {}

  // Returns the bytes of this piece. The accessor is const-qualified (like
  // size()) so that it can also be used through a const SectionPiece, e.g.
  // the pointer returned by the const overload of getSectionPiece().
  ArrayRef<uint8_t> data() const { return {Data, Size}; }

  // Returns the number of bytes in this piece.
  size_t size() const { return Size; }

  // Offset of this piece from the beginning of its input section.
  size_t InputOff;
  // Offset assigned to this piece in the output; (size_t)-1 until one has
  // been assigned.
  size_t OutputOff = -1;

private:
  // We use bitfields because section pieces are accessed by
  // std::upper_bound very often.
  // We want to save bits to make it cache friendly.
  const uint8_t *Data;
  uint32_t Size : 31;

public:
  // Liveness flag, packed next to Size to keep the struct small.
  uint32_t Live : 1;
};
// Most sections are copied to the output as one atomic chunk of data.
// A few special kinds of sections are instead cut into small pieces, and
// every piece may be copied to a different place in the output.
// This class is the common representation of such sections.
template <class ELFT> class SplitInputSection : public InputSectionBase<ELFT> {
  using Elf_Shdr = typename ELFT::Shdr;
  using uintX_t = typename ELFT::uint;

public:
  SplitInputSection(ObjectFile<ELFT> *File, const Elf_Shdr *Header,
                    typename InputSectionBase<ELFT>::Kind SectionKind);

  // The section contents, kept as a sequence of pieces rather than
  // as one large blob of data.
  std::vector<SectionPiece> Pieces;

  // Return the SectionPiece found at the given input section offset.
  SectionPiece *getSectionPiece(uintX_t Offset);
  const SectionPiece *getSectionPiece(uintX_t Offset) const;
};
// Represents an SHF_MERGE section of an input file.
template <class ELFT> class MergeInputSection : public SplitInputSection<ELFT> {
  using uintX_t = typename ELFT::uint;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Shdr = typename ELFT::Shdr;

public:
  MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
  static bool classof(const InputSectionBase<ELFT> *S);
  void splitIntoPieces();

  // Used by GC: remember that the piece at Offset must be kept alive.
  void markLiveAt(uintX_t Offset) { LiveOffsets.insert(Offset); }

  // Map an offset inside this input section to the corresponding
  // offset inside the output section.
  uintX_t getOffset(uintX_t Offset) const;

  void finalizePieces();

private:
  llvm::DenseMap<uintX_t, uintX_t> OffsetMap;
  // Offsets recorded through markLiveAt().
  llvm::DenseSet<uintX_t> LiveOffsets;
};
// Represents an .eh_frame section of an input file.
template <class ELFT> class EhInputSection : public SplitInputSection<ELFT> {
public:
  using Elf_Shdr = typename ELFT::Shdr;
  using uintX_t = typename ELFT::uint;

  EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
  static bool classof(const InputSectionBase<ELFT> *S);
  void split();

  // Map an offset inside this input section to the corresponding
  // offset inside the output section.
  uintX_t getOffset(uintX_t Offset) const;

  // The relocation section that refers to this section, if any.
  const Elf_Shdr *RelocSection = nullptr;
};
// Represents a section of an input file that is not SHF_MERGE.
template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
  // ICF needs access to replace() and GroupId.
  friend ICF<ELFT>;

  using Base = InputSectionBase<ELFT>;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Rela = typename ELFT::Rela;
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Sym = typename ELFT::Sym;
  using uintX_t = typename ELFT::uint;

public:
  InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);

  // Writes this section into a mmap'ed file. Buf is expected to point
  // to the beginning of the output section.
  void writeTo(uint8_t *Buf);

  // The relocation sections that refer to this section.
  llvm::TinyPtrVector<const Elf_Shdr *> RelocSections;

  // Offset of this section from the beginning of the output section it was
  // assigned to. The value is filled in by the writer.
  uint64_t OutSecOff = 0;

  static bool classof(const InputSectionBase<ELFT> *S);

  InputSectionBase<ELFT> *getRelocatedSection();

  // Registers a thunk related to a symbol. When the section is written
  // to a mmap'ed file, the target is asked to emit the actual thunk code.
  // Thunks are currently supported for MIPS and ARM targets only.
  void addThunk(const Thunk<ELFT> *T);

  // Offset of the synthetic thunk code from the beginning of this section.
  uint64_t getThunkOff() const;

  // Size of the chunk that holds the thunk code.
  uint64_t getThunksSize() const;

  template <class RelTy>
  void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);

private:
  template <class RelTy>
  void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);

  // ICF calls this to merge two input sections.
  void replace(InputSection<ELFT> *Other);

  // Bookkeeping for ICF.
  uint64_t GroupId = 0;

  // Thunks registered through addThunk().
  llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks;
};
// The MIPS .reginfo section describes the registers used by the code in an
// object file. The linker collects that information and emits one .reginfo
// section in the output file, containing the union of the used-register
// masks of all input .reginfo sections plus the final value of the `_gp`
// symbol. For details see Chapter 4 / "Register Information" at
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
template <class ELFT>
class MipsReginfoInputSection : public InputSectionBase<ELFT> {
  using Elf_Shdr = typename ELFT::Shdr;

public:
  MipsReginfoInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr);
  static bool classof(const InputSectionBase<ELFT> *S);

  // Register-information record of this section; nullptr by default.
  const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr;
};
// Input section of kind MipsOptions (see InputSectionBase::Kind). Like
// MipsReginfoInputSection it exposes a pointer to a register-information
// record.
template <class ELFT>
class MipsOptionsInputSection : public InputSectionBase<ELFT> {
  using Elf_Shdr = typename ELFT::Shdr;

public:
  MipsOptionsInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr);
  static bool classof(const InputSectionBase<ELFT> *S);

  // Register-information record of this section; nullptr by default.
  const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr;
};
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,325 @@
//===- LTO.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "LTO.h"
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/legacy/UpdateCompilerUsed.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
// Writes Buffer to a newly created file at Path.
// This is for use when debugging LTO.
//
// If the file cannot be created the error is reported and nothing is
// written: the stream has no usable file descriptor in that case, so
// writing to it could only produce a second, less helpful I/O failure.
static void saveBuffer(StringRef Buffer, const Twine &Path) {
  std::error_code EC;
  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None);
  if (EC) {
    error(EC, "cannot create " + Path);
    return;
  }
  OS << Buffer;
}
// Writes module M as bitcode to a newly created file at Path.
// This is for use when debugging LTO.
//
// If the file cannot be created the error is reported and nothing is
// written: the stream has no usable file descriptor in that case, so
// serializing the module into it would be wasted work that ends in a
// second I/O failure.
static void saveBCFile(Module &M, const Twine &Path) {
  std::error_code EC;
  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None);
  if (EC) {
    error(EC, "cannot create " + Path);
    return;
  }
  WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true);
}
// Runs the user-specified pass pipeline (Config->LtoNewPmPasses) over M
// using the new pass manager. Reports an error and returns without running
// anything if either the AA pipeline or the pass pipeline cannot be parsed.
static void runNewCustomLtoPasses(Module &M, TargetMachine &TM) {
  PassBuilder Builder(&TM);

  // A custom alias-analysis pipeline may have been requested.
  AAManager AliasAnalyses;
  if (!Builder.parseAAPipeline(AliasAnalyses, Config->LtoAAPipeline)) {
    error("Unable to parse AA pipeline description: " + Config->LtoAAPipeline);
    return;
  }

  LoopAnalysisManager LoopAM;
  FunctionAnalysisManager FunctionAM;
  CGSCCAnalysisManager CGSCCAM;
  ModuleAnalysisManager ModuleAM;

  // Our AA manager has to be registered before the default analyses so
  // that it is the one that ends up being used.
  FunctionAM.registerPass([&] { return std::move(AliasAnalyses); });

  // Then register the basic analyses and wire the managers together.
  Builder.registerModuleAnalyses(ModuleAM);
  Builder.registerCGSCCAnalyses(CGSCCAM);
  Builder.registerFunctionAnalyses(FunctionAM);
  Builder.registerLoopAnalyses(LoopAM);
  Builder.crossRegisterProxies(LoopAM, FunctionAM, CGSCCAM, ModuleAM);

  // The requested passes are bracketed by verifier runs unless
  // verification has been disabled.
  ModulePassManager Pipeline;
  if (!Config->DisableVerify)
    Pipeline.addPass(VerifierPass());

  if (!Builder.parsePassPipeline(Pipeline, Config->LtoNewPmPasses)) {
    error("unable to parse pass pipeline description: " +
          Config->LtoNewPmPasses);
    return;
  }

  if (!Config->DisableVerify)
    Pipeline.addPass(VerifierPass());

  Pipeline.run(M, ModuleAM);
}
// Runs the default LTO pipeline over M with the legacy pass manager.
static void runOldLtoPasses(Module &M, TargetMachine &TM) {
  // Note that the gold plugin has a similar piece of code, so
  // it is probably better to move this code to a common place.
  legacy::PassManager PM;
  PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));

  PassManagerBuilder Builder;
  Builder.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM.getTargetTriple()));
  Builder.Inliner = createFunctionInliningPass();
  // Verify both the input and the output unless verification is disabled.
  const bool Verify = !Config->DisableVerify;
  Builder.VerifyOutput = Verify;
  Builder.VerifyInput = Verify;
  Builder.LoopVectorize = true;
  Builder.SLPVectorize = true;
  Builder.OptLevel = Config->LtoO;
  Builder.populateLTOPassManager(PM);

  PM.run(M);
}
// Optimizes the combined module M, either with the default LTO pipeline or
// with the pipeline the user asked for, and optionally saves the result.
static void runLTOPasses(Module &M, TargetMachine &TM) {
  if (Config->LtoNewPmPasses.empty()) {
    // No explicit request: run the 'default' set of LTO passes. This still
    // goes through the legacy PM because the new one is not the default.
    runOldLtoPasses(M, TM);
  } else {
    // An explicit list of passes was given. That requires the new PM since
    // the legacy PM offers no clean way to be handed a set of passes.
    runNewCustomLtoPasses(M, TM);
    if (HasError)
      return;
  }

  if (Config->SaveTemps)
    saveBCFile(M, Config->OutputFile + ".lto.opt.bc");
}
// Decides whether GV, the bitcode definition behind symbol S, may be given
// internal linkage. It may not if S is used from a regular object, if GV is
// in the Used set, or if S has to be included in the dynamic symbol table.
static bool shouldInternalize(const SmallPtrSet<GlobalValue *, 8> &Used,
                              Symbol *S, GlobalValue *GV) {
  return !(S->IsUsedInRegularObj || Used.count(GV)) && !S->includeInDynsym();
}
// Creates the (initially empty) combined module, named "ld-temp.o", that
// add() moves the input modules into.
BitcodeCompiler::BitcodeCompiler() {
  Combined.reset(new Module("ld-temp.o", Driver->Context));
}
// Replaces the body of S with an Undefined body that keeps the name and
// type of the current body and has default visibility.
static void undefine(Symbol *S) {
  auto *Body = S->body();
  replaceBody<Undefined>(S, Body->getName(), STV_DEFAULT, Body->Type, nullptr);
}
static void handleUndefinedAsmRefs(const BasicSymbolRef &Sym, GlobalValue *GV,
StringSet<> &AsmUndefinedRefs) {
// GV associated => not an assembly symbol, bail out.
if (GV)
return;
// This is an undefined reference to a symbol in asm. We put that in
// compiler.used, so that we can preserve it from being dropped from
// the output, without necessarily preventing its internalization.
SmallString<64> Name;
raw_svector_ostream OS(Name);
Sym.printName(OS);
AsmUndefinedRefs.insert(Name.str());
}
// Merges the module of bitcode file F into the combined LTO module.
//
// Ownership of F.Obj is taken. For every symbol of the file that is still
// defined by F, the symbol is turned into an undefined one, optionally
// queued for internalization, and its GlobalValue is put on the list of
// values the IRMover must keep. Failing to link the module is fatal.
void BitcodeCompiler::add(BitcodeFile &F) {
std::unique_ptr<IRObjectFile> Obj = std::move(F.Obj);
// GlobalValues that must be moved into the combined module.
std::vector<GlobalValue *> Keep;
// Index into Syms; advanced only for symbols that are not skipped, so it
// stays in step with the symbol list of F.
unsigned BodyIndex = 0;
ArrayRef<Symbol *> Syms = F.getSymbols();
Module &M = Obj->getModule();
if (M.getDataLayoutStr().empty())
fatal("invalid bitcode file: " + F.getName() + " has no datalayout");
// Discard non-compatible debug infos if necessary.
M.materializeMetadata();
UpgradeDebugInfo(M);
// If a symbol appears in @llvm.used, the linker is required
// to treat the symbol as there is a reference to the symbol
// that it cannot see. Therefore, we can't internalize.
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, /* CompilerUsed */ false);
for (const BasicSymbolRef &Sym : Obj->symbols()) {
uint32_t Flags = Sym.getFlags();
GlobalValue *GV = Obj->getSymbolGV(Sym.getRawDataRefImpl());
// Values with appending linkage are always kept, even when the symbol
// itself is skipped below.
if (GV && GV->hasAppendingLinkage())
Keep.push_back(GV);
if (BitcodeFile::shouldSkip(Flags))
continue;
Symbol *S = Syms[BodyIndex++];
if (Flags & BasicSymbolRef::SF_Undefined) {
handleUndefinedAsmRefs(Sym, GV, AsmUndefinedRefs);
continue;
}
// Only symbols whose current definition comes from this very file are
// processed further.
auto *B = dyn_cast<DefinedBitcode>(S->body());
if (!B || B->file() != &F)
continue;
// We collect the set of symbols we want to internalize here
// and change the linkage after the IRMover executed, i.e. after
// we imported the symbols and satisfied undefined references
// to it. We can't just change linkage here because otherwise
// the IRMover will just rename the symbol.
if (GV && shouldInternalize(Used, S, GV))
InternalizedSyms.insert(GV->getName());
// At this point we know that either the combined LTO object will provide a
// definition of a symbol, or we will internalize it. In either case, we
// need to undefine the symbol. In the former case, the real definition
// needs to be able to replace the original definition without conflicting.
// In the latter case, we need to allow the combined LTO object to provide a
// definition with the same name, for example when doing parallel codegen.
undefine(S);
if (!GV)
// Module asm symbol.
continue;
// linkonce linkage is changed to the corresponding weak linkage before
// the value is handed to the IRMover; other linkages are left alone.
switch (GV->getLinkage()) {
default:
break;
case GlobalValue::LinkOnceAnyLinkage:
GV->setLinkage(GlobalValue::WeakAnyLinkage);
break;
case GlobalValue::LinkOnceODRLinkage:
GV->setLinkage(GlobalValue::WeakODRLinkage);
break;
}
Keep.push_back(GV);
}
// Move the kept values into the combined module. The callback passed as
// the third argument does nothing.
IRMover Mover(*Combined);
if (Error E = Mover.move(Obj->takeModule(), Keep,
[](GlobalValue &, IRMover::ValueAdder) {})) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EIB) {
fatal("failed to link module " + F.getName() + ": " + EIB.message());
});
}
}
// Gives GV internal linkage. The caller must not pass a value that already
// has local linkage; that is checked by the assertion.
static void internalize(GlobalValue &GV) {
  assert(!GV.hasLocalLinkage() &&
         "Trying to internalize a symbol with local linkage!");

  GV.setLinkage(GlobalValue::InternalLinkage);
}
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen(
const std::function<std::unique_ptr<TargetMachine>()> &TMFactory) {
unsigned NumThreads = Config->LtoJobs;
OwningData.resize(NumThreads);
std::list<raw_svector_ostream> OSs;
std::vector<raw_pwrite_stream *> OSPtrs;
for (SmallString<0> &Obj : OwningData) {
OSs.emplace_back(Obj);
OSPtrs.push_back(&OSs.back());
}
splitCodeGen(std::move(Combined), OSPtrs, {}, TMFactory);
std::vector<std::unique_ptr<InputFile>> ObjFiles;
for (SmallString<0> &Obj : OwningData)
ObjFiles.push_back(createObjectFile(
MemoryBufferRef(Obj, "LLD-INTERNAL-combined-lto-object")));
// If -save-temps is given, we need to save temporary objects to files.
// This is for debugging.
if (Config->SaveTemps) {
if (NumThreads == 1) {
saveBuffer(OwningData[0], Config->OutputFile + ".lto.o");
} else {
for (unsigned I = 0; I < NumThreads; ++I)
saveBuffer(OwningData[I], Config->OutputFile + Twine(I) + ".lto.o");
}
}
return ObjFiles;
}
// Merge all the bitcode files we have seen, codegen the result
// and return the resulting ObjectFile.
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() {
for (const auto &Name : InternalizedSyms) {
GlobalValue *GV = Combined->getNamedValue(Name.first());
assert(GV);
internalize(*GV);
}
std::string TheTriple = Combined->getTargetTriple();
std::string Msg;
const Target *T = TargetRegistry::lookupTarget(TheTriple, Msg);
if (!T)
fatal("target not found: " + Msg);
// LLD supports the new relocations.
TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
Options.RelaxELFRelocations = true;
auto CreateTargetMachine = [&]() {
return std::unique_ptr<TargetMachine>(T->createTargetMachine(
TheTriple, "", "", Options, Config->Pic ? Reloc::PIC_ : Reloc::Static));
};
std::unique_ptr<TargetMachine> TM = CreateTargetMachine();
// Update llvm.compiler.used so that optimizations won't strip
// off AsmUndefinedReferences.
updateCompilerUsed(*Combined, *TM, AsmUndefinedRefs);
if (Config->SaveTemps)
saveBCFile(*Combined, Config->OutputFile + ".lto.bc");
runLTOPasses(*Combined, *TM);
if (HasError)
return {};
return runSplitCodegen(CreateTargetMachine);
}

View File

@ -0,0 +1,54 @@
//===- LTO.h ----------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides a way to combine bitcode files into one ELF
// file by compiling them using LLVM.
//
// If LTO is in use, your input files are not in regular ELF files
// but instead LLVM bitcode files. In that case, the linker has to
// convert bitcode files into the native format so that we can create
// an ELF file that contains native code. This file provides that
// functionality.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_LTO_H
#define LLD_ELF_LTO_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/IRMover.h"
namespace lld {
namespace elf {
class BitcodeFile;
class InputFile;
// Combines bitcode files into a single module and compiles that module to
// native object files.
class BitcodeCompiler {
public:
// Creates the empty combined module.
BitcodeCompiler();
// Moves the module of F into the combined module.
void add(BitcodeFile &F);
// Optimizes and compiles the combined module and returns the resulting
// object files.
std::vector<std::unique_ptr<InputFile>> compile();
private:
// Code-generation step of compile(); TMFactory creates target machines.
std::vector<std::unique_ptr<InputFile>> runSplitCodegen(
const std::function<std::unique_ptr<llvm::TargetMachine>()> &TMFactory);
// The module all added bitcode files are merged into.
std::unique_ptr<llvm::Module> Combined;
// Backing storage for the generated object files, one buffer per output.
std::vector<SmallString<0>> OwningData;
// Names of the symbols to internalize once all modules have been merged.
llvm::StringSet<> InternalizedSyms;
// Names of undefined symbols referenced from module-level assembly.
llvm::StringSet<> AsmUndefinedRefs;
};
}
}
#endif

View File

@ -0,0 +1,606 @@
//===- LinkerScript.cpp ---------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the parser/evaluator of the linker script.
// It does not construct an AST but consume linker script directives directly.
// Results are written to Driver or Config object.
//
//===----------------------------------------------------------------------===//
#include "LinkerScript.h"
#include "Config.h"
#include "Driver.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "Strings.h"
#include "Symbols.h"
#include "SymbolTable.h"
#include "Target.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace lld;
using namespace lld::elf;
// Definition of the global pointer to the linker script parse results
// (declared extern in LinkerScript.h).
ScriptConfiguration *elf::ScriptConfig;
// An operator-precedence parser that parses and, at the same time,
// evaluates a linker script arithmetic expression (e.g. ". = . + 0x1000").
// A fresh ExprParser is created and run for every such expression.
namespace {
class ExprParser : public ScriptParserBase {
public:
  ExprParser(std::vector<StringRef> &Tokens, uint64_t Dot)
      : ScriptParserBase(Tokens), Dot(Dot) {}

  // Evaluates the complete token stream and returns its value.
  uint64_t run();

private:
  // The grammar, from the outermost production to the innermost one.
  uint64_t parseExpr();
  uint64_t parseExpr1(uint64_t Lhs, int MinPrec);
  uint64_t parseTernary(uint64_t Cond);
  uint64_t parsePrimary();

  // Evaluates "L Op R" for a binary operator Op.
  uint64_t apply(StringRef Op, uint64_t L, uint64_t R);

  // The value the special variable "." evaluates to.
  uint64_t Dot;
};
} // anonymous namespace
// Returns the precedence of binary operator Op; a larger number binds
// tighter. Any token that is not a known binary operator yields -1.
static int precedence(StringRef Op) {
  if (Op == "*" || Op == "/")
    return 4;
  if (Op == "+" || Op == "-")
    return 3;
  if (Op == "<" || Op == ">" || Op == ">=" || Op == "<=" || Op == "==" ||
      Op == "!=")
    return 2;
  if (Op == "&")
    return 1;
  return -1;
}
static uint64_t evalExpr(std::vector<StringRef> &Tokens, uint64_t Dot) {
return ExprParser(Tokens, Dot).run();
}
uint64_t ExprParser::run() {
uint64_t V = parseExpr();
if (!atEOF() && !Error)
setError("stray token: " + peek());
return V;
}
// This is a part of the operator-precedence parser to evaluate
// arithmetic expressions in SECTIONS command. This function evaluates an
// integer literal, a parenthesized expression, the ALIGN function,
// or the special variable ".".
//
// A token that is none of the above is reported as a malformed number and
// evaluates to 0.
uint64_t ExprParser::parsePrimary() {
  StringRef Tok = next();
  if (Tok == ".")
    return Dot;
  if (Tok == "(") {
    uint64_t V = parseExpr();
    expect(")");
    return V;
  }
  if (Tok == "ALIGN") {
    expect("(");
    uint64_t V = parseExpr();
    expect(")");
    // alignTo() divides by the alignment, so a zero must never reach it.
    // Zero can be written explicitly ("ALIGN(0)") and is also what a
    // sub-expression evaluates to after a parse error. Treat it as "no
    // alignment requested" and leave the location counter unchanged.
    if (V == 0)
      return Dot;
    return alignTo(Dot, V);
  }
  // Radix 0 lets getAsInteger pick the base from the literal's prefix.
  // getAsInteger returns true on failure.
  uint64_t V = 0;
  if (Tok.getAsInteger(0, V))
    setError("malformed number: " + Tok);
  return V;
}
// Parses the "? a : b" tail of a conditional expression whose condition
// has already been evaluated to Cond. Both branches are always parsed;
// the value of the selected one is returned.
uint64_t ExprParser::parseTernary(uint64_t Cond) {
  next(); // Consume the "?" that the caller only peeked at.
  uint64_t IfTrue = parseExpr();
  expect(":");
  uint64_t IfFalse = parseExpr();
  if (Cond)
    return IfTrue;
  return IfFalse;
}
// Evaluates "L Op R". Comparison operators yield 1 or 0. Dividing by zero
// reports an error and yields 0. Op must be one of the operators known to
// precedence(); anything else is unreachable.
uint64_t ExprParser::apply(StringRef Op, uint64_t L, uint64_t R) {
  // Division is the only operator that can fail, so handle it first.
  if (Op == "/") {
    if (R != 0)
      return L / R;
    error("division by zero");
    return 0;
  }

  // Arithmetic and bitwise operators.
  if (Op == "*")
    return L * R;
  if (Op == "+")
    return L + R;
  if (Op == "-")
    return L - R;
  if (Op == "&")
    return L & R;

  // Comparisons.
  if (Op == "<")
    return L < R;
  if (Op == ">")
    return L > R;
  if (Op == "<=")
    return L <= R;
  if (Op == ">=")
    return L >= R;
  if (Op == "==")
    return L == R;
  if (Op == "!=")
    return L != R;

  llvm_unreachable("invalid operator");
}
// This is a part of the operator-precedence parser.
// This function assumes that the remaining token stream starts
// with an operator.
//
// Lhs is the value of the operand parsed so far. Operators whose precedence
// is below MinPrec are left in the stream for the caller. Returns the value
// of the (sub)expression.
uint64_t ExprParser::parseExpr1(uint64_t Lhs, int MinPrec) {
while (!atEOF()) {
// Read an operator and an expression.
StringRef Op1 = peek();
// "?" ends the binary expression: everything parsed so far is the
// condition of a ternary.
if (Op1 == "?")
return parseTernary(Lhs);
// Tokens that are not operators have precedence -1 and therefore also
// end the expression here.
if (precedence(Op1) < MinPrec)
return Lhs;
next();
uint64_t Rhs = parsePrimary();
// Evaluate the remaining part of the expression first if the
// next operator has greater precedence than the previous one.
// For example, if we have read "+" and "3", and if the next
// operator is "*", then we'll evaluate 3 * ... part first.
while (!atEOF()) {
StringRef Op2 = peek();
if (precedence(Op2) <= precedence(Op1))
break;
Rhs = parseExpr1(Rhs, precedence(Op2));
}
Lhs = apply(Op1, Lhs, Rhs);
}
return Lhs;
}
// Reads and evaluates one arithmetic expression: a primary followed by any
// number of "operator operand" pairs.
uint64_t ExprParser::parseExpr() {
  uint64_t First = parsePrimary();
  return parseExpr1(First, 0);
}
// Returns the destination of the first SECTIONS rule whose pattern matches
// the name of S, or an empty string if no rule matches.
template <class ELFT>
StringRef LinkerScript<ELFT>::getOutputSection(InputSectionBase<ELFT> *S) {
  auto Last = Opt.Sections.end();
  auto Match = std::find_if(Opt.Sections.begin(), Last, [&](SectionRule &Rule) {
    return globMatch(Rule.SectionPattern, S->getSectionName());
  });
  if (Match == Last)
    return "";
  return Match->Dest;
}
// Returns true if the linker script places S in the special "/DISCARD/"
// output section.
template <class ELFT>
bool LinkerScript<ELFT>::isDiscarded(InputSectionBase<ELFT> *S) {
  StringRef Dest = getOutputSection(S);
  return Dest == "/DISCARD/";
}
// Returns true if the name of S matches any of the patterns collected in
// Opt.KeptSections.
template <class ELFT>
bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
  return std::any_of(
      Opt.KeptSections.begin(), Opt.KeptSections.end(),
      [&](StringRef Pat) { return globMatch(Pat, S->getSectionName()); });
}
// Assigns virtual addresses to Sections by executing the SECTIONS
// sub-commands stored in Opt.Commands in order, and then places the ELF
// header and the program headers in front of the lowest address that was
// assigned to an allocated section.
template <class ELFT>
void LinkerScript<ELFT>::assignAddresses(
ArrayRef<OutputSectionBase<ELFT> *> Sections) {
// Orphan sections are sections present in the input files which
// are not explicitly placed into the output file by the linker script.
// We place orphan sections at end of file.
// Other linkers place them using some heuristics as described in
// https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.
for (OutputSectionBase<ELFT> *Sec : Sections) {
StringRef Name = Sec->getName();
// INT_MAX means "not mentioned in the script"; appending a command for
// the section puts it after everything the script lists.
if (getSectionIndex(Name) == INT_MAX)
Opt.Commands.push_back({SectionKind, {}, Name});
}
// Assign addresses as instructed by linker script SECTIONS sub-commands.
Dot = Out<ELFT>::ElfHeader->getSize() + Out<ELFT>::ProgramHeaders->getSize();
// Lowest address given to an SHF_ALLOC section so far.
uintX_t MinVA = std::numeric_limits<uintX_t>::max();
// Space taken after Dot by the TLS NOBITS sections handled so far; such
// sections do not advance Dot.
uintX_t ThreadBssOffset = 0;
for (SectionsCommand &Cmd : Opt.Commands) {
if (Cmd.Kind == AssignmentKind) {
uint64_t Val = evalExpr(Cmd.Expr, Dot);
if (Cmd.Name == ".") {
Dot = Val;
} else {
// NOTE(review): this relies on the symbol existing as a DefinedRegular,
// presumably created by addScriptedSymbols() -- confirm call order.
auto *D = cast<DefinedRegular<ELFT>>(Symtab<ELFT>::X->find(Cmd.Name));
D->Value = Val;
}
continue;
}
// Find all the sections with required name. There can be more than
// one section with such name, if the alignment, flags or type
// attribute differs.
assert(Cmd.Kind == SectionKind);
for (OutputSectionBase<ELFT> *Sec : Sections) {
if (Sec->getName() != Cmd.Name)
continue;
if ((Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS) {
uintX_t TVA = Dot + ThreadBssOffset;
TVA = alignTo(TVA, Sec->getAlignment());
Sec->setVA(TVA);
ThreadBssOffset = TVA - Dot + Sec->getSize();
continue;
}
if (Sec->getFlags() & SHF_ALLOC) {
Dot = alignTo(Dot, Sec->getAlignment());
Sec->setVA(Dot);
MinVA = std::min(MinVA, Dot);
Dot += Sec->getSize();
continue;
}
}
}
// ELF and Program headers need to be right before the first section in
// memory.
// Set their addresses accordingly.
MinVA = alignDown(MinVA - Out<ELFT>::ElfHeader->getSize() -
Out<ELFT>::ProgramHeaders->getSize(),
Target->PageSize);
Out<ELFT>::ElfHeader->setVA(MinVA);
Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA);
}
// Returns the fill bytes registered for output section Name, or an empty
// array if the script did not specify any.
template <class ELFT>
ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
  auto It = Opt.Filler.find(Name);
  if (It != Opt.Filler.end())
    return It->second;
  return {};
}
// Returns the position of the first SECTIONS command that describes the
// output section Name. Sections are laid out in the order in which they
// appear in the script. A name that does not appear in the script yields
// INT_MAX, so that the section is laid out at end of file.
template <class ELFT>
int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
  int Index = 0;
  for (SectionsCommand &Cmd : Opt.Commands) {
    if (Cmd.Kind == SectionKind && Cmd.Name == Name)
      return Index;
    ++Index;
  }
  return INT_MAX;
}
// A comparator used to sort output sections. Returns -1 or 1 if at least
// one of A and B is mentioned in the linker script, and 0 if neither is.
template <class ELFT>
int LinkerScript<ELFT>::compareSections(StringRef A, StringRef B) {
  const int IndexA = getSectionIndex(A);
  const int IndexB = getSectionIndex(B);
  const bool NeitherInScript = IndexA == INT_MAX && IndexB == INT_MAX;
  if (NeitherInScript)
    return 0;
  if (IndexA < IndexB)
    return -1;
  return 1;
}
// Adds an absolute symbol for every symbol assignment in the script whose
// name is not yet in the symbol table. Assignments to "." are skipped.
template <class ELFT>
void LinkerScript<ELFT>::addScriptedSymbols() {
  for (SectionsCommand &Cmd : Opt.Commands) {
    if (Cmd.Kind != AssignmentKind)
      continue;
    if (Cmd.Name == ".")
      continue;
    if (Symtab<ELFT>::X->find(Cmd.Name))
      continue;
    Symtab<ELFT>::X->addAbsolute(Cmd.Name, STV_DEFAULT);
  }
}
// Parser for linker scripts. Each top-level directive is dispatched through
// the Cmd table to one of the read* member functions, which write their
// results to Config, Driver or the ScriptConfiguration.
class elf::ScriptParser : public ScriptParserBase {
  using Handler = void (ScriptParser::*)();

public:
  // S is the script text. B tells whether the script file is located under
  // the sysroot.
  ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}

  // Parses the whole script.
  void run();

private:
  void addFile(StringRef Path);

  // Handlers for top-level directives and their helpers.
  void readAsNeeded();
  void readEntry();
  void readExtern();
  void readGroup();
  void readInclude();
  void readNothing() {}
  void readOutput();
  void readOutputArch();
  void readOutputFormat();
  void readSearchDir();
  void readSections();

  // Helpers for the contents of a SECTIONS block.
  void readLocationCounterValue();
  void readOutputSectionDescription(StringRef OutSec);
  void readSymbolAssignment(StringRef Name);
  std::vector<StringRef> readSectionsCommandExpr();

  // Maps a directive keyword to its handler.
  const static StringMap<Handler> Cmd;

  // Destination of the parse results.
  ScriptConfiguration &Opt = *ScriptConfig;
  // Saves strings into the allocator owned by the ScriptConfiguration.
  StringSaver Saver = {ScriptConfig->Alloc};
  bool IsUnderSysroot;
};
// Dispatch table used by ScriptParser::run(). INPUT is handled by the same
// function as GROUP, and a stray ";" is accepted and ignored.
const StringMap<elf::ScriptParser::Handler> elf::ScriptParser::Cmd = {
    {"ENTRY", &ScriptParser::readEntry},
    {"EXTERN", &ScriptParser::readExtern},
    {"GROUP", &ScriptParser::readGroup},
    {"INCLUDE", &ScriptParser::readInclude},
    {"INPUT", &ScriptParser::readGroup},
    {"OUTPUT", &ScriptParser::readOutput},
    {"OUTPUT_ARCH", &ScriptParser::readOutputArch},
    {"OUTPUT_FORMAT", &ScriptParser::readOutputFormat},
    {"SEARCH_DIR", &ScriptParser::readSearchDir},
    {"SECTIONS", &ScriptParser::readSections},
    {";", &ScriptParser::readNothing},
};
void ScriptParser::run() {
while (!atEOF()) {
StringRef Tok = next();
if (Handler Fn = Cmd.lookup(Tok))
(this->*Fn)();
else
setError("unknown directive: " + Tok);
}
}
// Adds the input file or library named by S, as written in a linker
// script, to the link. The forms are tried in this order: a path below the
// sysroot (only for scripts located under the sysroot), an absolute path,
// "=path" (relative to the sysroot), "-lname", an existing path, and
// finally a lookup through the search paths.
void ScriptParser::addFile(StringRef S) {
  if (IsUnderSysroot && S.startswith("/")) {
    SmallString<128> Rooted;
    (Config->Sysroot + S).toStringRef(Rooted);
    if (sys::fs::exists(Rooted)) {
      Driver->addFile(Saver.save(Rooted.str()));
      return;
    }
  }

  if (sys::path::is_absolute(S)) {
    Driver->addFile(S);
    return;
  }

  if (S.startswith("=")) {
    if (!Config->Sysroot.empty())
      Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
    else
      Driver->addFile(S.substr(1));
    return;
  }

  if (S.startswith("-l")) {
    Driver->addLibrary(S.substr(2));
    return;
  }

  if (sys::fs::exists(S)) {
    Driver->addFile(S);
    return;
  }

  std::string Found = findFromSearchPaths(S);
  if (!Found.empty())
    Driver->addFile(Saver.save(Found));
  else
    setError("unable to find " + S);
}
// Reads the parenthesized file list of AS_NEEDED. Config->AsNeeded is set
// while the files are added and restored to its previous value afterwards.
void ScriptParser::readAsNeeded() {
  expect("(");
  const bool Saved = Config->AsNeeded;
  Config->AsNeeded = true;
  while (!Error) {
    StringRef Tok = next();
    if (Tok != ")") {
      addFile(Tok);
      continue;
    }
    break;
  }
  Config->AsNeeded = Saved;
}
void ScriptParser::readEntry() {
// -e <symbol> takes predecence over ENTRY(<symbol>).
expect("(");
StringRef Tok = next();
if (Config->Entry.empty())
Config->Entry = Tok;
expect(")");
}
void ScriptParser::readExtern() {
expect("(");
while (!Error) {
StringRef Tok = next();
if (Tok == ")")
return;
Config->Undefined.push_back(Tok);
}
}
// Reads the parenthesized file list of GROUP or INPUT. Each entry is added
// as an input file; a nested AS_NEEDED(...) list is handed to
// readAsNeeded().
void ScriptParser::readGroup() {
  expect("(");
  while (!Error) {
    StringRef Tok = next();
    if (Tok == ")")
      return;
    if (Tok == "AS_NEEDED")
      readAsNeeded();
    else
      addFile(Tok);
  }
}
void ScriptParser::readInclude() {
StringRef Tok = next();
auto MBOrErr = MemoryBuffer::getFile(Tok);
if (!MBOrErr) {
setError("cannot open " + Tok);
return;
}
std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
std::vector<StringRef> V = tokenize(S);
Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
}
void ScriptParser::readOutput() {
// -o <file> takes predecence over OUTPUT(<file>).
expect("(");
StringRef Tok = next();
if (Config->OutputFile.empty())
Config->OutputFile = Tok;
expect(")");
}
// Reads OUTPUT_ARCH(<arch>). For now the directive is only checked for
// syntax; the argument is consumed and dropped.
void ScriptParser::readOutputArch() {
  expect("(");
  next(); // The architecture name, currently unused.
  expect(")");
}
void ScriptParser::readOutputFormat() {
// Error checking only for now.
expect("(");
next();
StringRef Tok = next();
if (Tok == ")")
return;
if (Tok != ",") {
setError("unexpected token: " + Tok);
return;
}
next();
expect(",");
next();
expect(")");
}
void ScriptParser::readSearchDir() {
expect("(");
Config->SearchPaths.push_back(next());
expect(")");
}
// Reads a SECTIONS { ... } block and turns on script-driven layout. Each
// entry is a location counter assignment (". = ..."), a symbol assignment
// ("name = ...") or an output section description.
void ScriptParser::readSections() {
  Opt.DoLayout = true;
  expect("{");
  while (!Error && !skip("}")) {
    StringRef Tok = peek();
    if (Tok != ".") {
      // Consume the name, then look at what follows it to tell a symbol
      // assignment from an output section description.
      next();
      if (peek() == "=")
        readSymbolAssignment(Tok);
      else
        readOutputSectionDescription(Tok);
      continue;
    }
    readLocationCounterValue();
  }
}
// Reads ". = <expr>;" and records it as an assignment to ".". The
// expression is stored as tokens and evaluated later. An empty expression
// is an error.
void ScriptParser::readLocationCounterValue() {
  expect(".");
  expect("=");
  std::vector<StringRef> Expr = readSectionsCommandExpr();
  if (Expr.empty()) {
    error("error in location counter expression");
    return;
  }
  Opt.Commands.push_back({AssignmentKind, std::move(Expr), "."});
}
// Reads the description of output section OutSec:
//
//   OutSec : { *(pattern...) KEEP(*(pattern...)) ... } [=0x<fill>]
//
// Every pattern becomes a rule that maps matching input sections to
// OutSec; patterns inside KEEP are additionally recorded as kept sections.
// An optional trailing "=0x..." sets the filler of the section.
void ScriptParser::readOutputSectionDescription(StringRef OutSec) {
  Opt.Commands.push_back({SectionKind, {}, OutSec});
  expect(":");
  expect("{");
  while (!Error && !skip("}")) {
    StringRef Tok = next();
    if (Tok == "*") {
      expect("(");
      while (!Error && !skip(")"))
        Opt.Sections.emplace_back(OutSec, next());
      continue;
    }
    if (Tok == "KEEP") {
      expect("(");
      expect("*");
      expect("(");
      while (!Error && !skip(")")) {
        StringRef Pattern = next();
        Opt.Sections.emplace_back(OutSec, Pattern);
        Opt.KeptSections.push_back(Pattern);
      }
      expect(")");
      continue;
    }
    setError("unknown command " + Tok);
  }

  // Optional fill expression; it arrives as a single token "=0x...".
  StringRef Fill = peek();
  if (!Fill.startswith("="))
    return;
  if (!Fill.startswith("=0x")) {
    setError("filler should be a hexadecimal value");
    return;
  }
  Fill = Fill.substr(3);
  Opt.Filler[OutSec] = parseHex(Fill);
  next();
}
// Reads "= <expr>;" following symbol Name and records the assignment. The
// expression is stored as tokens and evaluated later. An empty expression
// is an error.
void ScriptParser::readSymbolAssignment(StringRef Name) {
  expect("=");
  std::vector<StringRef> Expr = readSectionsCommandExpr();
  if (Expr.empty()) {
    error("error in symbol assignment expression");
    return;
  }
  Opt.Commands.push_back({AssignmentKind, std::move(Expr), Name});
}
std::vector<StringRef> ScriptParser::readSectionsCommandExpr() {
std::vector<StringRef> Expr;
while (!Error) {
StringRef Tok = next();
if (Tok == ";")
break;
Expr.push_back(Tok);
}
return Expr;
}
// Returns true if Path, or any of its parent directories, refers to the
// same file system object as Config->Sysroot. Always false when no sysroot
// is configured.
static bool isUnderSysroot(StringRef Path) {
  if (Config->Sysroot == "")
    return false;
  while (!Path.empty()) {
    if (sys::fs::equivalent(Config->Sysroot, Path))
      return true;
    Path = sys::path::parent_path(Path);
  }
  return false;
}
// Entry point: parses the linker script held in MB. The buffer identifier
// is used as the path of the script when checking whether it is located
// under the sysroot.
void elf::readLinkerScript(MemoryBufferRef MB) {
  StringRef Path = MB.getBufferIdentifier();
  ScriptParser Parser(MB.getBuffer(), isUnderSysroot(Path));
  Parser.run();
}
// Explicit instantiations of LinkerScript for the four ELF flavours
// (32/64-bit, little/big endian); the member templates are defined in this
// file only.
template class elf::LinkerScript<ELF32LE>;
template class elf::LinkerScript<ELF32BE>;
template class elf::LinkerScript<ELF64LE>;
template class elf::LinkerScript<ELF64BE>;

View File

@ -0,0 +1,103 @@
//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_LINKER_SCRIPT_H
#define LLD_ELF_LINKER_SCRIPT_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/MemoryBuffer.h"
namespace lld {
namespace elf {
// Parses a linker script. Calling this function updates
// Config and ScriptConfig.
void readLinkerScript(MemoryBufferRef MB);
class ScriptParser;
template <class ELFT> class InputSectionBase;
template <class ELFT> class OutputSectionBase;
// This class represents each rule in SECTIONS command.
struct SectionRule {
SectionRule(StringRef D, StringRef S)
: Dest(D), SectionPattern(S) {}
StringRef Dest;
StringRef SectionPattern;
};
// This enum represents what we can observe in the SECTIONS command of a script:
// AssignmentKind is an assignment, either a location counter change like
// ". = . + 0x1000" or a symbol assignment
// SectionKind is a description of output section, like ".data :..."
enum SectionsCommandKind { SectionKind, AssignmentKind };
// One entry recorded, in script order, while parsing a SECTIONS command.
// The parser brace-initializes this struct, so the member order matters.
struct SectionsCommand {
// Whether this entry is an output section description or an assignment.
SectionsCommandKind Kind;
// For AssignmentKind: the tokens read up to the terminating ";".
// For SectionKind the parser leaves this empty.
std::vector<StringRef> Expr;
// For SectionKind: the output section name. For AssignmentKind: the assigned
// symbol name, or "." for the location counter.
StringRef Name;
};
// ScriptConfiguration holds linker script parse results.
struct ScriptConfiguration {
// SECTIONS commands.
// One SectionRule per input section pattern listed in an output section
// description.
std::vector<SectionRule> Sections;
// Section fill attribute for each section.
// Keyed by output section name; filled from the "=0x..." token that may
// follow an output section description.
llvm::StringMap<std::vector<uint8_t>> Filler;
// Used to assign addresses to sections.
// Holds output section descriptions and assignments in script order.
std::vector<SectionsCommand> Commands;
// NOTE(review): not written anywhere in this chunk; presumably set once a
// SECTIONS command has been seen - confirm against the parser.
bool DoLayout = false;
// NOTE(review): allocator whose users are not visible in this chunk.
llvm::BumpPtrAllocator Alloc;
// List of section patterns specified with KEEP commands. They will
// be kept even if they are unused and --gc-sections is specified.
std::vector<StringRef> KeptSections;
};
extern ScriptConfiguration *ScriptConfig;
// Runs a parsed linker script for a particular ELF type. All queries are
// answered from the shared ScriptConfiguration object (see Opt below).
template <class ELFT> class LinkerScript {
  using uintX_t = typename ELFT::uint;

public:
  StringRef getOutputSection(InputSectionBase<ELFT> *S);
  ArrayRef<uint8_t> getFiller(StringRef Name);
  bool isDiscarded(InputSectionBase<ELFT> *S);
  // Consulted by the garbage collector to decide whether a section must be
  // retained because of the script.
  bool shouldKeep(InputSectionBase<ELFT> *S);
  void assignAddresses(ArrayRef<OutputSectionBase<ELFT> *> S);
  int compareSections(StringRef A, StringRef B);
  void addScriptedSymbols();

private:
  // "ScriptConfig" is a bit too long, so bind it to a shorter name.
  ScriptConfiguration &Opt = *ScriptConfig;

  int getSectionIndex(StringRef Name);

  // NOTE(review): presumably the current location counter value ("."); it has
  // no initializer here - confirm it is always set before being read.
  uintX_t Dot;
};
// Variable template is a C++14 feature, so we can't template
// a global variable. Use a struct to workaround.
// Script<ELFT>::X is the pointer to the LinkerScript instance for a given
// ELFT. It is only defined here (without an initializer); whoever creates the
// instance is responsible for assigning it before it is dereferenced.
template <class ELFT> struct Script { static LinkerScript<ELFT> *X; };
template <class ELFT> LinkerScript<ELFT> *Script<ELFT>::X;
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,201 @@
//===- MarkLive.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements --gc-sections, which is a feature to remove unused
// sections from output. Unused sections are sections that are not reachable
// from known GC-root symbols or sections. Naturally the feature is
// implemented as a mark-sweep garbage collector.
//
// Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off
// by default. Starting with GC-root symbols or sections, markLive function
// defined in this file visits all reachable sections to set their Live
// bits. Writer will then ignore sections whose Live bits are off, so that
// such sections are not included into output.
//
//===----------------------------------------------------------------------===//
#include "InputSection.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "Strings.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Target.h"
#include "Writer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Object/ELF.h"
#include <functional>
#include <vector>
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace lld;
using namespace lld::elf;
// A resolved relocation. The Sec and Offset fields are set if the relocation
// was resolved to an offset within a section.
// When the relocation target is not a DefinedRegular symbol with a section,
// resolveReloc() returns {nullptr, 0}.
template <class ELFT>
struct ResolvedReloc {
// Section the relocation points into, or nullptr if unresolved.
InputSectionBase<ELFT> *Sec;
// Offset within Sec that the relocation refers to.
typename ELFT::uint Offset;
};
// Returns the implicit addend of a REL-style relocation.
//
// REL records have no r_addend field; the addend is stored in the section
// contents at the location being relocated. It therefore has to be read at
// r_offset bytes into the section data. Reading it at the start of the
// section (as this function used to do) yields an unrelated value for every
// relocation that is not at offset 0.
template <class ELFT>
static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec,
                                     const typename ELFT::Rel &Rel) {
  return Target->getImplicitAddend(Sec.getSectionData().begin() + Rel.r_offset,
                                   Rel.getType(Config->Mips64EL));
}
// Returns the addend of a RELA-style relocation, which is stored explicitly
// in the relocation record. Sec is unused; the parameter exists so that both
// getAddend overloads can be called the same way.
template <class ELFT>
static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec,
const typename ELFT::Rela &Rel) {
return Rel.r_addend;
}
// Resolves relocation Rel of section Sec to the (section, offset) pair it
// refers to. Returns {nullptr, 0} unless the target symbol is a
// DefinedRegular that belongs to a section.
template <class ELFT, class RelT>
static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec,
                                        RelT &Rel) {
  SymbolBody &Body = Sec.getFile()->getRelocTargetSym(Rel);
  if (auto *Def = dyn_cast<DefinedRegular<ELFT>>(&Body)) {
    if (Def->Section) {
      typename ELFT::uint Off = Def->Value;
      // For section symbols the addend is added to the symbol value to obtain
      // the offset within the section.
      if (Def->isSection())
        Off += getAddend(Sec, Rel);
      return {Def->Section->Repl, Off};
    }
  }
  return {nullptr, 0};
}
// Resolves every relocation in relocation section RelSec (which applies to
// Sec) and passes each result to Fn. Sections of type SHT_RELA are read as
// Rela records; everything else is read as Rel records.
template <class ELFT, class Elf_Shdr>
static void run(ELFFile<ELFT> &Obj, InputSectionBase<ELFT> &Sec,
                Elf_Shdr *RelSec, std::function<void(ResolvedReloc<ELFT>)> Fn) {
  if (RelSec->sh_type != SHT_RELA) {
    for (const typename ELFT::Rel &R : Obj.rels(RelSec))
      Fn(resolveReloc(Sec, R));
    return;
  }
  for (const typename ELFT::Rela &R : Obj.relas(RelSec))
    Fn(resolveReloc(Sec, R));
}
// Invokes Fn once per relocation of Sec, passing the resolved target. All
// relocation sections attached to Sec are visited.
template <class ELFT>
static void forEachSuccessor(InputSection<ELFT> &Sec,
                             std::function<void(ResolvedReloc<ELFT>)> Fn) {
  ELFFile<ELFT> &File = Sec.getFile()->getObj();
  for (const typename ELFT::Shdr *Relocs : Sec.RelocSections)
    run(File, Sec, Relocs, Fn);
}
// Calls Fn for every section referenced by the relocations of the .eh_frame
// input section EH, except for unresolved targets, the Discarded
// placeholder, and sections flagged SHF_EXECINSTR. Fn always receives
// offset 0. Does nothing if EH has no relocation section.
template <class ELFT>
static void scanEhFrameSection(EhInputSection<ELFT> &EH,
                               std::function<void(ResolvedReloc<ELFT>)> Fn) {
  if (!EH.RelocSection)
    return;
  ELFFile<ELFT> &EObj = EH.getFile()->getObj();
  run<ELFT>(EObj, EH, EH.RelocSection, [&](ResolvedReloc<ELFT> R) {
    bool Usable = R.Sec && R.Sec != &InputSection<ELFT>::Discarded;
    if (Usable && !(R.Sec->getSectionHdr()->sh_flags & SHF_EXECINSTR))
      Fn({R.Sec, 0});
  });
}
// Returns true for sections that must never be garbage-collected: they are
// used by the loader just by being present in the ELF file, or they can be
// reached without a relocation pointing at them.
template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) {
  uint32_t Type = Sec->getSectionHdr()->sh_type;
  if (Type == SHT_FINI_ARRAY || Type == SHT_INIT_ARRAY || Type == SHT_NOTE ||
      Type == SHT_PREINIT_ARRAY)
    return true;

  StringRef Name = Sec->getSectionName();
  // Sections whose names are valid C identifiers can be referred to by
  // __start_* and __stop_* symbols, so they are not reclaimed.
  if (isValidCIdentifier(Name))
    return true;

  for (const char *Prefix : {".ctors", ".dtors", ".init", ".fini", ".jcr"})
    if (Name.startswith(Prefix))
      return true;
  return false;
}
// This is the main function of the garbage collector.
// Starting from GC-root sections, this function visits all reachable
// sections to set their "Live" bits.
template <class ELFT> void elf::markLive() {
// Worklist of newly marked InputSections whose relocations still have to be
// followed.
SmallVector<InputSection<ELFT> *, 256> Q;
// Marks R.Sec as live and, the first time a regular InputSection is seen,
// pushes it onto the worklist. Null sections are ignored.
auto Enqueue = [&](ResolvedReloc<ELFT> R) {
if (!R.Sec)
return;
// Usually, a whole section is marked as live or dead, but in mergeable
// (splittable) sections, each piece of data has independent liveness bit.
// So we explicitly tell it which offset is in use.
if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(R.Sec))
MS->markLiveAt(R.Offset);
// Already visited; returning here keeps each section from being queued
// more than once.
if (R.Sec->Live)
return;
R.Sec->Live = true;
// Only InputSection is queued for relocation scanning; other section kinds
// are just marked.
if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec))
Q.push_back(S);
};
// Marks the section that defines Sym, if Sym is a non-null DefinedRegular.
auto MarkSymbol = [&](const SymbolBody *Sym) {
if (auto *D = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym))
Enqueue({D->Section, D->Value});
};
// Add GC root symbols.
if (Config->EntrySym)
MarkSymbol(Config->EntrySym->body());
MarkSymbol(Symtab<ELFT>::X->find(Config->Init));
MarkSymbol(Symtab<ELFT>::X->find(Config->Fini));
for (StringRef S : Config->Undefined)
MarkSymbol(Symtab<ELFT>::X->find(S));
// Preserve externally-visible symbols if the symbols defined by this
// file can interrupt other ELF file's symbols at runtime.
for (const Symbol *S : Symtab<ELFT>::X->getSymbols())
if (S->includeInDynsym())
MarkSymbol(S->body());
// Preserve special sections and those which are specified in linker
// script KEEP command.
for (const std::unique_ptr<ObjectFile<ELFT>> &F :
Symtab<ELFT>::X->getObjectFiles())
for (InputSectionBase<ELFT> *Sec : F->getSections())
if (Sec && Sec != &InputSection<ELFT>::Discarded) {
// .eh_frame is always marked as live now, but also it can reference to
// sections that contain personality. We preserve all non-text sections
// referred by .eh_frame here.
if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec))
scanEhFrameSection<ELFT>(*EH, Enqueue);
if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec))
Enqueue({Sec, 0});
}
// Mark all reachable sections.
// Enqueue may push further sections while the worklist is being drained.
while (!Q.empty())
forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue);
}
// Explicit instantiations of markLive for each of the four ELFT variants
// named below.
template void elf::markLive<ELF32LE>();
template void elf::markLive<ELF32BE>();
template void elf::markLive<ELF64LE>();
template void elf::markLive<ELF64BE>();

View File

@ -0,0 +1,276 @@
include "llvm/Option/OptParser.td"
// For options whose names are multiple letters, either one dash or
// two can precede the option name except those that start with 'o'.
// Each helper class below accepts both the "--" and "-" prefixes; they differ
// only in the option kind they derive from.
// F: Flag, an option that takes no value.
class F<string name>: Flag<["--", "-"], name>;
// J: Joined, the value is attached directly to the option name.
class J<string name>: Joined<["--", "-"], name>;
// S: Separate, the value is the following argument.
class S<string name>: Separate<["--", "-"], name>;
// JS: JoinedOrSeparate, the value may be attached or be the next argument.
class JS<string name>: JoinedOrSeparate<["--", "-"], name>;
// Options with help text. Single-letter options (L, O, l, m) are declared
// with an explicit ["-"] prefix list and therefore accept one dash only.
def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">;
def Bsymbolic_functions: F<"Bsymbolic-functions">,
HelpText<"Bind defined function symbols locally">;
def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries">;
def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
// build-id exists both as a bare flag and in a joined "build-id=<value>" form.
def build_id: F<"build-id">, HelpText<"Generate build ID note">;
def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">;
def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
HelpText<"Add a directory to the library search path">;
def O: Joined<["-"], "O">, HelpText<"Optimize output file size">;
def allow_multiple_definition: F<"allow-multiple-definition">,
HelpText<"Allow multiple definitions">;
def as_needed: F<"as-needed">,
HelpText<"Only set DT_NEEDED for shared libraries if used">;
def disable_new_dtags: F<"disable-new-dtags">,
HelpText<"Disable new dynamic tags">;
def discard_all: F<"discard-all">, HelpText<"Delete all local symbols">;
def discard_locals: F<"discard-locals">,
HelpText<"Delete temporary local symbols">;
def discard_none: F<"discard-none">,
HelpText<"Keep all symbols in the symbol table">;
def dynamic_linker: S<"dynamic-linker">,
HelpText<"Which dynamic linker to use">;
def dynamic_list: S<"dynamic-list">,
HelpText<"Read a list of dynamic symbols">;
def eh_frame_hdr: F<"eh-frame-hdr">,
HelpText<"Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header">;
def enable_new_dtags: F<"enable-new-dtags">,
HelpText<"Enable new dynamic tags">;
def end_lib: F<"end-lib">,
HelpText<"End a grouping of objects that should be treated as if they were together in an archive">;
def entry: S<"entry">, MetaVarName<"<entry>">,
HelpText<"Name of entry point symbol">;
def export_dynamic: F<"export-dynamic">,
HelpText<"Put symbols in the dynamic symbol table">;
def export_dynamic_symbol: S<"export-dynamic-symbol">,
HelpText<"Put a symbol in the dynamic symbol table">;
def fatal_warnings: F<"fatal-warnings">,
HelpText<"Treat warnings as errors">;
def fini: S<"fini">, MetaVarName<"<symbol>">,
HelpText<"Specify a finalizer function">;
def hash_style: S<"hash-style">,
HelpText<"Specify hash style (sysv, gnu or both)">;
def help: F<"help">, HelpText<"Print option help">;
// Declared as a Flag whose spelling includes the value, so only the literal
// "icf=all" is matched by this definition.
def icf: F<"icf=all">, HelpText<"Enable identical code folding">;
def image_base : J<"image-base=">, HelpText<"Set the base address">;
def gc_sections: F<"gc-sections">,
HelpText<"Enable garbage collection of unused sections">;
def init: S<"init">, MetaVarName<"<symbol>">,
HelpText<"Specify an initializer function">;
def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">,
HelpText<"Root name of library to use">;
def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">,
HelpText<"Optimization level for LTO">;
def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">;
// Counterpart of as_needed. The help text previously lacked its verb
// ("Always DT_NEEDED ..."); it now mirrors the wording used for as_needed.
def no_as_needed: F<"no-as-needed">,
  HelpText<"Always set DT_NEEDED for shared libraries">;
// The following are all plain flags (no value) accepting one or two dashes.
def no_demangle: F<"no-demangle">,
HelpText<"Do not demangle symbol names">;
def no_gnu_unique: F<"no-gnu-unique">,
HelpText<"Disable STB_GNU_UNIQUE symbol binding">;
def no_whole_archive: F<"no-whole-archive">,
HelpText<"Restores the default behavior of loading archive members">;
def noinhibit_exec: F<"noinhibit-exec">,
HelpText<"Retain the executable output file whenever it is still usable">;
def no_undefined: F<"no-undefined">,
HelpText<"Report unresolved symbols even if the linker is creating a shared library">;
// Help text fixed: "refer undefined symbols" was missing the preposition.
def no_undefined_version: F<"no-undefined-version">,
  HelpText<"Report version scripts that refer to undefined symbols">;
// "o" and "z" are single-dash only (explicit ["-"] prefix list); the remaining
// options in this run use the F/J/S helper classes.
def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,
HelpText<"Path to file to write output">;
def pie: F<"pie">, HelpText<"Create a position independent executable">;
def print_gc_sections: F<"print-gc-sections">,
HelpText<"List removed unused sections">;
def reproduce: S<"reproduce">,
HelpText<"Dump linker invocation and input files for debugging">;
def rpath: S<"rpath">, HelpText<"Add a DT_RUNPATH to the output">;
def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">;
def script: S<"script">, HelpText<"Read linker script">;
def shared: F<"shared">, HelpText<"Build a shared object">;
// The canonical soname option is the joined "soname=<value>" form; the
// separate-argument spelling is declared among the aliases.
def soname: J<"soname=">, HelpText<"Set DT_SONAME">;
def start_lib: F<"start-lib">,
HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">;
def strip_all: F<"strip-all">, HelpText<"Strip all symbols">;
def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">;
def sysroot: J<"sysroot=">, HelpText<"Set the system root">;
def threads: F<"threads">, HelpText<"Enable use of threads">;
def trace: F<"trace">, HelpText<"Print the names of the input files">;
def trace_symbol : J<"trace-symbol=">, HelpText<"Trace references to symbols">;
def undefined: J<"undefined=">,
HelpText<"Force undefined symbol during linking">;
def unresolved_symbols: J<"unresolved-symbols=">,
HelpText<"Determine how to handle unresolved symbols">;
def rsp_quoting: J<"rsp-quoting=">,
HelpText<"Quoting style for response files. Values supported: windows|posix">;
def verbose: F<"verbose">, HelpText<"Verbose mode">;
def version: F<"version">, HelpText<"Display the version number">;
def version_script: S<"version-script">,
HelpText<"Read a version script">;
def warn_common: F<"warn-common">,
HelpText<"Warn about duplicate common symbols">;
def whole_archive: F<"whole-archive">,
HelpText<"Force load of all members in a static library">;
def wrap: S<"wrap">, MetaVarName<"<symbol>">,
HelpText<"Use wrapper functions for symbol">;
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
// Aliases
// Each definition below is an alternative spelling that is mapped onto the
// option named in its Alias<...>. The naming scheme is
// alias_<target option>_<spelling>.
def alias_Bdynamic_call_shared: F<"call_shared">, Alias<Bdynamic>;
def alias_Bdynamic_dy: F<"dy">, Alias<Bdynamic>;
def alias_Bstatic_dn: F<"dn">, Alias<Bstatic>;
def alias_Bstatic_non_shared: F<"non_shared">, Alias<Bstatic>;
def alias_Bstatic_static: F<"static">, Alias<Bstatic>;
def alias_L__library_path: J<"library-path=">, Alias<L>;
def alias_discard_all_x: Flag<["-"], "x">, Alias<discard_all>;
def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>;
def alias_dynamic_list: J<"dynamic-list=">, Alias<dynamic_list>;
def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>;
def alias_entry_entry: J<"entry=">, Alias<entry>;
def alias_export_dynamic_E: Flag<["-"], "E">, Alias<export_dynamic>;
def alias_export_dynamic_symbol: J<"export-dynamic-symbol=">,
Alias<export_dynamic_symbol>;
def alias_fini_fini: J<"fini=">, Alias<fini>;
def alias_hash_style_hash_style: J<"hash-style=">, Alias<hash_style>;
def alias_init_init: J<"init=">, Alias<init>;
def alias_l__library: J<"library=">, Alias<l>;
// Only the double-dash spelling "--output=" is declared for this alias.
def alias_o_output: Joined<["--"], "output=">, Alias<o>;
def alias_pie_pic_executable: F<"pic-executable">, Alias<pie>;
def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>;
def alias_rpath_R: Joined<["-"], "R">, Alias<rpath>;
def alias_rpath_rpath: J<"rpath=">, Alias<rpath>;
def alias_script_T: JoinedOrSeparate<["-"], "T">, Alias<script>;
def alias_shared_Bshareable: F<"Bshareable">, Alias<shared>;
def alias_soname_h: JoinedOrSeparate<["-"], "h">, Alias<soname>;
def alias_soname_soname: S<"soname">, Alias<soname>;
def alias_strip_all: Flag<["-"], "s">, Alias<strip_all>;
def alias_strip_debug_S: Flag<["-"], "S">, Alias<strip_debug>;
def alias_trace: Flag<["-"], "t">, Alias<trace>;
def alias_trace_symbol_y : JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>;
def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>;
// Both -V and -v map to the version option.
def alias_version_V: Flag<["-"], "V">, Alias<version>;
def alias_version_v: Flag<["-"], "v">, Alias<version>;
def alias_wrap_wrap: J<"wrap=">, Alias<wrap>;
// Our symbol resolution algorithm handles symbols in archive files differently
// than traditional linkers, so we don't need --start-group and --end-group.
// These options are recognized for compatibility but ignored.
// The "(" and ")" spellings are single-dash only.
def end_group: F<"end-group">;
def end_group_paren: Flag<["-"], ")">;
def start_group: F<"start-group">;
def start_group_paren: Flag<["-"], "(">;
// Ignore LTO plugin-related options.
// clang -flto passes -plugin and -plugin-opt to the linker. This is required
// for ld.gold and ld.bfd to get LTO working. But it's not for lld which doesn't
// rely on a plugin. Instead of detecting which linker is used on clang side we
// just ignore the option on lld side as it's easier. In fact, the linker could
// be called 'ld' and understanding which linker is used would require parsing of
// --version output.
// Both the separate-argument and the joined "=<value>" forms are declared so
// that either spelling is accepted.
def plugin: S<"plugin">;
def plugin_eq: J<"plugin=">;
def plugin_opt: S<"plugin-opt">;
def plugin_opt_eq: J<"plugin-opt=">;
// Options listed below are silently ignored for now for compatibility.
// None of them carries a HelpText.
def allow_shlib_undefined: F<"allow-shlib-undefined">;
def define_common: F<"define-common">;
def demangle: F<"demangle">;
def detect_odr_violations: F<"detect-odr-violations">;
def no_add_needed: F<"no-add-needed">;
def no_allow_shlib_undefined: F<"no-allow-shlib-undefined">;
// Alternative spelling of no_add_needed.
def no_copy_dt_needed_entries: F<"no-copy-dt-needed-entries">,
Alias<no_add_needed>;
def no_dynamic_linker: F<"no-dynamic-linker">;
def no_fatal_warnings: F<"no-fatal-warnings">;
def no_mmap_output_file: F<"no-mmap-output-file">;
def no_warn_common: F<"no-warn-common">;
def no_warn_mismatch: F<"no-warn-mismatch">;
def rpath_link: S<"rpath-link">;
def rpath_link_eq: J<"rpath-link=">;
def sort_common: F<"sort-common">;
def warn_execstack: F<"warn-execstack">;
def warn_shared_textrel: F<"warn-shared-textrel">;
// Single-dash only; takes its value as a separate argument.
def G: Separate<["-"], "G">;
// Aliases for ignored options
def alias_define_common_d: Flag<["-"], "d">, Alias<define_common>;
def alias_define_common_dc: F<"dc">, Alias<define_common>;
def alias_define_common_dp: F<"dp">, Alias<define_common>;
// NOTE(review): version_script is declared with a HelpText among the regular
// options, not in the ignored list, so this alias looks misplaced under the
// heading above - confirm and consider moving it to the main alias section.
def alias_version_script_version_script: J<"version-script=">,
Alias<version_script>;
// LTO-related options.
def lto_jobs: J<"lto-jobs=">, HelpText<"Number of threads to run codegen">;
def lto_aa_pipeline: J<"lto-aa-pipeline=">,
HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">;
def lto_newpm_passes: J<"lto-newpm-passes=">,
HelpText<"Passes to run during LTO">;
// The remaining three options are declared without help text.
def disable_verify: F<"disable-verify">;
def mllvm: S<"mllvm">;
def save_temps: F<"save-temps">;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,732 @@
//===- OutputSections.h -----------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_OUTPUT_SECTIONS_H
#define LLD_ELF_OUTPUT_SECTIONS_H
#include "Config.h"
#include "Relocations.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/SHA1.h"
namespace lld {
namespace elf {
class SymbolBody;
struct SectionPiece;
template <class ELFT> class SymbolTable;
template <class ELFT> class SymbolTableSection;
template <class ELFT> class StringTableSection;
template <class ELFT> class EhInputSection;
template <class ELFT> class InputSection;
template <class ELFT> class InputSectionBase;
template <class ELFT> class MergeInputSection;
template <class ELFT> class MipsReginfoInputSection;
template <class ELFT> class OutputSection;
template <class ELFT> class ObjectFile;
template <class ELFT> class SharedFile;
template <class ELFT> class SharedSymbol;
template <class ELFT> class DefinedRegular;
// This represents a section in an output file.
// Different sub classes represent different types of sections. Some contain
// input sections, others are created by the linker.
// The writer creates multiple OutputSections and assign them unique,
// non-overlapping file offsets and VAs.
//
// Most accessors below simply read or write a field of the ELF section
// header (Header) that will be emitted for this section.
template <class ELFT> class OutputSectionBase {
public:
  typedef typename ELFT::uint uintX_t;
  typedef typename ELFT::Shdr Elf_Shdr;

  OutputSectionBase(StringRef Name, uint32_t Type, uintX_t Flags);

  void setVA(uintX_t VA) { Header.sh_addr = VA; }
  uintX_t getVA() const { return Header.sh_addr; }
  void setFileOffset(uintX_t Off) { Header.sh_offset = Off; }
  void setSHName(unsigned Val) { Header.sh_name = Val; }
  void writeHeaderTo(Elf_Shdr *SHdr);

  // Const-qualified like every other read accessor of this class, so that the
  // name can be obtained through the const OutputSectionBase pointers and
  // references that other classes in this header hand out.
  StringRef getName() const { return Name; }

  // Adds an input section to this output section. The base implementation
  // does nothing; subclasses that hold input sections override it.
  virtual void addSection(InputSectionBase<ELFT> *C) {}

  // NOTE(review): has no initializer here; presumably assigned by the writer
  // before use - confirm.
  unsigned SectionIndex;

  // Returns the size of the section in the output file.
  uintX_t getSize() const { return Header.sh_size; }
  void setSize(uintX_t Val) { Header.sh_size = Val; }
  uintX_t getFlags() const { return Header.sh_flags; }
  uintX_t getFileOff() const { return Header.sh_offset; }
  uintX_t getAlignment() const { return Header.sh_addralign; }
  uint32_t getType() const { return Header.sh_type; }

  // Raises the section alignment to Alignment; never lowers it.
  void updateAlignment(uintX_t Alignment) {
    if (Alignment > Header.sh_addralign)
      Header.sh_addralign = Alignment;
  }

  // If true, this section will be page aligned on disk.
  // Typically the first section of each PT_LOAD segment has this flag.
  bool PageAlign = false;

  // Hooks overridden by subclasses; all default to doing nothing.
  virtual void finalize() {}
  virtual void finalizePieces() {}
  virtual void assignOffsets() {}
  virtual void writeTo(uint8_t *Buf) {}
  virtual ~OutputSectionBase() = default;

protected:
  StringRef Name;
  Elf_Shdr Header;
};
// Output section for the GOT. Besides the generic entries it keeps the
// bookkeeping needed for the MIPS-specific GOT layout.
template <class ELFT> class GotSection final : public OutputSectionBase<ELFT> {
  using Base = OutputSectionBase<ELFT>;
  using uintX_t = typename ELFT::uint;

public:
  GotSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  void addEntry(SymbolBody &Sym);
  void addMipsEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr);
  bool addDynTlsEntry(SymbolBody &Sym);
  bool addTlsIndex();

  // True when there are neither MIPS page entries nor generic entries.
  bool empty() const { return MipsPageEntries == 0 && Entries.empty(); }

  uintX_t getMipsLocalPageOffset(uintX_t Addr);
  uintX_t getMipsGotOffset(const SymbolBody &B, uintX_t Addend) const;
  uintX_t getGlobalDynAddr(const SymbolBody &B) const;
  uintX_t getGlobalDynOffset(const SymbolBody &B) const;
  uintX_t getNumEntries() const { return Entries.size(); }

  // Returns the symbol that corresponds to the first entry of the global
  // part of the GOT on MIPS. It is needed to fill in MIPS-specific dynamic
  // table properties. Returns nullptr if the global part is empty.
  const SymbolBody *getMipsFirstGlobalEntry() const;

  // Returns the number of entries in the local part of the GOT, including
  // the reserved entries. MIPS-specific.
  unsigned getMipsLocalEntriesNum() const;

  // Returns the offset of the TLS part of the MIPS GOT, which follows the
  // 'local' and 'global' entries.
  uintX_t getMipsTlsOffset();

  uintX_t getTlsIndexVA() { return Base::getVA() + TlsIndexOff; }
  uint32_t getTlsIndexOff() { return TlsIndexOff; }

  // Forces the GOT into the output when there are relocations that rely on
  // its address.
  bool HasGotOffRel = false;

private:
  std::vector<const SymbolBody *> Entries;
  uint32_t TlsIndexOff = -1;
  uint32_t MipsPageEntries = 0;

  // Output sections referenced by MIPS GOT relocations.
  llvm::SmallPtrSet<const OutputSectionBase<ELFT> *, 10> MipsOutSections;
  llvm::DenseMap<uintX_t, size_t> MipsLocalGotPos;

  // The MIPS ABI requires a unique GOT entry for each Symbol/Addend pair.
  // `MipsGotMap` maps an (S,A) pair to its GOT index in the `MipsLocal` or
  // `MipsGlobal` vector. In general it does not make sense to take the addend
  // into account for preemptible symbols, because the corresponding GOT
  // entries should map one-to-one onto the dynamic symbol table. The same
  // container types are nevertheless used for both kinds of GOT entries so
  // that they can be handled uniformly.
  using MipsGotEntry = std::pair<const SymbolBody *, uintX_t>;
  using MipsGotEntries = std::vector<MipsGotEntry>;
  llvm::DenseMap<MipsGotEntry, size_t> MipsGotMap;
  MipsGotEntries MipsLocal;
  MipsGotEntries MipsGlobal;

  // Writes the MIPS-specific parts of the GOT.
  void writeMipsGot(uint8_t *&Buf);
};
// Output section that collects one entry per symbol added via addEntry().
template <class ELFT>
class GotPltSection final : public OutputSectionBase<ELFT> {
  using uintX_t = typename ELFT::uint;

public:
  GotPltSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  void addEntry(SymbolBody &Sym);
  bool empty() const;

private:
  std::vector<const SymbolBody *> Entries;
};
// Output section for the PLT. Each entry pairs a symbol with an unsigned
// value recorded alongside it.
template <class ELFT> class PltSection final : public OutputSectionBase<ELFT> {
  using Base = OutputSectionBase<ELFT>;
  using uintX_t = typename ELFT::uint;

public:
  PltSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  void addEntry(SymbolBody &Sym);
  bool empty() const { return Entries.empty(); }

private:
  std::vector<std::pair<const SymbolBody *, unsigned>> Entries;
};
template <class ELFT> class DynamicReloc {
typedef typename ELFT::uint uintX_t;
public:
DynamicReloc(uint32_t Type, const InputSectionBase<ELFT> *InputSec,
uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym,
uintX_t Addend)
: Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec),
UseSymVA(UseSymVA), Addend(Addend) {}
DynamicReloc(uint32_t Type, const OutputSectionBase<ELFT> *OutputSec,
uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym,
uintX_t Addend)
: Type(Type), Sym(Sym), OutputSec(OutputSec), OffsetInSec(OffsetInSec),
UseSymVA(UseSymVA), Addend(Addend) {}
uintX_t getOffset() const;
uintX_t getAddend() const;
uint32_t getSymIndex() const;
const OutputSectionBase<ELFT> *getOutputSec() const { return OutputSec; }
uint32_t Type;
private:
SymbolBody *Sym;
const InputSectionBase<ELFT> *InputSec = nullptr;
const OutputSectionBase<ELFT> *OutputSec = nullptr;
uintX_t OffsetInSec;
bool UseSymVA;
uintX_t Addend;
};
// Output section for a symbol table. It is bound to the string table section
// passed to the constructor.
template <class ELFT>
class SymbolTableSection final : public OutputSectionBase<ELFT> {
public:
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Sym_Range = typename ELFT::SymRange;
  using uintX_t = typename ELFT::uint;

  SymbolTableSection(StringTableSection<ELFT> &StrTabSec);

  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  void addSymbol(SymbolBody *Body);
  StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; }

  // Counts local symbols, the symbols added via this table, plus one
  // additional entry.
  unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; }

  ArrayRef<std::pair<SymbolBody *, size_t>> getSymbols() const {
    return Symbols;
  }

  unsigned NumLocals = 0;
  StringTableSection<ELFT> &StrTabSec;

private:
  void writeLocalSymbols(uint8_t *&Buf);
  void writeGlobalSymbols(uint8_t *Buf);

  const OutputSectionBase<ELFT> *getOutputSection(SymbolBody *Sym);

  // Symbols paired with their string table offsets.
  std::vector<std::pair<SymbolBody *, size_t>> Symbols;
};
// For more information about .gnu.version and .gnu.version_r see:
// https://www.akkadia.org/drepper/symbol-versioning

// The .gnu.version_d section, of type SHT_GNU_verdef, holds the symbol
// version definitions. The number of entries in it is stored in the
// DT_VERDEFNUM entry of the .dynamic section. Its contents are an array of
// Elf_Verdef structures, optionally followed by an array of Elf_Verdaux
// structures.
template <class ELFT>
class VersionDefinitionSection final : public OutputSectionBase<ELFT> {
  using Elf_Verdef = typename ELFT::Verdef;
  using Elf_Verdaux = typename ELFT::Verdaux;

public:
  VersionDefinitionSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;

private:
  void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff);

  unsigned FileDefNameOff;
};
// The .gnu.version section records the required version of every symbol in
// the dynamic symbol table: one Elf_Versym per dynamic symbol table entry.
// An Elf_Versym is a 16-bit integer referring to a version identifier
// defined in either the .gnu.version_r or the .gnu.version_d section.
// Values 0 and 1 are reserved; all other values denote versions in the
// object itself or in any of its dependencies.
template <class ELFT>
class VersionTableSection final : public OutputSectionBase<ELFT> {
  using Elf_Versym = typename ELFT::Versym;

public:
  VersionTableSection();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;
};
// The .gnu.version_r section defines the version identifiers that
// .gnu.version uses. It holds a linked list of Elf_Verneed structures, one
// per DSO with version requirements; each refers to a linked list of
// Elf_Vernaux structures mapping version identifiers to version names.
template <class ELFT>
class VersionNeedSection final : public OutputSectionBase<ELFT> {
  using Elf_Verneed = typename ELFT::Verneed;
  using Elf_Vernaux = typename ELFT::Vernaux;

  // Shared files that need an Elf_Verneed structure, each paired with the
  // string table offset of its soname.
  std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed;

  // The next available version identifier.
  unsigned NextIndex;

public:
  VersionNeedSection();
  void addSymbol(SharedSymbol<ELFT> *SS);
  void finalize() override;
  void writeTo(uint8_t *Buf) override;

  // Number of shared files recorded in Needed.
  size_t getNeedNum() const { return Needed.size(); }
};
// Output section that accumulates DynamicReloc records added via addReloc().
template <class ELFT>
class RelocationSection final : public OutputSectionBase<ELFT> {
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using uintX_t = typename ELFT::uint;

public:
  RelocationSection(StringRef Name, bool Sort);

  void addReloc(const DynamicReloc<ELFT> &Reloc);
  unsigned getRelocOffset();
  void finalize() override;
  void writeTo(uint8_t *Buf) override;

  // True once at least one relocation has been added.
  bool hasRelocs() const { return !Relocs.empty(); }

  bool Static = false;

private:
  bool Sort;
  std::vector<DynamicReloc<ELFT>> Relocs;
};
// Output section made up of regular input sections, which are kept in the
// public Sections vector.
template <class ELFT>
class OutputSection final : public OutputSectionBase<ELFT> {
public:
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using uintX_t = typename ELFT::uint;

  OutputSection(StringRef Name, uint32_t Type, uintX_t Flags);

  void addSection(InputSectionBase<ELFT> *C) override;
  void sortInitFini();
  void sortCtorsDtors();
  void writeTo(uint8_t *Buf) override;
  void finalize() override;
  void assignOffsets() override;

  std::vector<InputSection<ELFT> *> Sections;
};
// Output section made up of mergeable input sections. It owns a
// StringTableBuilder in addition to the list of its input sections.
template <class ELFT>
class MergeOutputSection final : public OutputSectionBase<ELFT> {
  using uintX_t = typename ELFT::uint;

public:
  MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags,
                     uintX_t Alignment);

  void addSection(InputSectionBase<ELFT> *S) override;
  void writeTo(uint8_t *Buf) override;
  unsigned getOffset(StringRef Val);
  void finalize() override;
  void finalizePieces() override;
  bool shouldTailMerge() const;

private:
  llvm::StringTableBuilder Builder;
  std::vector<MergeInputSection<ELFT> *> Sections;
};
// Groups one CIE with the FDEs that belong to it. Used by EhOutputSection,
// which keeps these records in its Cies vector and CieMap.
struct CieRecord {
// The section piece holding the CIE itself; null until assigned.
SectionPiece *Piece = nullptr;
// Section pieces of the FDEs associated with this CIE.
std::vector<SectionPiece *> FdePieces;
};
// Output section for .eh_frame.
template <class ELFT>
class EhOutputSection final : public OutputSectionBase<ELFT> {
  using uintX_t = typename ELFT::uint;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;

public:
  EhOutputSection();

  void writeTo(uint8_t *Buf) override;
  void finalize() override;

  // True when no input section has been recorded in Sections.
  bool empty() const { return Sections.empty(); }

  void addSection(InputSectionBase<ELFT> *S) override;

  size_t NumFdes = 0;

private:
  template <class RelTy>
  void addSectionAux(EhInputSection<ELFT> *S, llvm::ArrayRef<RelTy> Rels);

  template <class RelTy>
  CieRecord *addCie(SectionPiece &Piece, EhInputSection<ELFT> *Sec,
                    ArrayRef<RelTy> &Rels);

  template <class RelTy>
  bool isFdeLive(SectionPiece &Piece, EhInputSection<ELFT> *Sec,
                 ArrayRef<RelTy> &Rels);

  uintX_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc);

  std::vector<EhInputSection<ELFT> *> Sections;
  std::vector<CieRecord *> Cies;

  // CIEs are deduplicated: the key is the pair of the record's contents and
  // its personality function.
  llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap;
};
// Output section that only overrides writeTo().
// NOTE(review): judging by the name this is the .interp section carrying the
// dynamic linker path — confirm against the definition in OutputSections.cpp.
template <class ELFT>
class InterpSection final : public OutputSectionBase<ELFT> {
public:
InterpSection();
void writeTo(uint8_t *Buf) override;
};
// Output section for an ELF string table. Strings are registered with
// addString(); Size tracks the running size of the table.
template <class ELFT>
class StringTableSection final : public OutputSectionBase<ELFT> {
public:
  using uintX_t = typename ELFT::uint;

  StringTableSection(StringRef Name, bool Dynamic);

  unsigned addString(StringRef S, bool HashIt = true);
  void writeTo(uint8_t *Buf) override;

  unsigned getSize() const { return Size; }

  // Publishes the current table size in the section header.
  void finalize() override { this->Header.sh_size = getSize(); }

  bool isDynamic() const { return Dynamic; }

private:
  const bool Dynamic;
  llvm::DenseMap<StringRef, unsigned> StringMap;
  std::vector<StringRef> Strings;

  // Starts at 1 because an ELF string table begins with a NUL byte.
  unsigned Size = 1;
};
// Hash table output section; see GnuHashTableSection for the GNU-style
// variant.
template <class ELFT>
class HashTableSection final : public OutputSectionBase<ELFT> {
  using Elf_Word = typename ELFT::Word;

public:
  HashTableSection();

  void finalize() override;
  void writeTo(uint8_t *Buf) override;
};
// Emits the GNU hash section. A detailed description of the format is at:
// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
template <class ELFT>
class GnuHashTableSection final : public OutputSectionBase<ELFT> {
  using Elf_Off = typename ELFT::Off;
  using Elf_Word = typename ELFT::Word;
  using uintX_t = typename ELFT::uint;

public:
  GnuHashTableSection();

  void finalize() override;
  void writeTo(uint8_t *Buf) override;

  // Registers symbols with the hash table. The input vector is sorted so
  // that it meets the ordering the GNU hash section requires.
  void addSymbols(std::vector<std::pair<SymbolBody *, size_t>> &Symbols);

private:
  static unsigned calcNBuckets(unsigned NumHashed);
  static unsigned calcMaskWords(unsigned NumHashed);

  // writeHeader() and writeBloomFilter() take the buffer pointer by
  // reference; writeHashTable() takes it by value.
  void writeHeader(uint8_t *&Buf);
  void writeBloomFilter(uint8_t *&Buf);
  void writeHashTable(uint8_t *Buf);

  // Per-symbol bookkeeping: the symbol, its string table offset and hash.
  struct SymbolData {
    SymbolBody *Body;
    size_t STName;
    uint32_t Hash;
  };

  std::vector<SymbolData> Symbols;

  unsigned MaskWords;
  unsigned NBuckets;
  unsigned Shift2;
};
// Output section for .dynamic.
template <class ELFT>
class DynamicSection final : public OutputSectionBase<ELFT> {
  using Base = OutputSectionBase<ELFT>;
  using Elf_Dyn = typename ELFT::Dyn;
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;
  using uintX_t = typename ELFT::uint;

  // One fixed-size record of the .dynamic section, which carries information
  // for the dynamic linker. A record is a tag plus a value, and the value is
  // a plain integer, a symbol address or a section address; Kind says which
  // union member is active.
  struct Entry {
    int32_t Tag;
    union {
      OutputSectionBase<ELFT> *OutSec;
      uint64_t Val;
      const SymbolBody *Sym;
    };
    enum KindT { SecAddr, SymAddr, PlainInt } Kind;

    Entry(int32_t Tag, OutputSectionBase<ELFT> *OutSec)
        : Tag(Tag), OutSec(OutSec), Kind(SecAddr) {}
    Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {}
    Entry(int32_t Tag, const SymbolBody *Sym)
        : Tag(Tag), Sym(Sym), Kind(SymAddr) {}
  };

  // Filled in by finalize(). The final bytes cannot be produced at that
  // point because symbol and section addresses are not fixed yet, so the
  // entries are kept in this symbolic form until later.
  std::vector<Entry> Entries;

public:
  explicit DynamicSection();

  void finalize() override;
  void writeTo(uint8_t *Buf) override;

  OutputSectionBase<ELFT> *PreInitArraySec = nullptr;
  OutputSectionBase<ELFT> *InitArraySec = nullptr;
  OutputSectionBase<ELFT> *FiniArraySec = nullptr;
};
// MIPS register-info output section; keeps a GprMask value that starts at
// zero.
template <class ELFT>
class MipsReginfoOutputSection final : public OutputSectionBase<ELFT> {
  using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>;

public:
  MipsReginfoOutputSection();

  void writeTo(uint8_t *Buf) override;
  void addSection(InputSectionBase<ELFT> *S) override;

private:
  uint32_t GprMask = 0;
};
// MIPS options output section; keeps a GprMask value that starts at zero.
template <class ELFT>
class MipsOptionsOutputSection final : public OutputSectionBase<ELFT> {
  using Elf_Mips_Options = llvm::object::Elf_Mips_Options<ELFT>;
  using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>;

public:
  MipsOptionsOutputSection();

  void writeTo(uint8_t *Buf) override;
  void addSection(InputSectionBase<ELFT> *S) override;

private:
  uint32_t GprMask = 0;
};
// The --eh-frame-hdr option asks the linker to build a header covering all
// .eh_frame sections. The header lives in a section named .eh_frame_hdr and
// is also described by a PT_GNU_EH_FRAME segment, so that at runtime the
// unwinder can locate every PT_GNU_EH_FRAME segment through dl_iterate_phdr.
// The section holds a lookup table that allows a fast binary search of FDEs.
// Ian Lance Taylor's blog describes the internals in detail:
// http://www.airs.com/blog/archives/460 (".eh_frame")
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
template <class ELFT>
class EhFrameHeader final : public OutputSectionBase<ELFT> {
  using uintX_t = typename ELFT::uint;

public:
  EhFrameHeader();

  void finalize() override;
  void writeTo(uint8_t *Buf) override;
  void addFde(uint32_t Pc, uint32_t FdeVA);

private:
  // One lookup-table record: a PC value and the address of its FDE.
  struct FdeData {
    uint32_t Pc;
    uint32_t FdeVA;
  };

  std::vector<FdeData> Fdes;
};
// Abstract base of the build-id output sections. Subclasses implement
// writeBuildId(); the constructor is protected, so the class can only be
// constructed through a subclass.
template <class ELFT> class BuildIdSection : public OutputSectionBase<ELFT> {
public:
void writeTo(uint8_t *Buf) override;
// Implemented by each concrete build-id flavour.
// NOTE(review): Bufs presumably holds the output contents to hash — confirm
// against the caller.
virtual void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) = 0;
protected:
BuildIdSection(size_t HashSize);
// Presumably initialised from the constructor argument of the same name
// (the constructor body is not in this header) — confirm.
size_t HashSize;
// Null until assigned.
uint8_t *HashBuf = nullptr;
};
// Build-id flavour that constructs its BuildIdSection base with a hash size
// of 8.
template <class ELFT> class BuildIdFnv1 final : public BuildIdSection<ELFT> {
public:
BuildIdFnv1() : BuildIdSection<ELFT>(8) {}
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Build-id flavour that constructs its BuildIdSection base with a hash size
// of 16.
template <class ELFT> class BuildIdMd5 final : public BuildIdSection<ELFT> {
public:
BuildIdMd5() : BuildIdSection<ELFT>(16) {}
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Build-id flavour that constructs its BuildIdSection base with a hash size
// of 20.
template <class ELFT> class BuildIdSha1 final : public BuildIdSection<ELFT> {
public:
BuildIdSha1() : BuildIdSection<ELFT>(20) {}
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Build-id flavour whose constructor is defined out of line, so the hash
// size it passes to BuildIdSection is not visible in this header.
// NOTE(review): presumably the size comes from a user-supplied hex string —
// confirm against the constructor definition.
template <class ELFT>
class BuildIdHexstring final : public BuildIdSection<ELFT> {
public:
BuildIdHexstring();
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Output sections that the linker handles specially are reachable globally
// through these static members. Writer is responsible for initializing
// them, so they must not be used before Writer has been initialized.
template <class ELFT> struct Out {
  using uintX_t = typename ELFT::uint;
  using Elf_Phdr = typename ELFT::Phdr;

  static BuildIdSection<ELFT> *BuildId;
  static DynamicSection<ELFT> *Dynamic;
  static EhFrameHeader<ELFT> *EhFrameHdr;
  static EhOutputSection<ELFT> *EhFrame;
  static GnuHashTableSection<ELFT> *GnuHashTab;
  static GotPltSection<ELFT> *GotPlt;
  static GotSection<ELFT> *Got;
  static HashTableSection<ELFT> *HashTab;
  static InterpSection<ELFT> *Interp;
  static OutputSection<ELFT> *Bss;
  static OutputSection<ELFT> *MipsRldMap;
  static OutputSectionBase<ELFT> *Opd;
  static uint8_t *OpdBuf;
  static PltSection<ELFT> *Plt;
  static RelocationSection<ELFT> *RelaDyn;
  static RelocationSection<ELFT> *RelaPlt;
  static StringTableSection<ELFT> *DynStrTab;
  static StringTableSection<ELFT> *ShStrTab;
  static StringTableSection<ELFT> *StrTab;
  static SymbolTableSection<ELFT> *DynSymTab;
  static SymbolTableSection<ELFT> *SymTab;
  static VersionDefinitionSection<ELFT> *VerDef;
  static VersionTableSection<ELFT> *VerSym;
  static VersionNeedSection<ELFT> *VerNeed;
  static Elf_Phdr *TlsPhdr;
  static OutputSectionBase<ELFT> *ElfHeader;
  static OutputSectionBase<ELFT> *ProgramHeaders;
};
// Lookup key for output sections: name, type, flags and alignment. The
// integer width of Flags and Alignment follows Is64Bits.
template <bool Is64Bits> struct SectionKey {
  using uintX_t =
      typename std::conditional<Is64Bits, uint64_t, uint32_t>::type;

  StringRef Name;
  uint32_t Type;
  uintX_t Flags;
  uintX_t Alignment;
};
// Creates the output section that a given input section belongs to. Which
// kind of output section results depends on several inputs, among them the
// input section's sh_flags and sh_type and any linker script.
template <class ELFT> class OutputSectionFactory {
  using Elf_Shdr = typename ELFT::Shdr;
  using uintX_t = typename ELFT::uint;
  using Key = typename elf::SectionKey<ELFT::Is64Bits>;

public:
  std::pair<OutputSectionBase<ELFT> *, bool> create(InputSectionBase<ELFT> *C,
                                                    StringRef OutsecName);

  OutputSectionBase<ELFT> *lookup(StringRef Name, uint32_t Type,
                                  uintX_t Flags);

private:
  Key createKey(InputSectionBase<ELFT> *C, StringRef OutsecName);

  // Output sections created so far, indexed by their SectionKey.
  llvm::SmallDenseMap<Key, OutputSectionBase<ELFT> *> Map;
};
// Out-of-class definitions of the static data members declared in Out<ELFT>.
// There is one definition per declared member, in the same order as the
// declarations, and none of them has an explicit initializer.
template <class ELFT> BuildIdSection<ELFT> *Out<ELFT>::BuildId;
template <class ELFT> DynamicSection<ELFT> *Out<ELFT>::Dynamic;
template <class ELFT> EhFrameHeader<ELFT> *Out<ELFT>::EhFrameHdr;
template <class ELFT> EhOutputSection<ELFT> *Out<ELFT>::EhFrame;
template <class ELFT> GnuHashTableSection<ELFT> *Out<ELFT>::GnuHashTab;
template <class ELFT> GotPltSection<ELFT> *Out<ELFT>::GotPlt;
template <class ELFT> GotSection<ELFT> *Out<ELFT>::Got;
template <class ELFT> HashTableSection<ELFT> *Out<ELFT>::HashTab;
template <class ELFT> InterpSection<ELFT> *Out<ELFT>::Interp;
template <class ELFT> OutputSection<ELFT> *Out<ELFT>::Bss;
template <class ELFT> OutputSection<ELFT> *Out<ELFT>::MipsRldMap;
template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::Opd;
template <class ELFT> uint8_t *Out<ELFT>::OpdBuf;
template <class ELFT> PltSection<ELFT> *Out<ELFT>::Plt;
template <class ELFT> RelocationSection<ELFT> *Out<ELFT>::RelaDyn;
template <class ELFT> RelocationSection<ELFT> *Out<ELFT>::RelaPlt;
template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::DynStrTab;
template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::ShStrTab;
template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::StrTab;
template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::DynSymTab;
template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::SymTab;
template <class ELFT> VersionDefinitionSection<ELFT> *Out<ELFT>::VerDef;
template <class ELFT> VersionTableSection<ELFT> *Out<ELFT>::VerSym;
template <class ELFT> VersionNeedSection<ELFT> *Out<ELFT>::VerNeed;
template <class ELFT> typename ELFT::Phdr *Out<ELFT>::TlsPhdr;
template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::ElfHeader;
template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::ProgramHeaders;
} // namespace elf
} // namespace lld
namespace llvm {
// DenseMapInfo specialization so that lld::elf::SectionKey can be used as a
// key in LLVM's dense maps (OutputSectionFactory::Map does so). The four
// hooks are only declared here.
template <bool Is64Bits> struct DenseMapInfo<lld::elf::SectionKey<Is64Bits>> {
  using Key = typename lld::elf::SectionKey<Is64Bits>;

  static Key getEmptyKey();
  static Key getTombstoneKey();
  static unsigned getHashValue(const Key &Val);
  static bool isEqual(const Key &LHS, const Key &RHS);
};
} // namespace llvm
#endif

View File

@ -0,0 +1 @@
See docs/NewLLD.rst

View File

@ -0,0 +1,704 @@
//===- Relocations.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains platform-independent functions to process relocations.
// I'll describe the overview of this file here.
//
// Simple relocations are easy to handle for the linker. For example,
// for R_X86_64_PC64 relocs, the linker just has to fix up locations
// with the relative offsets to the target symbols. It would just be
// reading records from relocation sections and applying them to output.
//
// But not all relocations are that easy to handle. For example, for
// R_386_GOTOFF relocs, the linker has to create new GOT entries for
// symbols if they don't exist, and fix up locations with GOT entry
// offsets from the beginning of GOT section. So there is more than
// fixing addresses in relocation processing.
//
// ELF defines a large number of complex relocations.
//
// The functions in this file analyze relocations and do whatever needs
// to be done. That includes, but is not limited to, the following:
//
// - create GOT/PLT entries
// - create new relocations in .dynsym to let the dynamic linker resolve
// them at runtime (since ELF supports dynamic linking, not all
// relocations can be resolved at link-time)
// - create COPY relocs and reserve space in .bss
// - replace expensive relocs (in terms of runtime cost) with cheap ones
// - error out infeasible combinations such as PIC and non-relative relocs
//
// Note that the functions in this file don't actually apply relocations
// because they don't know about the output file or its buffer. Instead,
// they store Relocation objects in the InputSection's Relocations vector
// so that the relocations can be applied later in InputSection::writeTo.
//
//===----------------------------------------------------------------------===//
#include "Relocations.h"
#include "Config.h"
#include "OutputSections.h"
#include "SymbolTable.h"
#include "Target.h"
#include "Thunks.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;
namespace lld {
namespace elf {
// Returns true if Expr is one of the expression kinds listed below, i.e. one
// that refers to a GOT entry.
static bool refersToGotEntry(RelExpr Expr) {
  static const RelExpr GotExprs[] = {
      R_GOT,          R_GOT_OFF,    R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF,
      R_MIPS_TLSGD,   R_MIPS_TLSLD, R_GOT_PAGE_PC,         R_GOT_PC,
      R_GOT_FROM_END, R_TLSGD,      R_TLSGD_PC,            R_TLSDESC,
      R_TLSDESC_PAGE,
  };
  for (RelExpr Candidate : GotExprs)
    if (Candidate == Expr)
      return true;
  return false;
}
static bool isPreemptible(const SymbolBody &Body, uint32_t Type) {
  // MIPS GP-relative relocations always resolve to a definition in a regular
  // input file and ignore the one-definition rule. As a consequence we must
  // not, for example, try to create a dynamic relocation for them even when
  // the target symbol is preemptible. There are two MIPS GP-relative
  // relocations, R_MIPS_GPREL16 and R_MIPS_GPREL32, but only R_MIPS_GPREL16
  // can be against a preemptible symbol.
  //
  // The MIPS relocation type is obtained with a 0xff mask. Under the O32 ABI
  // every relocation type fits in eight bits. Under the N64 ABI this picks
  // the first relocation of the 3-in-1 packet, which is the only one that can
  // be against a real symbol.
  const bool IsMipsGpRel16 =
      Config->EMachine == EM_MIPS && (Type & 0xff) == R_MIPS_GPREL16;
  return !IsMipsGpRel16 && Body.isPreemptible();
}
// MIPS counterpart of `handleTlsRelocation`. MIPS supports no relaxations for
// TLS relocations, so keeping its handling in a separate function simplifies
// the code and keeps MIPS-specific `if` statements out of
// `handleTlsRelocation`.
//
// Returns 1 when the relocation was handled here and 0 otherwise.
template <class ELFT>
static unsigned
handleMipsTlsRelocation(uint32_t Type, SymbolBody &Body,
                        InputSectionBase<ELFT> &C, typename ELFT::uint Offset,
                        typename ELFT::uint Addend, RelExpr Expr) {
  typedef typename ELFT::uint uintX_t;

  const bool IsLocalDynamic = Expr == R_MIPS_TLSLD;
  if (!IsLocalDynamic && !Target->isTlsGlobalDynamicRel(Type))
    return 0;

  if (IsLocalDynamic) {
    // The TLS index slot needs a dynamic relocation only the first time it
    // is added.
    if (Out<ELFT>::Got->addTlsIndex())
      Out<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, Out<ELFT>::Got,
                                    Out<ELFT>::Got->getTlsIndexOff(), false,
                                    nullptr, 0});
  } else if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
    // A newly added dynamic TLS entry gets two relocations: one at its GOT
    // offset and one sizeof(uintX_t) bytes after it.
    uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body);
    Out<ELFT>::RelaDyn->addReloc(
        {Target->TlsModuleIndexRel, Out<ELFT>::Got, Off, false, &Body, 0});
    Out<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Out<ELFT>::Got,
                                  Off + (uintX_t)sizeof(uintX_t), false, &Body,
                                  0});
  }

  C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
  return 1;
}
// Returns the number of relocations processed.
template <class ELFT>
static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body,
InputSectionBase<ELFT> &C,
typename ELFT::uint Offset,
typename ELFT::uint Addend, RelExpr Expr) {
if (!(C.getSectionHdr()->sh_flags & SHF_ALLOC))
return 0;
if (!Body.isTls())
return 0;
typedef typename ELFT::uint uintX_t;
if (Config->EMachine == EM_MIPS)
return handleMipsTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr);
if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_HINT) &&
Config->Shared) {
if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body);
Out<ELFT>::RelaDyn->addReloc(
{Target->TlsDescRel, Out<ELFT>::Got, Off, false, &Body, 0});
}
if (Expr != R_HINT)
C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
return 1;
}
if (Expr == R_TLSLD_PC || Expr == R_TLSLD) {
// Local-Dynamic relocs can be relaxed to Local-Exec.
if (!Config->Shared) {
C.Relocations.push_back(
{R_RELAX_TLS_LD_TO_LE, Type, &C, Offset, Addend, &Body});
return 2;
}
if (Out<ELFT>::Got->addTlsIndex())
Out<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, Out<ELFT>::Got,
Out<ELFT>::Got->getTlsIndexOff(), false,
nullptr, 0});
C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
return 1;
}
// Local-Dynamic relocs can be relaxed to Local-Exec.
if (Target->isTlsLocalDynamicRel(Type) && !Config->Shared) {
C.Relocations.push_back(
{R_RELAX_TLS_LD_TO_LE, Type, &C, Offset, Addend, &Body});
return 1;
}
if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_HINT ||
Target->isTlsGlobalDynamicRel(Type)) {
if (Config->Shared) {
if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body);
Out<ELFT>::RelaDyn->addReloc(
{Target->TlsModuleIndexRel, Out<ELFT>::Got, Off, false, &Body, 0});
// If the symbol is preemptible we need the dynamic linker to write
// the offset too.
if (isPreemptible(Body, Type))
Out<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Out<ELFT>::Got,
Off + (uintX_t)sizeof(uintX_t), false,
&Body, 0});
}
C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
return 1;
}
// Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
// depending on the symbol being locally defined or not.
if (isPreemptible(Body, Type)) {
C.Relocations.push_back(
{Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type,
&C, Offset, Addend, &Body});
if (!Body.isInGot()) {
Out<ELFT>::Got->addEntry(Body);
Out<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, Out<ELFT>::Got,
Body.getGotOffset<ELFT>(), false, &Body,
0});
}
return Target->TlsGdRelaxSkip;
}
C.Relocations.push_back(
{Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, &C,
Offset, Addend, &Body});
return Target->TlsGdRelaxSkip;
}
// Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
// defined.
if (Target->isTlsInitialExecRel(Type) && !Config->Shared &&
!isPreemptible(Body, Type)) {
C.Relocations.push_back(
{R_RELAX_TLS_IE_TO_LE, Type, &C, Offset, Addend, &Body});
return 1;
}
return 0;
}
// Reads a 32-bit word at Loc with endianness E and returns its low 16 bits,
// converted to a signed 16-bit value.
template <endianness E> static int16_t readSignedLo16(const uint8_t *Loc) {
  auto Word = read32<E>(Loc);
  return Word & 0xffff;
}
// Returns the type of the relocation that is paired with *Rel, or R_MIPS_NONE
// when *Rel has no pair. R_MIPS_GOT16 is only paired (with R_MIPS_LO16) when
// the symbol is local.
template <class RelTy>
static uint32_t getMipsPairType(const RelTy *Rel, const SymbolBody &Sym) {
  const uint32_t Type = Rel->getType(Config->Mips64EL);
  if (Type == R_MIPS_HI16)
    return R_MIPS_LO16;
  if (Type == R_MIPS_GOT16)
    return Sym.isLocal() ? R_MIPS_LO16 : R_MIPS_NONE;
  if (Type == R_MIPS_PCHI16)
    return R_MIPS_PCLO16;
  if (Type == R_MICROMIPS_HI16)
    return R_MICROMIPS_LO16;
  return R_MIPS_NONE;
}
// Computes the combined addend of a MIPS relocation and its paired
// relocation. Returns 0 for RELA records, for relocations without a pair,
// and (after a warning) when no paired relocation is found in [Rel, End).
template <class ELFT, class RelTy>
static int32_t findMipsPairedAddend(const uint8_t *Buf, const uint8_t *BufLoc,
                                    SymbolBody &Sym, const RelTy *Rel,
                                    const RelTy *End) {
  const uint32_t PairType = getMipsPairType(Rel, Sym);

  // Some MIPS relocations use an addend built from the relocation's own
  // addend and the addend of its paired relocation. The ABI asks for this
  // combined addend only with the REL record format.
  // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
  if (RelTy::IsRela || PairType == R_MIPS_NONE)
    return 0;

  const uint32_t SymIndex = Rel->getSymbol(Config->Mips64EL);
  const endianness E = ELFT::TargetEndianness;

  // The pair is the first record of the paired type against the same symbol.
  for (const RelTy *RI = Rel; RI != End; ++RI) {
    if (RI->getType(Config->Mips64EL) == PairType &&
        RI->getSymbol(Config->Mips64EL) == SymIndex)
      return ((read32<E>(BufLoc) & 0xffff) << 16) +
             readSignedLo16<E>(Buf + RI->r_offset);
  }

  warning("can't find matching " + getRelName(PairType) + " relocation for " +
          getRelName(Rel->getType(Config->Mips64EL)));
  return 0;
}
// Returns true if a non-preemptible symbol has the same value no matter
// where the DSO is loaded: either a non-local weak undefined symbol, or a
// DefinedRegular symbol with no section (an absolute symbol).
template <class ELFT> static bool isAbsolute(const SymbolBody &Body) {
  if (Body.isUndefined())
    return !Body.isLocal() && Body.symbol()->isWeak();

  const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(&Body);
  return Regular && Regular->Section == nullptr;
}
// Returns true if Expr is one of the PLT-based expression kinds.
static bool needsPlt(RelExpr Expr) {
  static const RelExpr PltExprs[] = {R_PLT_PC, R_PPC_PLT_OPD, R_PLT,
                                     R_PLT_PAGE_PC, R_THUNK_PLT_PC};
  for (RelExpr Candidate : PltExprs)
    if (Candidate == Expr)
      return true;
  return false;
}
// Returns true if the expression has the form Sym - X, where X is a position
// in the file (the PC or the GOT, for example).
static bool isRelExpr(RelExpr Expr) {
  static const RelExpr RelativeExprs[] = {R_PC,           R_GOTREL,
                                          R_PAGE_PC,      R_RELAX_GOT_PC,
                                          R_THUNK_PC,     R_THUNK_PLT_PC};
  for (RelExpr Candidate : RelativeExprs)
    if (Candidate == Expr)
      return true;
  return false;
}
// Returns true if the value of a relocation with expression E and type Type
// against Body is known at static link time. Reports an error (and still
// returns true) for a relative relocation against an absolute symbol that is
// not a weak undefined.
template <class ELFT>
static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type,
                                     const SymbolBody &Body) {
  // Expressions whose result is always a constant.
  static const RelExpr AlwaysConstant[] = {
      R_SIZE,         R_GOT_FROM_END, R_GOT_OFF,      R_MIPS_GOT_LOCAL_PAGE,
      R_MIPS_GOT_OFF, R_MIPS_TLSGD,   R_GOT_PAGE_PC,  R_GOT_PC,
      R_PLT_PC,       R_TLSGD_PC,     R_TLSGD,        R_PPC_PLT_OPD,
      R_TLSDESC_PAGE, R_HINT,         R_THUNK_PC,     R_THUNK_PLT_PC,
  };
  for (RelExpr Candidate : AlwaysConstant)
    if (Candidate == E)
      return true;

  // Expressions that are never constant, unless only the low bits are used
  // or the whole file is position dependent.
  if (E == R_GOT || E == R_PLT || E == R_TLSDESC)
    return Target->usesOnlyLowPageBits(Type) || !Config->Pic;

  if (isPreemptible(Body, Type))
    return false;

  if (!Config->Pic)
    return true;

  const bool AbsVal = isAbsolute<ELFT>(Body) || Body.isTls();
  const bool RelE = isRelExpr(E);

  // An absolute expression on an absolute value, or a relative expression on
  // a position-dependent value, is a constant.
  if (AbsVal != RelE)
    return true;

  // From here on AbsVal and RelE are equal.
  if (AbsVal) {
    // Relative relocation to an absolute value. Normally this cannot be
    // represented, but for a weak undefined symbol we let it resolve to the
    // image base. That is a little strange, yet it lets us link function
    // calls to such symbols. Such a call is normally guarded by a comparison
    // that loads a zero from the GOT.
    if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak())
      return true;

    error("relocation " + getRelName(Type) +
          " cannot refer to absolute symbol " + Body.getName());
    return true;
  }

  return Target->usesOnlyLowPageBits(Type);
}
// Maps a direct expression to its PLT-based counterpart. Expressions without
// a counterpart are returned unchanged.
static RelExpr toPlt(RelExpr Expr) {
  static const struct {
    RelExpr Direct;
    RelExpr ViaPlt;
  } Table[] = {
      {R_PPC_OPD, R_PPC_PLT_OPD},
      {R_PC, R_PLT_PC},
      {R_PAGE_PC, R_PLT_PAGE_PC},
      {R_ABS, R_PLT},
  };
  for (const auto &Row : Table)
    if (Row.Direct == Expr)
      return Row.ViaPlt;
  return Expr;
}
// Used once we have decided not to go through a PLT: turns a reference to
// the PLT into a reference to the symbol itself. Expressions not listed in
// the table are returned unchanged.
static RelExpr fromPlt(RelExpr Expr) {
  static const struct {
    RelExpr ViaPlt;
    RelExpr Direct;
  } Table[] = {
      {R_PLT_PC, R_PC},
      {R_PPC_PLT_OPD, R_PPC_OPD},
      {R_PLT, R_ABS},
  };
  for (const auto &Row : Table)
    if (Row.ViaPlt == Expr)
      return Row.Direct;
  return Expr;
}
// Derives an alignment for a shared symbol from its section's sh_addralign
// and its st_value: 2 raised to the smaller of the two trailing-zero counts.
// NOTE(review): the shift is performed on an int, so a count of 31 or more
// would overflow — presumably never reached for real inputs; confirm.
template <class ELFT> static uint32_t getAlignment(SharedSymbol<ELFT> *SS) {
  typedef typename ELFT::uint uintX_t;

  uintX_t SecAlign = SS->file()->getSection(SS->Sym)->sh_addralign;
  uintX_t SymValue = SS->Sym.st_value;

  auto SecZeros = countTrailingZeros(SecAlign);
  auto ValueZeros = countTrailingZeros(SymValue);
  int TrailingZeros = std::min(SecZeros, ValueZeros);
  return 1 << TrailingZeros;
}
// Reserves room in .bss for a copy relocation against SS and registers the
// copy relocation in RelaDyn. Aborts via fatal() when the symbol has size 0.
template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) {
  typedef typename ELFT::uint uintX_t;
  typedef typename ELFT::Sym Elf_Sym;

  // A copy relocation against a zero-sized symbol makes no sense.
  uintX_t SymSize = SS->template getSize<ELFT>();
  if (SymSize == 0)
    fatal("cannot create a copy relocation for " + SS->getName());

  // Place the copy at the next suitably aligned offset and grow .bss.
  uintX_t Alignment = getAlignment(SS);
  uintX_t Off = alignTo(Out<ELFT>::Bss->getSize(), Alignment);
  Out<ELFT>::Bss->setSize(Off + SymSize);
  Out<ELFT>::Bss->updateAlignment(Alignment);

  uintX_t Shndx = SS->Sym.st_shndx;
  uintX_t Value = SS->Sym.st_value;

  // Walk the DSO's dynamic symbol table looking for aliases (same section
  // index and value) and mark each one that is known to the symbol table as
  // a SharedSymbol. This makes the copy relocation interpose aliases
  // correctly.
  for (const Elf_Sym &S : SS->file()->getElfSymbols(true)) {
    if (S.st_shndx == Shndx && S.st_value == Value) {
      auto *Alias = dyn_cast_or_null<SharedSymbol<ELFT>>(Symtab<ELFT>::X->find(
          check(S.getName(SS->file()->getStringTable()))));
      if (Alias) {
        Alias->OffsetInBss = Off;
        Alias->NeedsCopyOrPltAddr = true;
        Alias->symbol()->IsUsedInRegularObj = true;
      }
    }
  }

  Out<ELFT>::RelaDyn->addReloc(
      {Target->CopyRel, Out<ELFT>::Bss, SS->OffsetInBss, false, SS, 0});
}
// Adjusts the relocation expression for the target symbol: switches to or
// from the PLT form, applies target relaxation and thunk selection, and, if
// the result would need a dynamic relocation in a read-only section, either
// reports an error or falls back to a copy relocation (objects) or a PLT
// address (functions).
template <class ELFT>
static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body,
                          bool IsWrite, RelExpr Expr, uint32_t Type,
                          const uint8_t *Data) {
  const bool CanBePreempted = isPreemptible(Body, Type);

  if (Body.isGnuIFunc()) {
    Expr = toPlt(Expr);
  } else if (!CanBePreempted) {
    if (needsPlt(Expr))
      Expr = fromPlt(Expr);
    if (Expr == R_GOT_PC)
      Expr = Target->adjustRelaxExpr(Type, Data, Expr);
  }
  Expr = Target->getThunkExpr(Expr, Type, File, Body);

  // Nothing more to do for writable sections or link-time constants.
  if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body))
    return Expr;

  // This relocation would require the dynamic linker to write a value into
  // read-only memory. We can work around that when producing an executable,
  // provided the referenced symbol can be preempted to refer to the
  // executable.
  if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) {
    error("can't create dynamic relocation " + getRelName(Type) +
          " against readonly segment");
    return Expr;
  }

  if (Body.getVisibility() != STV_DEFAULT) {
    error("cannot preempt symbol");
    return Expr;
  }

  if (Body.isObject()) {
    // Data symbol: produce a copy relocation, once per symbol.
    auto *Shared = cast<SharedSymbol<ELFT>>(&Body);
    if (!Shared->needsCopy())
      addCopyRelSymbol(Shared);
    return Expr;
  }

  if (Body.isFunc()) {
    // A non-PIC program is calling a function in a shared library. In an
    // ideal world we would simply report that the relocation can overflow at
    // runtime. In the real world, glibc's crt1.o has an R_X86_64_PC32
    // pointing to libc.so.
    //
    // The general approach is to create a PLT entry and use it as the value
    // of the function.
    //
    // For static linking we just return a PLT expression, and everything
    // else uses the PLT entry as the address.
    //
    // What remains is keeping pointer equality intact, which needs help from
    // the dynamic linker. We tell it that we hold a direct reference to a
    // shared-object symbol by creating an undefined symbol with a non-zero
    // st_value. On seeing that, the dynamic linker resolves the symbol to the
    // value of the symbol we created. This holds even for GOT entries, so
    // pointer equality is maintained. To avoid an infinite loop, the only
    // entry that points to the real function is a dedicated GOT entry used by
    // the PLT, identified by special relocation types (R_X86_64_JUMP_SLOT,
    // R_386_JMP_SLOT, etc).
    Body.NeedsCopyOrPltAddr = true;
    return toPlt(Expr);
  }

  error("symbol is missing type");
  return Expr;
}
// Computes the addend of relocation RI: the explicit addend, plus the
// implicit one stored in the section data for REL records, plus the MIPS and
// PPC64 specific adjustments below.
template <class ELFT, class RelTy>
static typename ELFT::uint computeAddend(const elf::ObjectFile<ELFT> &File,
                                         const uint8_t *SectionData,
                                         const RelTy *End, const RelTy &RI,
                                         RelExpr Expr, SymbolBody &Body) {
  typedef typename ELFT::uint uintX_t;

  const uint32_t Type = RI.getType(Config->Mips64EL);
  const uint8_t *BufLoc = SectionData + RI.r_offset;

  uintX_t Addend = getAddend<ELFT>(RI);
  if (!RelTy::IsRela)
    Addend += Target->getImplicitAddend(BufLoc, Type);

  if (Config->EMachine == EM_MIPS) {
    Addend += findMipsPairedAddend<ELFT>(SectionData, BufLoc, Body, &RI, End);

    // An R_MIPS_LO16 expression has R_PC type if and only if the target is
    // the _gp_disp symbol. In that case the formula to use is
    // "AHL + GP - P + 4", so the 4 is added right here.
    // For details see p. 4-19 at
    // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
    if (Type == R_MIPS_LO16 && Expr == R_PC)
      Addend += 4;

    if (Expr == R_GOTREL) {
      Addend -= MipsGPOffset;
      if (Body.isLocal())
        Addend += File.getMipsGp0();
    }
  }

  const bool IsPicPpc64Toc =
      Config->Pic && Config->EMachine == EM_PPC64 && Type == R_PPC64_TOC;
  if (IsPicPpc64Toc)
    Addend += getPPC64TocBase();

  return Addend;
}
// The reason we have to do this early scan is as follows
// * To mmap the output file, we need to know the size
// * For that, we need to know how many dynamic relocs we will have.
// It might be possible to avoid this by outputting the file with write:
// * Write the allocated output sections, computing addresses.
// * Apply relocations, recording which ones require a dynamic reloc.
// * Write the dynamic relocations.
// * Write the rest of the file.
// This would have some drawbacks. For example, we would only know if .rela.dyn
// is needed after applying relocations. If it is, it will go after rw and rx
// sections. Given that it is ro, we will need an extra PT_LOAD. This
// complicates things for the dynamic linker and means we would have to reserve
// space for the extra PT_LOAD even if we end up not using it.
// Scans the relocation records Rels that apply to input section C.
//
// For each record this computes the relocation expression and addend and then
// does one or more of the following:
//  * records a Relocation in C.Relocations so it is applied when the section
//    is written,
//  * adds a dynamic relocation to Out<ELFT>::RelaDyn,
//  * creates a thunk, a PLT/GOT-PLT entry or a GOT entry for the target symbol.
//
// RelTy is the relocation record type; RelTy::IsRela is consulted to decide
// whether the addend must be written into the section contents.
template <class ELFT, class RelTy>
static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) {
typedef typename ELFT::uint uintX_t;
bool IsWrite = C.getSectionHdr()->sh_flags & SHF_WRITE;
// Shorthand for queueing a dynamic relocation in .rela.dyn.
auto AddDyn = [=](const DynamicReloc<ELFT> &Reloc) {
Out<ELFT>::RelaDyn->addReloc(Reloc);
};
const elf::ObjectFile<ELFT> &File = *C.getFile();
ArrayRef<uint8_t> SectionData = C.getSectionData();
const uint8_t *Buf = SectionData.begin();
for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) {
const RelTy &RI = *I;
SymbolBody &Body = File.getRelocTargetSym(RI);
uint32_t Type = RI.getType(Config->Mips64EL);
RelExpr Expr = Target->getRelExpr(Type, Body);
// NOTE(review): Preemptible is sampled before adjustExpr() runs, while the
// PLT/GOT/thunk test below calls isPreemptible() again afterwards. The two
// may differ if adjustExpr() changes Body -- confirm before merging them.
bool Preemptible = isPreemptible(Body, Type);
Expr = adjustExpr(File, Body, IsWrite, Expr, Type, Buf + RI.r_offset);
if (HasError)
continue;
// Skip a relocation that points to a dead piece
// in a mergeable section.
if (C.getOffset(RI.r_offset) == (uintX_t)-1)
continue;
// This relocation does not require a GOT entry, but it is relative to the
// GOT and needs it to be created. Request that here.
if (Expr == R_GOTONLY_PC || Expr == R_GOTREL || Expr == R_PPC_TOC)
Out<ELFT>::Got->HasGotOffRel = true;
uintX_t Addend = computeAddend(File, Buf, E, RI, Expr, Body);
if (unsigned Processed = handleTlsRelocation<ELFT>(
Type, Body, C, RI.r_offset, Addend, Expr)) {
// Presumably Processed is the number of relocation records the TLS handler
// consumed; the loop's ++I accounts for one of them.
I += (Processed - 1);
continue;
}
// Ignore "hint" relocation because it is for optional code optimization.
if (Expr == R_HINT)
continue;
if (needsPlt(Expr) || Expr == R_THUNK_ABS || Expr == R_THUNK_PC ||
Expr == R_THUNK_PLT_PC || refersToGotEntry(Expr) ||
!isPreemptible(Body, Type)) {
// If the relocation points to something in the file, we can process it.
bool Constant = isStaticLinkTimeConstant<ELFT>(Expr, Type, Body);
// If the output being produced is position independent, the final value
// is still not known. In that case we still need some help from the
// dynamic linker. We can however do better than just copying the incoming
// relocation. We can process some of it and just ask the dynamic
// linker to add the load address.
if (!Constant)
AddDyn({Target->RelativeRel, &C, RI.r_offset, true, &Body, Addend});
// If the produced value is a constant, we just remember to write it
// when outputting this section. We also have to do it if the format
// uses Elf_Rel, since in that case the written value is the addend.
if (Constant || !RelTy::IsRela)
C.Relocations.push_back({Expr, Type, &C, RI.r_offset, Addend, &Body});
} else {
// We don't know anything about the final symbol. Just ask the dynamic
// linker to handle the relocation for us.
AddDyn({Target->getDynRel(Type), &C, RI.r_offset, false, &Body, Addend});
// MIPS ABI turns using of GOT and dynamic relocations inside out.
// While regular ABI uses dynamic relocations to fill up GOT entries
// MIPS ABI requires the dynamic linker to fill up GOT entries using a
// specially sorted dynamic symbol table. This affects even dynamic
// relocations against symbols which do not require GOT entries
// creation explicitly, i.e. do not have any GOT-relocations. So if
// a preemptible symbol has a dynamic relocation we anyway have
// to create a GOT entry for it.
// If a non-preemptible symbol has a dynamic relocation against it,
// the dynamic linker takes its st_value, adds the offset and writes down
// the result of the dynamic relocation. In case of a preemptible symbol
// the dynamic linker performs symbol resolution, writes the symbol value
// to the GOT entry and reads the GOT entry when it needs to perform
// a dynamic relocation.
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19
if (Config->EMachine == EM_MIPS)
Out<ELFT>::Got->addMipsEntry(Body, Addend, Expr);
continue;
}
// Some targets might require creation of thunks for relocations.
// Now we support only MIPS which requires LA25 thunk to call PIC
// code from non-PIC one, and ARM which requires interworking.
if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) {
auto *Sec = cast<InputSection<ELFT>>(&C);
addThunk<ELFT>(Type, Body, *Sec);
}
// At this point we are done with the relocated position. Some relocations
// also require us to create a got or plt entry.
// If a relocation needs PLT, we create a PLT and a GOT slot for the symbol.
if (needsPlt(Expr)) {
// Each symbol gets at most one PLT entry.
if (Body.isInPlt())
continue;
Out<ELFT>::Plt->addEntry(Body);
uint32_t Rel;
if (Body.isGnuIFunc() && !Preemptible)
Rel = Target->IRelativeRel;
else
Rel = Target->PltRel;
Out<ELFT>::GotPlt->addEntry(Body);
Out<ELFT>::RelaPlt->addReloc({Rel, Out<ELFT>::GotPlt,
Body.getGotPltOffset<ELFT>(), !Preemptible,
&Body, 0});
continue;
}
if (refersToGotEntry(Expr)) {
if (Config->EMachine == EM_MIPS) {
// MIPS ABI has special rules to process GOT entries
// and doesn't require relocation entries for them.
// See "Global Offset Table" in Chapter 5 in the following document
// for detailed description:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
Out<ELFT>::Got->addMipsEntry(Body, Addend, Expr);
if (Body.isTls())
AddDyn({Target->TlsGotRel, Out<ELFT>::Got, Body.getGotOffset<ELFT>(),
!Preemptible, &Body, 0});
continue;
}
// Each symbol gets at most one regular GOT entry.
if (Body.isInGot())
continue;
Out<ELFT>::Got->addEntry(Body);
// The GOT slot needs a dynamic relocation when the symbol is preemptible,
// or when producing position-independent output for a non-absolute symbol.
if (Preemptible || (Config->Pic && !isAbsolute<ELFT>(Body))) {
uint32_t DynType;
if (Body.isTls())
DynType = Target->TlsGotRel;
else if (Preemptible)
DynType = Target->GotRel;
else
DynType = Target->RelativeRel;
AddDyn({DynType, Out<ELFT>::Got, Body.getGotOffset<ELFT>(),
!Preemptible, &Body, 0});
}
continue;
}
}
}
// Scans every relocation section attached to C. Each relocation goes through a
// series of tests to determine if it needs special treatment, such as creating
// GOT, PLT, copy relocations, etc.
template <class ELFT> void scanRelocations(InputSection<ELFT> &C) {
  // Relocations for non-alloc sections are not scanned here; they are
  // processed directly by InputSection::relocateNonAlloc.
  if (!(C.getSectionHdr()->sh_flags & SHF_ALLOC))
    return;
  for (const typename ELFT::Shdr *RelSec : C.RelocSections)
    scanRelocations(C, *RelSec);
}
// Scans the relocations stored in the relocation section RelSec against input
// section S, dispatching on whether the records are Elf_Rela or Elf_Rel.
template <class ELFT>
void scanRelocations(InputSectionBase<ELFT> &S,
                     const typename ELFT::Shdr &RelSec) {
  ELFFile<ELFT> &Obj = S.getFile()->getObj();
  if (RelSec.sh_type != SHT_RELA) {
    scanRelocs(S, Obj.rels(&RelSec));
    return;
  }
  scanRelocs(S, Obj.relas(&RelSec));
}
// Explicit instantiations of both scanRelocations overloads for the four ELF
// flavours (32/64-bit, little/big-endian), since the templates are defined in
// this file rather than in the header.
template void scanRelocations<ELF32LE>(InputSection<ELF32LE> &);
template void scanRelocations<ELF32BE>(InputSection<ELF32BE> &);
template void scanRelocations<ELF64LE>(InputSection<ELF64LE> &);
template void scanRelocations<ELF64BE>(InputSection<ELF64BE> &);
template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &,
const ELF32LE::Shdr &);
template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &,
const ELF32BE::Shdr &);
template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &,
const ELF64LE::Shdr &);
template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &,
const ELF64BE::Shdr &);
}
}

View File

@ -0,0 +1,93 @@
//===- Relocations.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_RELOCATIONS_H
#define LLD_ELF_RELOCATIONS_H
#include "lld/Core/LLVM.h"
namespace lld {
namespace elf {
class SymbolBody;
template <class ELFT> class InputSection;
template <class ELFT> class InputSectionBase;
// Target-independent classification of a relocation. Each value names the
// kind of expression used to compute the relocated value; it is stored in the
// Expr field of Relocation.
// NOTE(review): the exact formula behind each enumerator is defined where the
// relocations are applied, not in this header.
enum RelExpr {
R_ABS,
R_GOT,
R_GOTONLY_PC,
R_GOTREL,
R_GOT_FROM_END,
R_GOT_OFF,
R_GOT_PAGE_PC,
R_GOT_PC,
R_HINT,
R_MIPS_GOT_LOCAL_PAGE,
R_MIPS_GOT_OFF,
R_MIPS_TLSGD,
R_MIPS_TLSLD,
R_NEG_TLS,
R_PAGE_PC,
R_PC,
R_PLT,
R_PLT_PC,
R_PLT_PAGE_PC,
R_PPC_OPD,
R_PPC_PLT_OPD,
R_PPC_TOC,
R_RELAX_GOT_PC,
R_RELAX_GOT_PC_NOPIC,
R_RELAX_TLS_GD_TO_IE,
R_RELAX_TLS_GD_TO_IE_END,
R_RELAX_TLS_GD_TO_IE_ABS,
R_RELAX_TLS_GD_TO_IE_PAGE_PC,
R_RELAX_TLS_GD_TO_LE,
R_RELAX_TLS_GD_TO_LE_NEG,
R_RELAX_TLS_IE_TO_LE,
R_RELAX_TLS_LD_TO_LE,
R_SIZE,
R_THUNK_ABS,
R_THUNK_PC,
R_THUNK_PLT_PC,
R_TLS,
R_TLSDESC,
R_TLSDESC_PAGE,
R_TLSGD,
R_TLSGD_PC,
R_TLSLD,
R_TLSLD_PC
};
// A relocation that has been scanned and is remembered so it can be applied
// when the owning input section is written out.
template <class ELFT> struct Relocation {
// How the relocated value is computed.
RelExpr Expr;
// Raw relocation type taken from the relocation record.
uint32_t Type;
// Input section the relocation applies to.
InputSectionBase<ELFT> *InputSec;
// Offset of the relocated location (the record's r_offset).
uint64_t Offset;
// Addend used when computing the value.
uint64_t Addend;
// Symbol the relocation refers to.
SymbolBody *Sym;
};
// Scans all relocation sections attached to the given input section.
template <class ELFT> void scanRelocations(InputSection<ELFT> &);
// Scans the relocations contained in one relocation section header against the
// given input section.
template <class ELFT>
void scanRelocations(InputSectionBase<ELFT> &, const typename ELFT::Shdr &);
// Addend of an Elf_Rel record. This overload never reads the record and always
// yields zero.
template <class ELFT>
static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) {
  typename ELFT::uint NoAddend = 0;
  return NoAddend;
}
// Addend of an Elf_Rela record: the value of its r_addend field.
template <class ELFT>
static inline typename ELFT::uint getAddend(const typename ELFT::Rela &Rel) {
  typename ELFT::uint Addend = Rel.r_addend;
  return Addend;
}
}
}
#endif

View File

@ -0,0 +1,163 @@
//===- ScriptParser.cpp ---------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the base parser class for linker script and dynamic
// list.
//
//===----------------------------------------------------------------------===//
#include "ScriptParser.h"
#include "Error.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
using namespace lld;
using namespace lld::elf;
// Returns the line of S that contains the character S[Pos], without its
// trailing whitespace.
static StringRef getLine(StringRef S, size_t Pos) {
  // The line starts right after the previous newline, or at the beginning of
  // S when there is none.
  size_t PrevNL = S.rfind('\n', Pos);
  size_t Begin = (PrevNL == StringRef::npos) ? 0 : PrevNL + 1;
  // The line ends at the next newline, or at the end of S.
  size_t NextNL = S.find('\n', Pos);
  size_t End = (NextNL == StringRef::npos) ? S.size() : NextNL;
  // rtrim for DOS-style newlines.
  return S.substr(Begin, End - Begin).rtrim();
}
void ScriptParserBase::printErrorPos() {
StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
StringRef Line = getLine(Input, Tok.data() - Input.data());
size_t Col = Tok.data() - Line.data();
error(Line);
error(std::string(Col, ' ') + "^");
}
// Reports Msg and puts the parser into the error state. Only the first error
// is reported; later calls are ignored so cascading errors are not recorded.
void ScriptParserBase::setError(const Twine &Msg) {
  if (Error)
    return;
  // A line number and caret can only be printed when both the original text
  // and the token list are available.
  bool HasLocation = !Input.empty() && !Tokens.empty();
  if (HasLocation) {
    error("line " + Twine(getPos()) + ": " + Msg);
    printErrorPos();
  } else {
    error(Msg);
  }
  Error = true;
}
// Splits S into linker script tokens. Returns an empty vector if a quoted
// token is not terminated.
std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
  std::vector<StringRef> Toks;
  while (true) {
    S = skipSpace(S);
    if (S.empty())
      break;

    // Quoted token: everything between the quotes, quotes excluded.
    if (S.startswith("\"")) {
      size_t Close = S.find("\"", 1);
      if (Close == StringRef::npos) {
        error("unclosed quote");
        return {};
      }
      Toks.push_back(S.substr(1, Close - 1));
      S = S.substr(Close + 1);
      continue;
    }

    // Unquoted token: the longest run of word characters.
    size_t Len = S.find_first_not_of(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "0123456789_.$/\\~=+[]*?-:!<>");
    // A character that cannot start a word (which is usually a
    // punctuation) forms a single character token.
    if (Len == 0)
      Len = 1;
    Toks.push_back(S.substr(0, Len));
    S = S.substr(Len);
  }
  return Toks;
}
// Strips leading whitespace, /* ... */ comments and #-to-end-of-line comments
// from S and returns the remainder. An unterminated block comment is reported
// as an error and yields an empty string.
StringRef ScriptParserBase::skipSpace(StringRef S) {
  while (true) {
    if (S.startswith("/*")) {
      size_t Close = S.find("*/", 2);
      if (Close == StringRef::npos) {
        error("unclosed comment in a linker script");
        return "";
      }
      S = S.substr(Close + 2);
      continue;
    }
    if (S.startswith("#")) {
      // Drop everything up to and including the newline; if there is no
      // newline the comment runs to the end of the input.
      size_t NL = S.find('\n', 1);
      size_t Last = (NL == StringRef::npos) ? S.size() - 1 : NL;
      S = S.substr(Last + 1);
      continue;
    }
    // Stop once trimming whitespace no longer removes anything.
    StringRef Trimmed = S.ltrim();
    if (Trimmed.size() == S.size())
      return Trimmed;
    S = Trimmed;
  }
}
// An erroneous token is handled as if it were the last token before EOF.
bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
// Consumes and returns the next token. Returns an empty string if the parser
// is already in the error state, or reports "unexpected EOF" and returns an
// empty string if no token is left.
StringRef ScriptParserBase::next() {
  if (Error)
    return "";
  if (!atEOF())
    return Tokens[Pos++];
  setError("unexpected EOF");
  return "";
}
// Returns the next token without consuming it. Like next(), this reports an
// error and returns an empty string when no token is left.
StringRef ScriptParserBase::peek() {
  StringRef Tok = next();
  if (!Error) {
    // Undo the advance that next() performed.
    --Pos;
    return Tok;
  }
  return "";
}
// Consumes the next token if it equals Tok and returns true; otherwise leaves
// the position untouched and returns false. Reaching EOF is reported as an
// error.
bool ScriptParserBase::skip(StringRef Tok) {
  if (Error)
    return false;
  if (atEOF()) {
    setError("unexpected EOF");
    return false;
  }
  bool Matched = (Tokens[Pos] == Tok);
  if (Matched)
    ++Pos;
  return Matched;
}
// Consumes the next token and reports an error unless it equals Expect.
void ScriptParserBase::expect(StringRef Expect) {
  if (Error)
    return;
  StringRef Tok = next();
  if (Tok == Expect)
    return;
  setError(Expect + " expected, but got " + Tok);
}
// Returns the current line number.
size_t ScriptParserBase::getPos() {
if (Pos == 0)
return 1;
const char *Begin = Input.data();
const char *Tok = Tokens[Pos - 1].data();
return StringRef(Begin, Tok - Begin).count('\n') + 1;
}

View File

@ -0,0 +1,49 @@
//===- ScriptParser.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SCRIPT_PARSER_H
#define LLD_ELF_SCRIPT_PARSER_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include <utility>
#include <vector>
namespace lld {
namespace elf {
// Base class for token-based parsers. It tokenizes the input up front and
// offers next/peek/skip/expect primitives plus first-error-only reporting.
class ScriptParserBase {
public:
// Tokenizes S and keeps S so that errors can be reported with line context.
explicit ScriptParserBase(StringRef S) : Input(S), Tokens(tokenize(S)) {}
// Uses an already-split token list. Input is left empty, so errors are
// reported without a line number or caret.
explicit ScriptParserBase(std::vector<StringRef> Tokens)
: Input(""), Tokens(std::move(Tokens)) {}
protected:
// Reports Msg unless an error has already been recorded.
void setError(const Twine &Msg);
// Splits S into tokens.
static std::vector<StringRef> tokenize(StringRef S);
// Drops leading whitespace and comments from S.
static StringRef skipSpace(StringRef S);
// True if all tokens are consumed or an error has been recorded.
bool atEOF();
// Consumes and returns the next token.
StringRef next();
// Returns the next token without consuming it.
StringRef peek();
// Consumes the next token only if it equals Tok.
bool skip(StringRef Tok);
// Consumes the next token and reports an error if it is not Expect.
void expect(StringRef Expect);
// Line number of the most recently consumed token (1-based).
size_t getPos();
// Prints the offending line and a caret under the offending token.
void printErrorPos();
// Original text being parsed; empty when constructed from tokens.
StringRef Input;
// All tokens of the input.
std::vector<StringRef> Tokens;
// Index into Tokens of the next token to consume.
size_t Pos = 0;
// Set once the first error has been reported.
bool Error = false;
};
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,98 @@
//===- Strings.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Strings.h"
#include "Error.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include <algorithm>
#ifdef HAVE_CXXABI_H
#include <cxxabi.h>
#endif
using namespace llvm;
using namespace lld;
using namespace lld::elf;
// Returns true if S matches T. S can contain glob meta-characters.
// The asterisk ('*') matches zero or more characters, and the question
// mark ('?') matches exactly one character.
//
// S is the pattern and T is the text. The whole of T has to be matched.
bool elf::globMatch(StringRef S, StringRef T) {
  for (;;) {
    if (S.empty())
      return T.empty();
    if (S[0] == '*') {
      // A run of asterisks is equivalent to a single one. Collapsing the run
      // makes patterns such as "**" match the empty string (previously they
      // did not, because no suffix of an empty T was ever tried) and avoids
      // needless recursion.
      while (!S.empty() && S[0] == '*')
        S = S.substr(1);
      if (S.empty())
        // Fast path. If the rest of the pattern is '*', it matches anything.
        return true;
      // The rest of the pattern starts with a non-asterisk and so needs at
      // least one character; try it against every non-empty suffix of T.
      for (size_t I = 0, E = T.size(); I < E; ++I)
        if (globMatch(S, T.substr(I)))
          return true;
      return false;
    }
    if (T.empty() || (S[0] != T[0] && S[0] != '?'))
      return false;
    S = S.substr(1);
    T = T.substr(1);
  }
}
// Converts a hex string (e.g. "deadbeef") to a vector of bytes, two digits per
// byte. Reports an error and returns an empty vector if a chunk is not
// hexadecimal.
std::vector<uint8_t> elf::parseHex(StringRef S) {
  std::vector<uint8_t> Bytes;
  for (; !S.empty(); S = S.substr(2)) {
    StringRef Digits = S.substr(0, 2);
    uint8_t Val;
    // getAsInteger returns true on failure.
    if (Digits.getAsInteger(16, Val)) {
      error("not a hexadecimal value: " + Digits);
      return {};
    }
    Bytes.push_back(Val);
  }
  return Bytes;
}
// Returns true if C is an ASCII letter or an underscore, i.e. a character that
// may start a C identifier.
static bool isAlpha(char C) {
  if (C == '_')
    return true;
  if ('a' <= C && C <= 'z')
    return true;
  return 'A' <= C && C <= 'Z';
}
// Returns true if C may appear after the first character of a C identifier:
// an ASCII letter, an underscore or a decimal digit.
static bool isAlnum(char C) {
  if (isAlpha(C))
    return true;
  return '0' <= C && C <= '9';
}
// Returns true if S is valid as a C language identifier: non-empty, starting
// with a letter or underscore, followed only by letters, underscores or
// digits.
bool elf::isValidCIdentifier(StringRef S) {
  if (S.empty() || !isAlpha(S[0]))
    return false;
  for (auto It = S.begin() + 1, End = S.end(); It != End; ++It)
    if (!isAlnum(*It))
      return false;
  return true;
}
// Returns the demangled C++ symbol name for Name. Name is returned unchanged
// if <cxxabi.h> is unavailable, if it does not start with "_Z", or if
// demangling fails.
std::string elf::demangle(StringRef Name) {
#ifdef HAVE_CXXABI_H
  // __cxa_demangle can be used to demangle strings other than symbol
  // names which do not necessarily start with "_Z". Name can be
  // either a C or C++ symbol. Don't call __cxa_demangle if the name
  // does not look like a C++ symbol name to avoid getting unexpected
  // result for a C symbol that happens to match a mangled type name.
  if (!Name.startswith("_Z"))
    return Name;

  std::string Mangled = Name.str();
  char *Buf = abi::__cxa_demangle(Mangled.c_str(), nullptr, nullptr, nullptr);
  if (!Buf)
    return Name;
  // The buffer is malloc'ed by __cxa_demangle; copy it out and release it.
  std::string Demangled(Buf);
  free(Buf);
  return Demangled;
#else
  return Name;
#endif
}

View File

@ -0,0 +1,29 @@
//===- Strings.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// The guard is named after the ELF directory this header lives in, matching
// the other ELF headers; the previous LLD_COFF_ name risked colliding with a
// COFF header's guard.
#ifndef LLD_ELF_STRINGS_H
#define LLD_ELF_STRINGS_H
#include "lld/Core/LLVM.h"
#include <vector>
namespace lld {
namespace elf {
// Returns true if the glob pattern S ('*' and '?' are meta-characters)
// matches the whole of T.
bool globMatch(StringRef S, StringRef T);
// Converts a string of hex digits to bytes, two digits per byte.
std::vector<uint8_t> parseHex(StringRef S);
// Returns true if S is a valid C identifier.
bool isValidCIdentifier(StringRef S);
// Returns a demangled C++ symbol name. If Name is not a mangled
// name or the system does not provide __cxa_demangle function,
// it returns an unmodified string.
std::string demangle(StringRef Name);
}
}
#endif

View File

@ -0,0 +1,168 @@
//===- SymbolListFile.cpp -------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the parsers for dynamic lists (--dynamic-list) and
// version scripts (--version-script). They do not construct an AST but consume
// the directives directly. Results are written to the Config object.
//
//===----------------------------------------------------------------------===//
#include "SymbolListFile.h"
#include "Config.h"
#include "ScriptParser.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
// Parse the --dynamic-list argument. A dynamic list is in the form
//
// { symbol1; symbol2; [...]; symbolN };
//
// Multiple groups can be defined in the same file, and they are merged
// into a single group.
class DynamicListParser final : public ScriptParserBase {
public:
// S is the text of the dynamic list file.
DynamicListParser(StringRef S) : ScriptParserBase(S) {}
// Parses the whole input and appends each symbol to Config->DynamicList.
void run();
};
void DynamicListParser::run() {
while (!atEOF()) {
expect("{");
while (!Error) {
Config->DynamicList.push_back(next());
expect(";");
if (skip("}"))
break;
}
expect(";");
}
}
// Parses the dynamic list held in MB; results go to Config->DynamicList.
void elf::parseDynamicList(MemoryBufferRef MB) {
  DynamicListParser Parser(MB.getBuffer());
  Parser.run();
}
// Parse the --version-script argument. The parser accepts either a single
// anonymous version definition
//
// { [ global: ] symbol1; symbol2; [...]; symbolN; [ local: *; ] };
//
// or a sequence of named definitions
//
// VerStr { [ global: ] symbol1; [...]; [ local: *; ] } [ Parent ];
//
// Anonymous and named definitions cannot be mixed. Inside the global section,
// `extern C++ { symbol1; [...]; };` blocks are accepted as well. The local
// section must consist of exactly `*;`. A parent version name after the
// closing brace is parsed but ignored.
class VersionScriptParser final : public ScriptParserBase {
public:
// S is the text of the version script.
VersionScriptParser(StringRef S) : ScriptParserBase(S) {}
// Parses the whole script and records the results in Config.
void run();
private:
// Parses `C++ { ... };` following an `extern` keyword.
void parseExtern(std::vector<SymbolVersion> *Globals);
// Parses the body of one version definition, after its opening brace.
void parseVersion(StringRef VerStr);
// Parses the list of global symbols of version VerStr.
void parseGlobal(StringRef VerStr);
// Parses `*;` following `local:`.
void parseLocal();
};
// Registers a new version definition named VerStr in
// Config->VersionDefinitions and returns the identifier assigned to it.
size_t elf::defineSymbolVersion(StringRef VerStr) {
  auto &Defs = Config->VersionDefinitions;
  // Identifiers start at 2 because 0 and 1 are reserved
  // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants.
  size_t VersionId = Defs.size() + 2;
  Defs.push_back({VerStr, VersionId});
  return VersionId;
}
// Parses the body of one version definition; the opening brace has already
// been consumed. VerStr is the version name, empty for an anonymous
// definition.
void VersionScriptParser::parseVersion(StringRef VerStr) {
  defineSymbolVersion(VerStr);

  // The "global:" label is optional: anything that is not "local:" starts the
  // list of global symbols.
  bool HasGlobals = skip("global:") || peek() != "local:";
  if (HasGlobals)
    parseGlobal(VerStr);
  if (skip("local:"))
    parseLocal();
  expect("}");

  // Each version may have a parent version. For example, "Ver2" defined as
  // "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This
  // version hierarchy is, probably against your instinct, purely for human; the
  // runtime doesn't care about them at all. In LLD, we simply skip the token.
  if (!VerStr.empty()) {
    if (peek() != ";")
      next();
  }
  expect(";");
}
// Parses the contents of a "local:" section, which must be exactly "*;".
// Symbols without an explicit version then default to VER_NDX_LOCAL.
void VersionScriptParser::parseLocal() {
Config->DefaultSymbolVersion = VER_NDX_LOCAL;
expect("*");
expect(";");
}
void VersionScriptParser::parseExtern(std::vector<SymbolVersion> *Globals) {
expect("C++");
expect("{");
for (;;) {
if (peek() == "}" || Error)
break;
Globals->push_back({next(), true});
expect(";");
}
expect("}");
expect(";");
}
// Parses the list of global symbols of a version definition. Names go to
// Config->VersionScriptGlobals for an anonymous definition (empty VerStr) and
// to the most recently defined version's Globals otherwise. Stops in front of
// "}" or "local:", or on error.
void VersionScriptParser::parseGlobal(StringRef VerStr) {
  std::vector<SymbolVersion> *Globals =
      &Config->VersionScriptGlobals;
  if (!VerStr.empty())
    Globals = &Config->VersionDefinitions.back().Globals;

  while (true) {
    if (skip("extern"))
      parseExtern(Globals);

    StringRef Cur = peek();
    bool Done = Cur == "}" || Cur == "local:" || Error;
    if (Done)
      return;
    next();
    Globals->push_back({Cur, false});
    expect(";");
  }
}
void VersionScriptParser::run() {
StringRef Msg = "anonymous version definition is used in "
"combination with other version definitions";
if (skip("{")) {
parseVersion("");
if (!atEOF())
setError(Msg);
return;
}
while (!atEOF() && !Error) {
StringRef VerStr = next();
if (VerStr == "{") {
setError(Msg);
return;
}
expect("{");
parseVersion(VerStr);
}
}
// Parses the version script held in MB; results are recorded in Config.
void elf::parseVersionScript(MemoryBufferRef MB) {
  VersionScriptParser Parser(MB.getBuffer());
  Parser.run();
}

View File

@ -0,0 +1,27 @@
//===- SymbolListFile.h -----------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SYMBOL_LIST_FILE_H
#define LLD_ELF_SYMBOL_LIST_FILE_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/MemoryBuffer.h"
namespace lld {
namespace elf {
// Registers a version definition named Version and returns its identifier.
size_t defineSymbolVersion(StringRef Version);
// Parses a --dynamic-list file.
void parseDynamicList(MemoryBufferRef MB);
// Parses a --version-script file.
void parseVersionScript(MemoryBufferRef MB);
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,713 @@
//===- SymbolTable.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Symbol table is a bag of all known symbols. We put all symbols of
// all input files to the symbol table. The symbol table is basically
// a hash table with the logic to resolve symbol name conflicts using
// the symbol types.
//
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
#include "Config.h"
#include "Error.h"
#include "LinkerScript.h"
#include "Strings.h"
#include "SymbolListFile.h"
#include "Symbols.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
// All input object files must be for the same architecture
// (e.g. it does not make sense to link x86 object files with
// MIPS object files.) This function checks for that error.
// Returns true if F may be linked; otherwise reports an error and returns
// false. Files that are neither ELF nor bitcode are always accepted.
template <class ELFT> static bool isCompatible(InputFile *F) {
  bool IsElfOrBitcode = isa<ELFFileBase<ELFT>>(F) || isa<BitcodeFile>(F);
  if (!IsElfOrBitcode)
    return true;
  bool SameTarget =
      F->EKind == Config->EKind && F->EMachine == Config->EMachine;
  if (SameTarget)
    return true;

  // Name the other side of the conflict: the requested emulation if one was
  // given, otherwise the first ELF file seen.
  StringRef Offender = F->getName();
  StringRef Expected = Config->Emulation;
  if (Expected.empty())
    Expected = Config->FirstElf->getName();
  error(Offender + " is incompatible with " + Expected);
  return false;
}
// Add symbols in File to the symbol table.
//
// Takes ownership of File. Depending on its kind the file is moved into the
// matching list (ArchiveFiles, LazyObjectFiles, SharedFiles, BitcodeFiles or
// ObjectFiles) and then parsed. Incompatible files and shared libraries whose
// soname was already seen are dropped.
template <class ELFT>
void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) {
// Keep a raw pointer: File is released into one of the lists below, after
// which the file is still accessed through this pointer.
InputFile *FileP = File.get();
if (!isCompatible<ELFT>(FileP))
return;
// .a file
if (auto *F = dyn_cast<ArchiveFile>(FileP)) {
ArchiveFiles.emplace_back(cast<ArchiveFile>(File.release()));
F->parse<ELFT>();
return;
}
// Lazy object file
if (auto *F = dyn_cast<LazyObjectFile>(FileP)) {
LazyObjectFiles.emplace_back(cast<LazyObjectFile>(File.release()));
F->parse<ELFT>();
return;
}
// Tracing prints the name of every file that is not an archive or lazy object.
if (Config->Trace)
outs() << getFilename(FileP) << "\n";
// .so file
if (auto *F = dyn_cast<SharedFile<ELFT>>(FileP)) {
// DSOs are uniquified not by filename but by soname.
F->parseSoName();
// A duplicate soname: File still owns the object here, so returning
// destroys it.
if (!SoNames.insert(F->getSoName()).second)
return;
SharedFiles.emplace_back(cast<SharedFile<ELFT>>(File.release()));
F->parseRest();
return;
}
// LLVM bitcode file
if (auto *F = dyn_cast<BitcodeFile>(FileP)) {
BitcodeFiles.emplace_back(cast<BitcodeFile>(File.release()));
F->parse<ELFT>(ComdatGroups);
return;
}
// Regular object file
auto *F = cast<ObjectFile<ELFT>>(FileP);
ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(File.release()));
F->parse(ComdatGroups);
}
// This function is where all the optimizations of link-time
// optimization happens. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
// This function compiles bitcode files into a few big native files
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that consist of a program are passed
// to the compiler at once, it can do whole-program optimization.
template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() {
  if (BitcodeFiles.empty())
    return;

  // Compile bitcode files.
  Lto.reset(new BitcodeCompiler);
  for (const std::unique_ptr<BitcodeFile> &Bitcode : BitcodeFiles)
    Lto->add(*Bitcode);
  std::vector<std::unique_ptr<InputFile>> Compiled = Lto->compile();

  // Replace bitcode symbols: parse each native object and add it to the list
  // of regular object files. Every object is parsed with its own fresh, empty
  // comdat group set.
  for (std::unique_ptr<InputFile> &Native : Compiled) {
    auto *Obj = cast<ObjectFile<ELFT>>(Native.release());
    DenseSet<StringRef> NoGroups;
    Obj->parse(NoGroups);
    ObjectFiles.emplace_back(Obj);
  }
}
// Defines Name as a global regular symbol with the given visibility and no
// section, and returns its body.
template <class ELFT>
DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name,
                                                     uint8_t Visibility) {
  Symbol *Sym = addRegular(Name, STB_GLOBAL, Visibility);
  return cast<DefinedRegular<ELFT>>(Sym->body());
}
// Add Name as an "ignored" symbol. An ignored symbol is a regular
// linker-synthesized defined symbol, but is only defined if needed.
// Returns nullptr when no symbol called Name exists in the table.
template <class ELFT>
DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name,
                                                    uint8_t Visibility) {
  if (find(Name))
    return addAbsolute(Name, Visibility);
  return nullptr;
}
// Set a flag for --trace-symbol so that we can print out a log message
// if a new symbol with the same name is inserted into the symbol table.
template <class ELFT> void SymbolTable<ELFT>::trace(StringRef Name) {
  // Index -1 marks a placeholder: the name is known but no Symbol has been
  // created for it yet.
  SymIndex Placeholder = {-1, true};
  Symtab.insert({Name, Placeholder});
}
// Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
// Used to implement --wrap.
// Does nothing if no symbol called Name exists.
template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) {
SymbolBody *B = find(Name);
if (!B)
return;
// The new names are saved through Alloc so they are not temporaries.
StringSaver Saver(Alloc);
Symbol *Sym = B->symbol();
Symbol *Real = addUndefined(Saver.save("__real_" + Name));
Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));
// We rename symbols by replacing the old symbol's SymbolBody with the new
// symbol's SymbolBody. This causes all SymbolBody pointers referring to the
// old symbol to instead refer to the new symbol.
// Order matters: the original body is copied to __real_SYM first, and only
// then overwritten with the body of __wrap_SYM.
memcpy(Real->Body.buffer, Sym->Body.buffer, sizeof(Sym->Body));
memcpy(Sym->Body.buffer, Wrap->Body.buffer, sizeof(Wrap->Body));
}
// Merges two symbol visibilities. STV_DEFAULT yields to the other value;
// otherwise the numerically smaller of the two wins.
static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
  bool ADefault = (VA == STV_DEFAULT);
  bool BDefault = (VB == STV_DEFAULT);
  if (!ADefault && !BDefault)
    return std::min(VA, VB);
  return ADefault ? VB : VA;
}
// Find an existing symbol or create and insert a new one.
// Returns the symbol and a flag that is true if it was newly created.
template <class ELFT>
std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) {
// Tentatively map Name to the index the new symbol would get in SymVector.
auto P = Symtab.insert({Name, {(int)SymVector.size(), false}});
SymIndex &V = P.first->second;
bool IsNew = P.second;
// An index of -1 is the placeholder left by trace(): the name is known but no
// Symbol exists yet. Treat it as new and keep the traced flag set.
if (V.Idx == -1) {
IsNew = true;
V = {(int)SymVector.size(), true};
}
Symbol *Sym;
if (IsNew) {
// New symbols start out weak, with default visibility, until a caller fills
// in the real attributes.
Sym = new (Alloc) Symbol;
Sym->Binding = STB_WEAK;
Sym->Visibility = STV_DEFAULT;
Sym->IsUsedInRegularObj = false;
Sym->ExportDynamic = false;
Sym->VersionId = Config->DefaultSymbolVersion;
Sym->Traced = V.Traced;
SymVector.push_back(Sym);
} else {
Sym = SymVector[V.Idx];
}
return {Sym, IsNew};
}
// Find an existing symbol or create and insert a new one, then apply the given
// attributes. Also reports an error if the TLS-ness of the new symbol
// disagrees with that of an existing one.
template <class ELFT>
std::pair<Symbol *, bool>
SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
                          bool CanOmitFromDynSym, bool IsUsedInRegularObj,
                          InputFile *File) {
  std::pair<Symbol *, bool> Found = insert(Name);
  Symbol *S = Found.first;
  bool WasInserted = Found.second;

  // Merge in the new symbol's visibility.
  S->Visibility = getMinVisibility(S->Visibility, Visibility);

  bool ExportsByDefault = Config->Shared || Config->ExportDynamic;
  if (!CanOmitFromDynSym && ExportsByDefault)
    S->ExportDynamic = true;
  if (IsUsedInRegularObj)
    S->IsUsedInRegularObj = true;

  // Only an existing symbol of known type can conflict.
  if (!WasInserted && S->body()->Type != SymbolBody::UnknownType) {
    bool NewIsTls = (Type == STT_TLS);
    if (NewIsTls != S->body()->isTls())
      error("TLS attribute mismatch for symbol: " +
            conflictMsg(S->body(), File));
  }
  return {S, WasInserted};
}
// Construct a string in the form of "Sym in File1 and File2".
// Used to construct an error message. The symbol name is demangled when
// Config->Demangle is set.
template <typename ELFT>
std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Existing,
                                           InputFile *NewFile) {
  std::string Sym = Existing->getName();
  if (Config->Demangle)
    Sym = demangle(Sym);
  std::string Msg = Sym + " in " + getFilename(Existing->File);
  return Msg + " and " + getFilename(NewFile);
}
// Adds an undefined global symbol called Name with default visibility, type 0
// and no originating file.
template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
  const uint8_t Type = 0;
  const bool CanOmitFromDynSym = false;
  InputFile *const File = nullptr;
  return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, Type, CanOmitFromDynSym,
                      File);
}
// Adds an undefined reference to Name coming from File (which may be null).
// A new symbol becomes an Undefined body. For an existing symbol, a non-weak
// reference marks a shared library symbol's file as used and causes a lazy
// symbol's archive member or object file to be fetched and added.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
uint8_t StOther, uint8_t Type,
bool CanOmitFromDynSym,
InputFile *File) {
Symbol *S;
bool WasInserted;
// The low two bits of StOther are passed as the visibility. A reference
// counts as used in a regular object unless it comes from a bitcode file.
std::tie(S, WasInserted) =
insert(Name, Type, StOther & 3, CanOmitFromDynSym,
/*IsUsedInRegularObj*/ !File || !isa<BitcodeFile>(File), File);
if (WasInserted) {
S->Binding = Binding;
replaceBody<Undefined>(S, Name, StOther, Type, File);
return S;
}
if (Binding != STB_WEAK) {
// A non-weak reference overrides the binding of shared and lazy symbols.
if (S->body()->isShared() || S->body()->isLazy())
S->Binding = Binding;
if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(S->body()))
SS->file()->IsUsed = true;
}
if (auto *L = dyn_cast<Lazy>(S->body())) {
// An undefined weak will not fetch archive members, but we have to remember
// its type. See also comment in addLazyArchive.
if (S->isWeak())
L->Type = Type;
else if (auto F = L->fetch())
addFile(std::move(F));
}
return S;
}
// We have a new defined symbol with the specified binding. Return 1 if the new
// symbol should win, -1 if the new symbol should lose, or 0 if both symbols are
// strong defined symbols.
static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) {
  if (!WasInserted) {
    SymbolBody *Body = S->body();
    // Lazy, undefined and shared symbols always give way to a definition.
    bool Replaceable =
        Body->isLazy() || Body->isUndefined() || Body->isShared();
    if (!Replaceable) {
      // The existing symbol is a definition: a weak newcomer loses, and a
      // strong newcomer wins only against a weak definition.
      if (Binding == STB_WEAK)
        return -1;
      return S->isWeak() ? 1 : 0;
    }
  }
  return 1;
}
// We have a new non-common defined symbol with the specified binding. Return 1
// if the new symbol should win, -1 if the new symbol should lose, or 0 if there
// is a conflict. If the new symbol wins, also update the binding.
static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding) {
  int Cmp = compareDefined(S, WasInserted, Binding);
  if (Cmp != 0) {
    if (Cmp > 0)
      S->Binding = Binding;
    return Cmp;
  }

  // Both symbols are strong definitions. That is a conflict unless the
  // existing one is a common symbol.
  if (!isa<DefinedCommon>(S->body()))
    return 0;

  // Non-common symbols take precedence over common symbols.
  if (Config->WarnCommon)
    warning("common " + S->body()->getName() + " is overridden");
  return 1;
}
// Adds a common symbol N. If an existing strong symbol is also common, the two
// are merged by taking the larger size and the larger alignment; if it is not
// common, the existing symbol is kept.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size,
                                     uint64_t Alignment, uint8_t Binding,
                                     uint8_t StOther, uint8_t Type,
                                     InputFile *File) {
  std::pair<Symbol *, bool> Found =
      insert(N, Type, StOther & 3, /*CanOmitFromDynSym*/ false,
             /*IsUsedInRegularObj*/ true, File);
  Symbol *S = Found.first;
  bool WasInserted = Found.second;

  int Cmp = compareDefined(S, WasInserted, Binding);
  if (Cmp > 0) {
    // The new common symbol wins outright.
    S->Binding = Binding;
    replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File);
    return S;
  }
  if (Cmp < 0)
    return S;

  // Both symbols are strong.
  auto *C = dyn_cast<DefinedCommon>(S->body());
  if (!C) {
    // Non-common symbols take precedence over common symbols.
    if (Config->WarnCommon)
      warning("common " + S->body()->getName() + " is overridden");
    return S;
  }

  if (Config->WarnCommon)
    warning("multiple common of " + S->body()->getName());
  C->Size = std::max(C->Size, Size);
  C->Alignment = std::max(C->Alignment, Alignment);
  return S;
}
// Reports that Existing is defined again in NewFile. This is a warning when
// Config->AllowMultipleDefinition is set and an error otherwise.
template <class ELFT>
void SymbolTable<ELFT>::reportDuplicate(SymbolBody *Existing,
                                        InputFile *NewFile) {
  std::string Msg = "duplicate symbol: " + conflictMsg(Existing, NewFile);
  if (!Config->AllowMultipleDefinition) {
    error(Msg);
    return;
  }
  warning(Msg);
}
// Adds a regular defined symbol described by the ELF symbol Sym. Section is
// the input section that contains it and may be null. If the new definition
// wins over the existing symbol the body is replaced; if both are strong
// definitions a duplicate symbol is reported.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, const Elf_Sym &Sym,
                                      InputSectionBase<ELFT> *Section) {
  // Resolve the defining file once, tolerating a null Section. The duplicate
  // report below used to dereference Section unconditionally even though the
  // insert() call already allowed for it being null.
  InputFile *File = Section ? Section->getFile() : nullptr;

  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Sym.getType(), Sym.getVisibility(),
             /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, File);
  int Cmp = compareDefinedNonCommon(S, WasInserted, Sym.getBinding());
  if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, Sym, Section);
  else if (Cmp == 0)
    reportDuplicate(S->body(), File);
  return S;
}
// Adds a regular defined symbol that has no ELF symbol record, section or
// file. The low two bits of StOther are used as its visibility.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t Binding,
                                      uint8_t StOther) {
  std::pair<Symbol *, bool> Found =
      insert(Name, STT_NOTYPE, StOther & 3, /*CanOmitFromDynSym*/ false,
             /*IsUsedInRegularObj*/ true, nullptr);
  Symbol *S = Found.first;

  int Cmp = compareDefinedNonCommon(S, Found.second, Binding);
  if (Cmp == 0)
    reportDuplicate(S->body(), nullptr);
  else if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, StOther);
  return S;
}
// Adds a hidden, global synthetic symbol N whose value is Value relative to
// the output section Section.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addSynthetic(StringRef N,
                                        OutputSectionBase<ELFT> *Section,
                                        uintX_t Value) {
  std::pair<Symbol *, bool> Found =
      insert(N, STT_NOTYPE, STV_HIDDEN, /*CanOmitFromDynSym*/ false,
             /*IsUsedInRegularObj*/ true, nullptr);
  Symbol *S = Found.first;

  int Cmp = compareDefinedNonCommon(S, Found.second, STB_GLOBAL);
  if (Cmp == 0)
    reportDuplicate(S->body(), nullptr);
  else if (Cmp > 0)
    replaceBody<DefinedSynthetic<ELFT>>(S, N, Value, Section);
  return S;
}
// Adds a symbol defined by the shared object F.
//
// The shared definition only becomes the symbol's body when the name is new
// to the table or is currently undefined. In that case F is also marked as
// used unless the symbol is weak.
template <typename ELFT>
void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name,
                                  const Elf_Sym &Sym,
                                  const typename ELFT::Verdef *Verdef) {
  // STV_DEFAULT is passed as the visibility because DSO symbols do not
  // affect visibility in the output; this leaves the visibility recorded in
  // the symbol table unchanged.
  std::pair<Symbol *, bool> Slot =
      insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true,
             /*IsUsedInRegularObj*/ false, F);
  Symbol *S = Slot.first;

  // Default-visibility DSO symbols must be preempted, so export the name.
  if (Sym.getVisibility() == STV_DEFAULT)
    S->ExportDynamic = true;

  bool TakeShared = Slot.second || isa<Undefined>(S->body());
  if (!TakeShared)
    return;

  replaceBody<SharedSymbol<ELFT>>(S, F, Name, Sym, Verdef);
  if (!S->isWeak())
    F->IsUsed = true;
}
// Adds a symbol defined in the LLVM bitcode file F. IsWeak selects between
// STB_WEAK and STB_GLOBAL for the comparison against any existing
// definition. Returns the Symbol slot for Name.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, bool IsWeak,
                                      uint8_t StOther, uint8_t Type,
                                      bool CanOmitFromDynSym, BitcodeFile *F) {
  std::pair<Symbol *, bool> Slot =
      insert(Name, Type, StOther & 3, CanOmitFromDynSym,
             /*IsUsedInRegularObj*/ false, F);
  Symbol *Sym = Slot.first;
  int Order = compareDefinedNonCommon(Sym, Slot.second,
                                      IsWeak ? STB_WEAK : STB_GLOBAL);
  if (Order == 0)
    reportDuplicate(Sym->body(), F);
  else if (Order > 0)
    replaceBody<DefinedBitcode>(Sym, Name, StOther, Type, F);
  return Sym;
}
// Looks up Name and returns the body of its symbol. Returns nullptr when the
// name is not in the table or when its entry carries no symbol index (-1).
template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {
  auto Entry = Symtab.find(Name);
  if (Entry != Symtab.end()) {
    SymIndex Index = Entry->second;
    if (Index.Idx != -1)
      return SymVector[Index.Idx]->body();
  }
  return nullptr;
}
// Returns every symbol body that is not undefined and whose name matches the
// glob pattern Pattern, in SymVector order.
template <class ELFT>
std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(StringRef Pattern) {
  std::vector<SymbolBody *> Matches;
  for (Symbol *S : SymVector) {
    SymbolBody *Body = S->body();
    if (Body->isUndefined())
      continue;
    if (!globMatch(Pattern, Body->getName()))
      continue;
    Matches.push_back(Body);
  }
  return Matches;
}
// Registers a symbol that archive F can provide through one of its members.
//
// - New name: the symbol becomes a LazyArchive of unknown type.
// - Existing name that is not undefined: nothing happens.
// - Existing weak undefined: the symbol becomes a LazyArchive that keeps the
//   previous body's type; no member is loaded.
// - Existing strong undefined: the archive member is loaded immediately.
template <class ELFT>
void SymbolTable<ELFT>::addLazyArchive(ArchiveFile *F,
                                       const object::Archive::Symbol Sym) {
  std::pair<Symbol *, bool> Slot = insert(Sym.getName());
  Symbol *S = Slot.first;
  if (Slot.second) {
    replaceBody<LazyArchive>(S, *F, Sym, SymbolBody::UnknownType);
    return;
  }

  if (!S->body()->isUndefined())
    return;

  // Weak undefined symbols should not fetch members from archives. If we were
  // to keep the old symbol we would not know that an archive member was
  // available if a strong undefined symbol shows up afterwards in the link.
  // If a strong undefined symbol never shows up, this lazy symbol will get to
  // the end of the link and must be treated as the weak undefined one. We
  // already marked this symbol as used when we added it to the symbol table,
  // but we also need to preserve its type.
  // FIXME: Move the Type field to Symbol.
  if (S->isWeak()) {
    replaceBody<LazyArchive>(S, *F, Sym, S->body()->Type);
    return;
  }

  MemoryBufferRef Member = F->getMember(&Sym);
  if (Member.getBuffer().empty())
    return;
  addFile(createObjectFile(Member, F->getName()));
}
// Registers a symbol that the lazy object file Obj can provide. The cases
// mirror addLazyArchive: a new name becomes a LazyObject of unknown type, a
// name that is not undefined is left alone, a weak undefined becomes a
// LazyObject that keeps the previous type, and a strong undefined causes the
// object file to be loaded immediately.
template <class ELFT>
void SymbolTable<ELFT>::addLazyObject(StringRef Name, LazyObjectFile &Obj) {
  std::pair<Symbol *, bool> Slot = insert(Name);
  Symbol *S = Slot.first;
  if (Slot.second) {
    replaceBody<LazyObject>(S, Name, Obj, SymbolBody::UnknownType);
    return;
  }

  if (!S->body()->isUndefined())
    return;

  // Weak undefined symbols do not pull in the file; only the type is kept.
  if (S->isWeak()) {
    replaceBody<LazyObject>(S, Name, Obj, S->body()->Type);
    return;
  }

  MemoryBufferRef Buffer = Obj.getBuffer();
  if (Buffer.getBuffer().empty())
    return;
  addFile(createObjectFile(Buffer));
}
// Handles the undefined (-u) flags: for every name listed in
// Config->Undefined that is currently a lazy symbol, fetches the file that
// defines it and adds that file to the link.
template <class ELFT> void SymbolTable<ELFT>::scanUndefinedFlags() {
  for (StringRef Name : Config->Undefined) {
    auto *LazySym = dyn_cast_or_null<Lazy>(find(Name));
    if (!LazySym)
      continue;
    std::unique_ptr<InputFile> File = LazySym->fetch();
    if (!File)
      continue;
    addFile(std::move(File));
  }
}
// Handles shared libraries that depend on the user program (the reverse of
// the usual direction). A shared library may leave symbols undefined and
// expect the program to supply them; BSD's __progname is one example. Such
// symbols must appear in the main program's .dynsym so the library can find
// them, so every defined symbol that some DSO leaves undefined is marked
// ExportDynamic. Undefined symbols in DSOs are otherwise ignored.
template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() {
  for (std::unique_ptr<SharedFile<ELFT>> &DSO : SharedFiles) {
    for (StringRef Name : DSO->getUndefinedSymbols()) {
      SymbolBody *Body = find(Name);
      if (!Body || !Body->isDefined())
        continue;
      Body->symbol()->ExportDynamic = true;
    }
  }
}
// Processes the dynamic list option: every name in Config->DynamicList that
// is present in the symbol table is marked to be exported in the dynamic
// symbol table.
template <class ELFT> void SymbolTable<ELFT>::scanDynamicList() {
  for (StringRef Name : Config->DynamicList) {
    SymbolBody *Body = find(Name);
    if (!Body)
      continue;
    Body->symbol()->ExportDynamic = true;
  }
}
// Returns true if S contains at least one of the characters '?' or '*'.
static bool hasWildcard(StringRef S) {
  size_t FirstMeta = S.find_first_of("?*");
  return FirstMeta != StringRef::npos;
}
// Assigns version index Version to the symbol that owns Body.
//
// If Body is null or undefined nothing is assigned; an error is reported in
// that case only when Config->NoUndefinedVersion is set. A warning is issued
// when the symbol already carries a version other than the default one, and
// the new version is still assigned.
static void setVersionId(SymbolBody *Body, StringRef VersionName,
                         StringRef Name, uint16_t Version) {
  bool Missing = !Body || Body->isUndefined();
  if (Missing) {
    if (Config->NoUndefinedVersion)
      error("version script assignment of " + VersionName + " to symbol " +
            Name + " failed: symbol not defined");
    return;
  }

  Symbol *Owner = Body->symbol();
  bool AlreadyVersioned = Owner->VersionId != Config->DefaultSymbolVersion;
  if (AlreadyVersioned)
    warning("duplicate symbol " + Name + " in version script");
  Owner->VersionId = Version;
}
// Builds a map from the demangled name of every symbol in the table to its
// body. When two symbols demangle to the same string, the one that comes
// later in SymVector wins.
template <class ELFT>
std::map<std::string, SymbolBody *> SymbolTable<ELFT>::getDemangledSyms() {
  std::map<std::string, SymbolBody *> ByDemangledName;
  for (Symbol *S : SymVector) {
    SymbolBody *Body = S->body();
    std::string Key = demangle(Body->getName());
    ByDemangledName[Key] = Body;
  }
  return ByDemangledName;
}
static bool hasExternCpp() {
for (VersionDefinition &V : Config->VersionDefinitions)
for (SymbolVersion Sym : V.Globals)
if (Sym.IsExternCpp)
return true;
return false;
}
// Processes the --version-script option by assigning a VersionId to the
// symbols the script names.
//
// When the script lists plain globals (Config->VersionScriptGlobals), each
// listed symbol found in the table gets VER_NDX_GLOBAL and nothing else is
// done. Otherwise, when version definitions exist, each symbol receives the
// id of the version that names it:
//  * An exact match on the mangled name, or an exact extern "C++" match on
//    the demangled name, is used first.
//  * Otherwise the wildcard patterns are consulted. Version tags are visited
//    in reverse order and the first match found is used (that is, the last
//    matching version tag in the file).
template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() {
  // A version script without version declarations only marks globals.
  if (!Config->VersionScriptGlobals.empty()) {
    for (SymbolVersion &Entry : Config->VersionScriptGlobals) {
      SymbolBody *Body = find(Entry.Name);
      if (Body)
        Body->symbol()->VersionId = VER_NDX_GLOBAL;
    }
    return;
  }

  if (Config->VersionDefinitions.empty())
    return;

  // Exact matches. The demangled-name map is only built when at least one
  // entry is extern "C++", since building it walks every symbol.
  std::map<std::string, SymbolBody *> Demangled;
  if (hasExternCpp())
    Demangled = getDemangledSyms();

  for (VersionDefinition &Def : Config->VersionDefinitions) {
    for (SymbolVersion Entry : Def.Globals) {
      if (hasWildcard(Entry.Name))
        continue;
      SymbolBody *Body =
          Entry.IsExternCpp ? Demangled[Entry.Name] : find(Entry.Name);
      setVersionId(Body, Def.Name, Entry.Name, Def.Id);
    }
  }

  // Wildcards, last definition first. Only symbols that still carry the
  // default version are touched, so exact matches keep their assignment.
  for (size_t I = Config->VersionDefinitions.size(); I-- > 0;) {
    VersionDefinition &Def = Config->VersionDefinitions[I];
    for (SymbolVersion &Entry : Def.Globals) {
      if (!hasWildcard(Entry.Name))
        continue;
      for (SymbolBody *Body : findAll(Entry.Name)) {
        Symbol *Owner = Body->symbol();
        if (Owner->VersionId == Config->DefaultSymbolVersion)
          Owner->VersionId = Def.Id;
      }
    }
  }
}
// Returns the size of the longest version name.
static int getMaxVersionLen() {
size_t Len = 0;
for (VersionDefinition &V : Config->VersionDefinitions)
Len = std::max(Len, V.Name.size());
return Len;
}
// Parses a symbol name of the form <name>@<version> or <name>@@<version>.
//
// Returns the bare name together with the id of the named version; the id has
// VERSYM_HIDDEN set unless the "@@" (default version) form was used. Returns
// {"", 0} when the name carries no version suffix, and also, after reporting
// an error, when the suffix names a version that was never defined.
static std::pair<StringRef, uint16_t>
getSymbolVersion(SymbolBody *B, int MaxVersionLen) {
  StringRef Full = B->getName();

  // MaxVersionLen lets the search start near the end of the string instead of
  // scanning every character. That pays off because versions are usually
  // short while symbol names can be very long.
  int SearchFrom = std::max(0, int(Full.size()) - MaxVersionLen - 2);
  size_t At = Full.find('@', SearchFrom);
  if (At == 0 || At == StringRef::npos)
    return {"", 0};

  StringRef Verstr = Full.substr(At + 1);
  if (Verstr.empty())
    return {"", 0};
  StringRef Name = Full.substr(0, At);

  // '@@' in a symbol name means the default version.
  // It is usually the most recent one.
  bool IsDefault = (Verstr[0] == '@');
  if (IsDefault)
    Verstr = Verstr.substr(1);

  for (VersionDefinition &V : Config->VersionDefinitions) {
    if (!(V.Name == Verstr))
      continue;
    return {Name, IsDefault ? V.Id : (V.Id | VERSYM_HIDDEN)};
  }

  // It is an error if the specified version was not defined.
  error("symbol " + Full + " has undefined version " + Verstr);
  return {"", 0};
}
// Version scripts are the usual way to assign versions to symbols, but not
// the only one. When a symbol name contains '@', the text after it is not
// part of the name; it names a version. This function strips that suffix
// from each such symbol and records the version id on it.
template <class ELFT> void SymbolTable<ELFT>::scanSymbolVersions() {
  if (Config->VersionDefinitions.empty())
    return;

  int MaxVersionLen = getMaxVersionLen();

  // There is no way to find '@' in symbol names other than visiting every
  // symbol, so this is inherently slow. It only runs when versions have been
  // defined.
  for (Symbol *Sym : SymVector) {
    SymbolBody *Body = Sym->body();

    // Symbol versions for exported symbols only make sense for defined
    // symbols with default or protected visibility.
    if (!Body->isDefined())
      continue;
    uint8_t Visibility = Body->getVisibility();
    bool Exportable = Visibility == STV_DEFAULT || Visibility == STV_PROTECTED;
    if (!Exportable)
      continue;

    // Look for '@' in the symbol name.
    std::pair<StringRef, uint16_t> Parsed =
        getSymbolVersion(Body, MaxVersionLen);
    if (Parsed.first.empty())
      continue;

    Body->setName(Parsed.first);
    Sym->VersionId = Parsed.second;
  }
}
// Explicit instantiations of SymbolTable for the four ELF flavours named
// here (32/64-bit, little/big endian). The member templates above are defined
// in this file, so these instantiations are what emits their code.
template class elf::SymbolTable<ELF32LE>;
template class elf::SymbolTable<ELF32BE>;
template class elf::SymbolTable<ELF64LE>;
template class elf::SymbolTable<ELF64BE>;

View File

@ -0,0 +1,144 @@
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SYMBOL_TABLE_H
#define LLD_ELF_SYMBOL_TABLE_H
#include "InputFiles.h"
#include "LTO.h"
#include "llvm/ADT/DenseMap.h"
namespace lld {
namespace elf {
class Lazy;
template <class ELFT> class OutputSectionBase;
struct Symbol;
// Key type of the symbol name map (SymbolTable::Symtab): a StringRef wrapped
// in llvm::CachedHash.
typedef llvm::CachedHash<StringRef> SymName;
// SymbolTable is a bucket of all known symbols, including defined,
// undefined, or lazy symbols (the last one is symbols in archive
// files whose archive members are not yet loaded).
//
// We put all symbols of all files to a SymbolTable, and the
// SymbolTable selects the "best" symbols if there are name
// conflicts. For example, obviously, a defined symbol is better than
// an undefined symbol. Or, if there's a conflict between a lazy and an
// undefined, it'll read an archive member to read a real definition
// to replace the lazy symbol. The logic is implemented in the
// add*() functions, which are called by input files as they are parsed. There
// is one add* function per symbol type.
template <class ELFT> class SymbolTable {
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
public:
// Adds an input file to the link; ownership of File is transferred.
void addFile(std::unique_ptr<InputFile> File);
void addCombinedLtoObject();
// Read-only views of the symbols and of the owned object / shared files.
llvm::ArrayRef<Symbol *> getSymbols() const { return SymVector; }
const std::vector<std::unique_ptr<ObjectFile<ELFT>>> &getObjectFiles() const {
return ObjectFiles;
}
const std::vector<std::unique_ptr<SharedFile<ELFT>>> &getSharedFiles() const {
return SharedFiles;
}
// The add*() family: one entry point per kind of symbol. Visibility defaults
// to STV_HIDDEN for addAbsolute and addIgnored.
DefinedRegular<ELFT> *addAbsolute(StringRef Name,
uint8_t Visibility = llvm::ELF::STV_HIDDEN);
DefinedRegular<ELFT> *addIgnored(StringRef Name,
uint8_t Visibility = llvm::ELF::STV_HIDDEN);
Symbol *addUndefined(StringRef Name);
Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
Symbol *addRegular(StringRef Name, const Elf_Sym &Sym,
InputSectionBase<ELFT> *Section);
Symbol *addRegular(StringRef Name, uint8_t Binding, uint8_t StOther);
Symbol *addSynthetic(StringRef N, OutputSectionBase<ELFT> *Section,
uintX_t Value);
void addShared(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
const typename ELFT::Verdef *Verdef);
void addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S);
void addLazyObject(StringRef Name, LazyObjectFile &Obj);
Symbol *addBitcode(StringRef Name, bool IsWeak, uint8_t StOther, uint8_t Type,
bool CanOmitFromDynSym, BitcodeFile *File);
Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment,
uint8_t Binding, uint8_t StOther, uint8_t Type,
InputFile *File);
// The scan*() family: passes over the table that apply command-line options
// (-u, dynamic list, version script) and DSO requirements to the symbols.
void scanUndefinedFlags();
void scanShlibUndefined();
void scanDynamicList();
void scanVersionScript();
void scanSymbolVersions();
// Returns the body of the symbol called Name, or nullptr if there is none.
SymbolBody *find(StringRef Name);
void trace(StringRef Name);
void wrap(StringRef Name);
private:
// Returns the non-undefined symbol bodies whose names match a glob pattern.
std::vector<SymbolBody *> findAll(StringRef Pattern);
// Finds or creates the Symbol for Name; the bool tells whether it was newly
// inserted.
std::pair<Symbol *, bool> insert(StringRef Name);
std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type,
uint8_t Visibility, bool CanOmitFromDynSym,
bool IsUsedInRegularObj, InputFile *File);
std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile);
void reportDuplicate(SymbolBody *Existing, InputFile *NewFile);
std::map<std::string, SymbolBody *> getDemangledSyms();
// Value type of Symtab. Idx is an index into SymVector, with -1 meaning that
// the name has no symbol yet. Traced is presumably set for names passed to
// trace() -- confirm against its definition.
struct SymIndex {
int Idx : 31;
unsigned Traced : 1;
};
// The order the global symbols are in is not defined. We can use an arbitrary
// order, but it has to be reproducible. That is true even when cross linking.
// The default hashing of StringRef produces different results on 32 and 64
// bit systems so we use a map to a vector. That is arbitrary, deterministic
// but a bit inefficient.
// FIXME: Experiment with passing in a custom hashing or sorting the symbols
// once symbol resolution is finished.
llvm::DenseMap<SymName, SymIndex> Symtab;
std::vector<Symbol *> SymVector;
llvm::BumpPtrAllocator Alloc;
// Comdat groups define "link once" sections. If two comdat groups have the
// same name, only one of them is linked, and the other is ignored. This set
// is used to uniquify them.
llvm::DenseSet<StringRef> ComdatGroups;
// The symbol table owns all file objects.
std::vector<std::unique_ptr<ArchiveFile>> ArchiveFiles;
std::vector<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles;
std::vector<std::unique_ptr<LazyObjectFile>> LazyObjectFiles;
std::vector<std::unique_ptr<SharedFile<ELFT>>> SharedFiles;
std::vector<std::unique_ptr<BitcodeFile>> BitcodeFiles;
// Set of .so files to not link the same shared object file more than once.
llvm::DenseSet<StringRef> SoNames;
std::unique_ptr<BitcodeCompiler> Lto;
};
// Holder for a per-ELFT SymbolTable pointer. X is a static data member, so
// there is one pointer per ELFT instantiation; the second line is its
// out-of-class definition.
template <class ELFT> struct Symtab { static SymbolTable<ELFT> *X; };
template <class ELFT> SymbolTable<ELFT> *Symtab<ELFT>::X;
} // namespace elf
} // namespace lld
#endif

View File

@ -0,0 +1,336 @@
//===- Symbols.cpp --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Symbols.h"
#include "Error.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "Target.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
// Computes the address of Body according to its kind.
//
// Addend is an in/out parameter: for a section symbol the addend is folded
// into the section offset before that offset is translated, and Addend is
// then reset to 0 so the caller does not apply it a second time. For every
// other symbol Addend is left untouched.
template <class ELFT>
static typename ELFT::uint getSymVA(const SymbolBody &Body,
typename ELFT::uint &Addend) {
typedef typename ELFT::uint uintX_t;
switch (Body.kind()) {
case SymbolBody::DefinedSyntheticKind: {
auto &D = cast<DefinedSynthetic<ELFT>>(Body);
const OutputSectionBase<ELFT> *Sec = D.Section;
// Without an output section the value is returned as is.
if (!Sec)
return D.Value;
// SectionEnd is a marker value meaning "one past the end of the section".
if (D.Value == DefinedSynthetic<ELFT>::SectionEnd)
return Sec->getVA() + Sec->getSize();
return Sec->getVA() + D.Value;
}
case SymbolBody::DefinedRegularKind: {
auto &D = cast<DefinedRegular<ELFT>>(Body);
InputSectionBase<ELFT> *SC = D.Section;
// According to the ELF spec reference to a local symbol from outside
// the group are not allowed. Unfortunately .eh_frame breaks that rule
// and must be treated specially. For now we just replace the symbol with
// 0.
if (SC == &InputSection<ELFT>::Discarded)
return 0;
// This is an absolute symbol.
if (!SC)
return D.Value;
uintX_t Offset = D.Value;
// For a section symbol the addend is part of the input offset, so it has
// to be applied before getOffset() translates that offset.
if (D.isSection()) {
Offset += Addend;
Addend = 0;
}
uintX_t VA = SC->OutSec->getVA() + SC->getOffset(Offset);
// TLS symbols are expressed relative to the start of the TLS segment.
if (D.isTls())
return VA - Out<ELFT>::TlsPhdr->p_vaddr;
return VA;
}
case SymbolBody::DefinedCommonKind:
// Common symbols live in the output .bss at the offset recorded on them.
return Out<ELFT>::Bss->getVA() + cast<DefinedCommon>(Body).OffsetInBss;
case SymbolBody::SharedKind: {
auto &SS = cast<SharedSymbol<ELFT>>(Body);
// A shared symbol only has an address in the output when it needs a copy
// relocation or a PLT address: functions resolve to their PLT entry, and
// everything else to its copy in .bss.
if (!SS.NeedsCopyOrPltAddr)
return 0;
if (SS.isFunc())
return Body.getPltVA<ELFT>();
return Out<ELFT>::Bss->getVA() + SS.OffsetInBss;
}
case SymbolBody::UndefinedKind:
return 0;
case SymbolBody::LazyArchiveKind:
case SymbolBody::LazyObjectKind:
// The assertion checks that any lazy symbol reaching this point is used in
// a regular object file.
assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer");
return 0;
case SymbolBody::DefinedBitcodeKind:
llvm_unreachable("should have been replaced");
}
llvm_unreachable("invalid symbol kind");
}
// Constructs a local symbol body. Local symbols are identified by NameOffset
// rather than by a name string, and IsLocal is set to true.
SymbolBody::SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther,
uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true),
IsInGlobalMipsGot(false), Type(Type), StOther(StOther),
NameOffset(NameOffset) {}
// Constructs a non-local symbol body. Only the pointer and length of Name are
// stored; the characters are not copied.
SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
IsInGlobalMipsGot(false), Type(Type), StOther(StOther),
Name({Name.data(), Name.size()}) {}
// Returns the name of a non-local symbol. Must not be called on a local
// symbol (asserted): locals store a name offset instead of a name.
StringRef SymbolBody::getName() const {
  assert(!isLocal());
  const Str &Stored = Name;
  return StringRef(Stored.S, Stored.Len);
}
// Replaces the stored name with S. Only the pointer and length are stored;
// the characters are not copied.
void SymbolBody::setName(StringRef S) {
  Name = {S.data(), S.size()};
}
// Returns true if a symbol can be replaced at load-time by a symbol
// with the same name defined in other ELF executable or DSO.
bool SymbolBody::isPreemptible() const {
if (isLocal())
return false;
// Shared symbols resolve to the definition in the DSO. The exceptions are
// symbols with copy relocations (which resolve to .bss) or preempt plt
// entries (which resolve to that plt entry).
if (isShared())
return !NeedsCopyOrPltAddr;
// That's all that can be preempted in a non-DSO.
if (!Config->Shared)
return false;
// Only symbols that appear in dynsym can be preempted.
if (!symbol()->includeInDynsym())
return false;
// Only default visibility symbols can be preempted.
if (symbol()->Visibility != STV_DEFAULT)
return false;
// -Bsymbolic means that definitions are not preempted.
if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc()))
return !isDefined();
return true;
}
// Returns true if this symbol is a DefinedRegular or a SharedSymbol with a
// non-null ThunkData. Every other kind of symbol has no thunk.
template <class ELFT> bool SymbolBody::hasThunk() const {
  auto *Regular = dyn_cast<DefinedRegular<ELFT>>(this);
  if (Regular)
    return Regular->ThunkData != nullptr;

  auto *Shared = dyn_cast<SharedSymbol<ELFT>>(this);
  if (Shared)
    return Shared->ThunkData != nullptr;

  return false;
}
// Returns the address of this symbol plus Addend.
template <class ELFT>
typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const {
  typedef typename ELFT::uint uintX_t;
  // getSymVA takes Addend by reference and may consume it (it zeroes it for
  // section symbols). The call therefore has to complete before Addend is
  // read for the sum, which is why this is two statements and not one
  // expression.
  uintX_t Result = getSymVA<ELFT>(*this, Addend);
  Result += Addend;
  return Result;
}
// Returns the address of this symbol's GOT entry: the address of the GOT plus
// the entry's offset inside it.
template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const {
  auto GotBase = Out<ELFT>::Got->getVA();
  return GotBase + getGotOffset<ELFT>();
}
// Returns the offset of this symbol's entry from the start of the GOT, i.e.
// its GOT index scaled by the target's GOT entry size.
template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const {
  auto EntrySize = Target->GotEntrySize;
  return GotIndex * EntrySize;
}
template <class ELFT> typename ELFT::uint SymbolBody::getGotPltVA() const {
return Out<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>();
}
// Returns the offset of this symbol's entry from the start of .got.plt, i.e.
// its index scaled by the target's .got.plt entry size.
template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const {
  auto EntrySize = Target->GotPltEntrySize;
  return GotPltIndex * EntrySize;
}
// Returns the address of this symbol's PLT entry. Entries follow the PLT
// header and are laid out contiguously, one PltEntrySize apart.
template <class ELFT> typename ELFT::uint SymbolBody::getPltVA() const {
  auto FirstEntry = Out<ELFT>::Plt->getVA() + Target->PltHeaderSize;
  return FirstEntry + PltIndex * Target->PltEntrySize;
}
// Returns the address of the thunk attached to this symbol. Only
// DefinedRegular and SharedSymbol carry thunk data; for any other kind of
// symbol this reports a fatal error. ThunkData is dereferenced without a
// null check.
template <class ELFT> typename ELFT::uint SymbolBody::getThunkVA() const {
  const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(this);
  if (Regular)
    return Regular->ThunkData->getVA();

  const auto *Shared = dyn_cast<SharedSymbol<ELFT>>(this);
  if (Shared)
    return Shared->ThunkData->getVA();

  fatal("getThunkVA() not supported for Symbol class\n");
}
// Returns the size of this symbol: the recorded Size for common and regular
// definitions, st_size for shared symbols, and 0 for every other kind.
template <class ELFT> typename ELFT::uint SymbolBody::getSize() const {
  const auto *Common = dyn_cast<DefinedCommon>(this);
  if (Common)
    return Common->Size;

  const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(this);
  if (Regular)
    return Regular->Size;

  const auto *Shared = dyn_cast<SharedSymbol<ELFT>>(this);
  if (Shared)
    return Shared->Sym.st_size;

  return 0;
}
// Both constructors forward to the matching SymbolBody constructor: the first
// for symbols identified by name, the second for symbols identified by a name
// offset.
Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolBody(K, Name, StOther, Type) {}
Defined::Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type)
: SymbolBody(K, NameOffset, StOther, Type) {}
// Creates a definition that comes from the bitcode file F and records F as
// the symbol's originating file.
DefinedBitcode::DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type,
BitcodeFile *F)
: Defined(DefinedBitcodeKind, Name, StOther, Type) {
this->File = F;
}
// LLVM-style RTTI hook: true if S is a DefinedBitcode.
bool DefinedBitcode::classof(const SymbolBody *S) {
  const bool IsBitcode = (S->kind() == DefinedBitcodeKind);
  return IsBitcode;
}
// Creates an undefined symbol identified by name and records the file it
// came from.
Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
this->File = File;
}
// Creates an undefined symbol identified by a name offset and records the
// file it came from.
Undefined::Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) {
this->File = File;
}
// Creates a linker-generated symbol N with value Value in the output section
// Section. STV_HIDDEN is passed as the st_other value and the type is 0.
template <typename ELFT>
DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
OutputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
Value(Value), Section(Section) {}
// Creates a common symbol of the given size and alignment and records the
// file it came from. OffsetInBss is not initialized here.
DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
uint8_t StOther, uint8_t Type, InputFile *File)
: Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
Alignment(Alignment), Size(Size) {
this->File = File;
}
std::unique_ptr<InputFile> Lazy::fetch() {
if (auto *S = dyn_cast<LazyArchive>(this))
return S->fetch();
return cast<LazyObject>(this)->fetch();
}
// Creates a lazy symbol for the archive symbol S, named S.getName(), and
// records the archive File as its originating file.
LazyArchive::LazyArchive(ArchiveFile &File,
const llvm::object::Archive::Symbol S, uint8_t Type)
: Lazy(LazyArchiveKind, S.getName(), Type), Sym(S) {
this->File = &File;
}
// Creates a lazy symbol Name provided by the lazy object file File and
// records File as its originating file.
LazyObject::LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type)
: Lazy(LazyObjectKind, Name, Type) {
this->File = &File;
}
std::unique_ptr<InputFile> LazyArchive::fetch() {
MemoryBufferRef MBRef = file()->getMember(&Sym);
// getMember returns an empty buffer if the member was already
// read from the library.
if (MBRef.getBuffer().empty())
return std::unique_ptr<InputFile>(nullptr);
return createObjectFile(MBRef, file()->getName());
}
std::unique_ptr<InputFile> LazyObject::fetch() {
MemoryBufferRef MBRef = file()->getBuffer();
if (MBRef.getBuffer().empty())
return std::unique_ptr<InputFile>(nullptr);
return createObjectFile(MBRef);
}
// Returns true if this symbol belongs in the dynamic symbol table. Only
// symbols with default or protected visibility qualify, and then only if the
// symbol is exported and not versioned as local, or is a shared symbol, or is
// undefined while a shared object is being produced.
bool Symbol::includeInDynsym() const {
  bool Exportable = Visibility == STV_DEFAULT || Visibility == STV_PROTECTED;
  if (!Exportable)
    return false;
  if (ExportDynamic && VersionId != VER_NDX_LOCAL)
    return true;
  if (body()->isShared())
    return true;
  return body()->isUndefined() && Config->Shared;
}
// Prints the log line for --trace-symbol: the originating file name, whether
// the file references, commonly defines or defines the symbol, and the
// symbol's name.
void elf::printTraceSymbol(Symbol *Sym) {
  SymbolBody *Body = Sym->body();

  const char *Action;
  if (Body->isUndefined())
    Action = ": reference to ";
  else if (Body->isCommon())
    Action = ": common definition of ";
  else
    Action = ": definition of ";

  auto &OS = outs();
  OS << getFilename(Body->File);
  OS << Action;
  OS << Body->getName() << "\n";
}
// Explicit instantiations. The SymbolBody member templates and the
// DefinedSynthetic class template are defined in this file, so each is
// instantiated here for the four ELF flavours (32/64-bit, little/big endian).
// The uint32_t / uint64_t return and parameter types correspond to
// ELFT::uint for the 32-bit and 64-bit flavours respectively.
template bool SymbolBody::hasThunk<ELF32LE>() const;
template bool SymbolBody::hasThunk<ELF32BE>() const;
template bool SymbolBody::hasThunk<ELF64LE>() const;
template bool SymbolBody::hasThunk<ELF64BE>() const;
template uint32_t SymbolBody::template getVA<ELF32LE>(uint32_t) const;
template uint32_t SymbolBody::template getVA<ELF32BE>(uint32_t) const;
template uint64_t SymbolBody::template getVA<ELF64LE>(uint64_t) const;
template uint64_t SymbolBody::template getVA<ELF64BE>(uint64_t) const;
template uint32_t SymbolBody::template getGotVA<ELF32LE>() const;
template uint32_t SymbolBody::template getGotVA<ELF32BE>() const;
template uint64_t SymbolBody::template getGotVA<ELF64LE>() const;
template uint64_t SymbolBody::template getGotVA<ELF64BE>() const;
template uint32_t SymbolBody::template getGotOffset<ELF32LE>() const;
template uint32_t SymbolBody::template getGotOffset<ELF32BE>() const;
template uint64_t SymbolBody::template getGotOffset<ELF64LE>() const;
template uint64_t SymbolBody::template getGotOffset<ELF64BE>() const;
template uint32_t SymbolBody::template getGotPltVA<ELF32LE>() const;
template uint32_t SymbolBody::template getGotPltVA<ELF32BE>() const;
template uint64_t SymbolBody::template getGotPltVA<ELF64LE>() const;
template uint64_t SymbolBody::template getGotPltVA<ELF64BE>() const;
template uint32_t SymbolBody::template getThunkVA<ELF32LE>() const;
template uint32_t SymbolBody::template getThunkVA<ELF32BE>() const;
template uint64_t SymbolBody::template getThunkVA<ELF64LE>() const;
template uint64_t SymbolBody::template getThunkVA<ELF64BE>() const;
template uint32_t SymbolBody::template getGotPltOffset<ELF32LE>() const;
template uint32_t SymbolBody::template getGotPltOffset<ELF32BE>() const;
template uint64_t SymbolBody::template getGotPltOffset<ELF64LE>() const;
template uint64_t SymbolBody::template getGotPltOffset<ELF64BE>() const;
template uint32_t SymbolBody::template getPltVA<ELF32LE>() const;
template uint32_t SymbolBody::template getPltVA<ELF32BE>() const;
template uint64_t SymbolBody::template getPltVA<ELF64LE>() const;
template uint64_t SymbolBody::template getPltVA<ELF64BE>() const;
template uint32_t SymbolBody::template getSize<ELF32LE>() const;
template uint32_t SymbolBody::template getSize<ELF32BE>() const;
template uint64_t SymbolBody::template getSize<ELF64LE>() const;
template uint64_t SymbolBody::template getSize<ELF64BE>() const;
template class elf::DefinedSynthetic<ELF32LE>;
template class elf::DefinedSynthetic<ELF32BE>;
template class elf::DefinedSynthetic<ELF64LE>;
template class elf::DefinedSynthetic<ELF64BE>;

View File

@ -0,0 +1,474 @@
//===- Symbols.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// All symbols are handled as SymbolBodies regardless of their types.
// This file defines various types of SymbolBodies.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SYMBOLS_H
#define LLD_ELF_SYMBOLS_H
#include "InputSection.h"
#include "lld/Core/LLVM.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/AlignOf.h"
namespace lld {
namespace elf {
class ArchiveFile;
class BitcodeFile;
class InputFile;
class LazyObjectFile;
class SymbolBody;
template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection;
template <class ELFT> class OutputSectionBase;
template <class ELFT> class SharedFile;
struct Symbol;
// The base class for real symbol classes.
//
// A SymbolBody records what kind of symbol this is (defined, shared,
// undefined, lazy, ...), its ELF type and st_other byte, the file it came
// from, and its indices in the GOT / PLT / dynamic symbol table. A symbol is
// identified either by a name (non-local symbols) or by a name offset (local
// symbols); both share storage in a union, so only the accessor that matches
// isLocal() may be used.
class SymbolBody {
public:
// The enumerators are ordered so that every "defined" kind falls in the range
// [DefinedFirst, DefinedLast]; isDefined() relies on that ordering.
enum Kind {
DefinedFirst,
DefinedRegularKind = DefinedFirst,
SharedKind,
DefinedCommonKind,
DefinedBitcodeKind,
DefinedSyntheticKind,
DefinedLast = DefinedSyntheticKind,
UndefinedKind,
LazyArchiveKind,
LazyObjectKind,
};
// NOTE(review): this constructor initializes only SymbolKind; the bitfields,
// Type, StOther and the name union are left uninitialized.
SymbolBody(Kind K) : SymbolKind(K) {}
Symbol *symbol();
const Symbol *symbol() const {
return const_cast<SymbolBody *>(this)->symbol();
}
Kind kind() const { return static_cast<Kind>(SymbolKind); }
bool isUndefined() const { return SymbolKind == UndefinedKind; }
bool isDefined() const { return SymbolKind <= DefinedLast; }
bool isCommon() const { return SymbolKind == DefinedCommonKind; }
bool isLazy() const {
return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind;
}
bool isShared() const { return SymbolKind == SharedKind; }
bool isLocal() const { return IsLocal; }
bool isPreemptible() const;
// Name accessors; getName() is only valid for non-local symbols.
StringRef getName() const;
void setName(StringRef S);
// Only valid for local symbols.
uint32_t getNameOffset() const {
assert(isLocal());
return NameOffset;
}
// The visibility is the low two bits of st_other.
uint8_t getVisibility() const { return StOther & 0x3; }
unsigned DynsymIndex = 0;
// Table indices; -1 means that no entry has been assigned (see isInGot() and
// isInPlt()).
uint32_t GotIndex = -1;
uint32_t GotPltIndex = -1;
uint32_t PltIndex = -1;
uint32_t GlobalDynIndex = -1;
bool isInGot() const { return GotIndex != -1U; }
bool isInPlt() const { return PltIndex != -1U; }
template <class ELFT> bool hasThunk() const;
// Address and size queries; ELFT selects the address width.
template <class ELFT>
typename ELFT::uint getVA(typename ELFT::uint Addend = 0) const;
template <class ELFT> typename ELFT::uint getGotOffset() const;
template <class ELFT> typename ELFT::uint getGotVA() const;
template <class ELFT> typename ELFT::uint getGotPltOffset() const;
template <class ELFT> typename ELFT::uint getGotPltVA() const;
template <class ELFT> typename ELFT::uint getPltVA() const;
template <class ELFT> typename ELFT::uint getThunkVA() const;
template <class ELFT> typename ELFT::uint getSize() const;
// The file from which this symbol was created.
InputFile *File = nullptr;
protected:
// The first constructor creates a non-local symbol identified by Name, the
// second a local symbol identified by NameOffset.
SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type);
// Holds a Kind value; read it through kind().
const unsigned SymbolKind : 8;
public:
// True if the linker has to generate a copy relocation for this shared
// symbol or if the symbol should point to its plt entry.
unsigned NeedsCopyOrPltAddr : 1;
// True if this is a local symbol.
unsigned IsLocal : 1;
// True if this symbol has an entry in the global part of MIPS GOT.
unsigned IsInGlobalMipsGot : 1;
// The following fields have the same meaning as the ELF symbol attributes.
uint8_t Type; // symbol type
uint8_t StOther; // st_other field value
// The Type field may also have this value. It means that we have not yet seen
// a non-Lazy symbol with this name, so we don't know what its type is. The
// Type field is normally set to this value for Lazy symbols unless we saw a
// weak undefined symbol first, in which case we need to remember the original
// symbol's type in order to check for TLS mismatches.
enum { UnknownType = 255 };
bool isSection() const { return Type == llvm::ELF::STT_SECTION; }
bool isTls() const { return Type == llvm::ELF::STT_TLS; }
bool isFunc() const { return Type == llvm::ELF::STT_FUNC; }
bool isGnuIFunc() const { return Type == llvm::ELF::STT_GNU_IFUNC; }
bool isObject() const { return Type == llvm::ELF::STT_OBJECT; }
bool isFile() const { return Type == llvm::ELF::STT_FILE; }
protected:
// A non-owning (pointer, length) pair referring to the symbol's name.
struct Str {
const char *S;
size_t Len;
};
// Name is the active member for non-local symbols, NameOffset for local ones.
union {
Str Name;
uint32_t NameOffset;
};
};
// The base class for any defined symbols. The two constructors mirror the
// by-name and by-name-offset constructors of SymbolBody.
class Defined : public SymbolBody {
public:
Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type);
// LLVM-style RTTI hook: any symbol whose kind is in the defined range.
static bool classof(const SymbolBody *S) { return S->isDefined(); }
};
// The defined symbol in LLVM bitcode files.
class DefinedBitcode : public Defined {
public:
DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type, BitcodeFile *F);
static bool classof(const SymbolBody *S);
// Returns the originating file cast to BitcodeFile. The constructor stores a
// BitcodeFile in File, which is what this cast relies on.
BitcodeFile *file() { return (BitcodeFile *)this->File; }
};
// A common symbol. Its address is its offset inside the output .bss.
class DefinedCommon : public Defined {
public:
DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther,
uint8_t Type, InputFile *File);
// LLVM-style RTTI hook.
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::DefinedCommonKind;
}
// The output offset of this common symbol in the output bss. Computed by the
// writer.
uint64_t OffsetInBss;
// The maximum alignment we have seen for this symbol.
uint64_t Alignment;
// The size of the symbol.
uint64_t Size;
};
// Regular defined symbols read from object file symbol tables.
template <class ELFT> class DefinedRegular : public Defined {
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
public:
// Creates a named symbol from the ELF symbol Sym. Section may be null; in
// that case Section refers to NullInputSection and File is left unset.
DefinedRegular(StringRef Name, const Elf_Sym &Sym,
InputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
Sym.getType()),
Value(Sym.st_value), Size(Sym.st_size),
Section(Section ? Section->Repl : NullInputSection) {
if (Section)
this->File = Section->getFile();
}
// Creates a local symbol from the ELF symbol Sym, identified by its st_name
// offset instead of a name string.
DefinedRegular(const Elf_Sym &Sym, InputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedRegularKind, Sym.st_name, Sym.st_other,
Sym.getType()),
Value(Sym.st_value), Size(Sym.st_size),
Section(Section ? Section->Repl : NullInputSection) {
assert(isLocal());
if (Section)
this->File = Section->getFile();
}
// Creates a named symbol with no ELF symbol entry and no section: value 0,
// size 0 and type STT_NOTYPE.
DefinedRegular(StringRef Name, uint8_t StOther)
: Defined(SymbolBody::DefinedRegularKind, Name, StOther,
llvm::ELF::STT_NOTYPE),
Value(0), Size(0), Section(NullInputSection) {}
// LLVM-style RTTI hook.
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::DefinedRegularKind;
}
uintX_t Value;
uintX_t Size;
// The input section this symbol belongs to. Notice that this is
// a reference to a pointer. We are using two levels of indirections
// because of ICF. If ICF decides two sections need to be merged, it
// manipulates this Section pointers so that they point to the same
// section. This is a bit tricky, so be careful to not be confused.
// If this is null, the symbol is an absolute symbol.
InputSectionBase<ELFT> *&Section;
// If non-null the symbol has a Thunk that may be used as an alternative
// destination for callers of this Symbol.
Thunk<ELFT> *ThunkData = nullptr;
private:
// The pointer that Section refers to when the symbol has no input section.
// It is shared by every such symbol of this ELFT.
static InputSectionBase<ELFT> *NullInputSection;
};
// Out-of-class definition of the static member declared above.
template <class ELFT>
InputSectionBase<ELFT> *DefinedRegular<ELFT>::NullInputSection;
// DefinedSynthetic is a class to represent linker-generated ELF symbols.
// The difference from the regular symbol is that DefinedSynthetic symbols
// don't belong to any input files or sections. Thus, its constructor
// takes an output section to calculate output VA, etc.
// If Section is null, this symbol is relative to the image base.
template <class ELFT> class DefinedSynthetic : public Defined {
public:
typedef typename ELFT::uint uintX_t;
DefinedSynthetic(StringRef N, uintX_t Value,
OutputSectionBase<ELFT> *Section);
// Type-query hook for isa<>/cast<>/dyn_cast<>.
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::DefinedSyntheticKind;
}
// Special value designates that the symbol 'points'
// to the end of the section. It is the all-ones value of uintX_t.
static const uintX_t SectionEnd = uintX_t(-1);
// Symbol value, or SectionEnd.
// NOTE(review): presumably an offset relative to Section - confirm at the
// point where the output VA is computed.
uintX_t Value;
// Output section the symbol is relative to; null means image base.
const OutputSectionBase<ELFT> *Section;
};
// A symbol of kind UndefinedKind. As with Defined, the name can be given
// either as a string or as a 32-bit name offset.
class Undefined : public SymbolBody {
public:
Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F);
Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type, InputFile *F);
// Type-query hook for isa<>/cast<>/dyn_cast<>.
static bool classof(const SymbolBody *S) {
return S->kind() == UndefinedKind;
}
// Returns the inherited File pointer.
// NOTE(review): presumably the file that referenced the symbol - confirm in
// the constructor definition.
InputFile *file() { return this->File; }
};
// A symbol of kind SharedKind: it is described by an ELF symbol table entry
// of a SharedFile and, optionally, a version definition.
template <class ELFT> class SharedSymbol : public Defined {
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Verdef = typename ELFT::Verdef;
  using uintX_t = typename ELFT::uint;

public:
  SharedSymbol(SharedFile<ELFT> *DSO, StringRef SymName, const Elf_Sym &Entry,
               const Elf_Verdef *VD)
      : Defined(SymbolBody::SharedKind, SymName, Entry.st_other,
                Entry.getType()),
        Sym(Entry), Verdef(VD) {
    this->File = DSO;
    // IFuncs defined in DSOs are treated as functions by the static linker.
    if (isGnuIFunc())
      Type = llvm::ELF::STT_FUNC;
  }

  // Type-query hook for isa<>/cast<>/dyn_cast<>.
  static bool classof(const SymbolBody *S) {
    return S->kind() == SymbolBody::SharedKind;
  }

  // The file stored by the constructor, viewed as a SharedFile again.
  SharedFile<ELFT> *file() { return (SharedFile<ELFT> *)this->File; }

  // The symbol table entry this symbol was created from. Held by reference,
  // so the entry must outlive the symbol.
  const Elf_Sym &Sym;

  // This field is a pointer to the symbol's version definition.
  const Elf_Verdef *Verdef;

  // OffsetInBss is significant only when needsCopy() is true.
  uintX_t OffsetInBss = 0;

  // When non-null, a Thunk that callers of this symbol may branch to instead
  // of the symbol itself.
  Thunk<ELFT> *ThunkData = nullptr;

  // True when NeedsCopyOrPltAddr is set on a symbol that is not a function.
  bool needsCopy() const {
    if (this->isFunc())
      return false;
    return this->NeedsCopyOrPltAddr;
  }
};
// This class represents a symbol defined in an archive file. It is
// created from an archive file header, and it knows how to load an
// object file from an archive to replace itself with a defined
// symbol. If the resolver finds both Undefined and Lazy for
// the same name, it will ask the Lazy to load a file.
class Lazy : public SymbolBody {
public:
// Type-query hook for isa<>/cast<>/dyn_cast<>: accepts every SymbolBody for
// which isLazy() is true.
static bool classof(const SymbolBody *S) { return S->isLazy(); }
// Returns an object file for this symbol, or a nullptr if the file
// was already returned.
// NOTE(review): fetch() is not virtual and the subclasses declare their own
// fetch(); presumably this one dispatches on kind() - confirm in Symbols.cpp.
std::unique_ptr<InputFile> fetch();
protected:
// Only subclasses may construct a Lazy. The st_other value passed to
// SymbolBody is always STV_DEFAULT.
Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
: SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {}
};
// LazyArchive symbols represent symbols in archive files.
class LazyArchive : public Lazy {
public:
LazyArchive(ArchiveFile &File, const llvm::object::Archive::Symbol S,
uint8_t Type);
// Type-query hook for isa<>/cast<>/dyn_cast<>.
static bool classof(const SymbolBody *S) {
return S->kind() == LazyArchiveKind;
}
// Returns the inherited File pointer viewed as an ArchiveFile (unchecked).
ArchiveFile *file() { return (ArchiveFile *)this->File; }
// NOTE(review): presumably loads the archive member that Sym designates -
// confirm in the out-of-line definition.
std::unique_ptr<InputFile> fetch();
private:
// The archive symbol table entry this symbol was created from (stored by
// value).
const llvm::object::Archive::Symbol Sym;
};
// LazyObject symbols represent symbols in object files between
// --start-lib and --end-lib options.
class LazyObject : public Lazy {
public:
LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type);
// Type-query hook for isa<>/cast<>/dyn_cast<>.
static bool classof(const SymbolBody *S) {
return S->kind() == LazyObjectKind;
}
// Returns the inherited File pointer viewed as a LazyObjectFile (unchecked).
LazyObjectFile *file() { return (LazyObjectFile *)this->File; }
// NOTE(review): presumably returns the object file wrapped by file() -
// confirm in the out-of-line definition.
std::unique_ptr<InputFile> fetch();
};
// Some linker-generated symbols need to be created as
// DefinedRegular symbols.
// ElfSym is a per-ELFT holder of static pointers to those symbols. The
// pointers are defined (without an initializer) right after the struct, so
// they start out null.
template <class ELFT> struct ElfSym {
// The content for _etext and etext symbols.
static DefinedRegular<ELFT> *Etext;
static DefinedRegular<ELFT> *Etext2;
// The content for _edata and edata symbols.
static DefinedRegular<ELFT> *Edata;
static DefinedRegular<ELFT> *Edata2;
// The content for _end and end symbols.
static DefinedRegular<ELFT> *End;
static DefinedRegular<ELFT> *End2;
// The content for _gp_disp symbol for MIPS target.
static SymbolBody *MipsGpDisp;
};
// Out-of-class definitions of the static members declared above.
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End2;
template <class ELFT> SymbolBody *ElfSym<ELFT>::MipsGpDisp;
// A real symbol object, SymbolBody, is usually stored within a Symbol. There's
// always one Symbol for each symbol name. The resolver updates the SymbolBody
// stored in the Body field of this object as it resolves symbols. Symbol also
// holds computed properties of symbol names.
struct Symbol {
// Symbol binding. This is on the Symbol to track changes during resolution.
// In particular:
// An undefined weak is still weak when it resolves to a shared library.
// An undefined weak will not fetch archive members, but we have to remember
// it is weak.
uint8_t Binding;
// Version definition index.
uint16_t VersionId;
// Symbol visibility. This is the computed minimum visibility of all
// observed non-DSO symbols.
unsigned Visibility : 2;
// True if the symbol was used for linking and thus needs to be added to the
// output file's symbol table. This is true for all symbols except for
// unreferenced DSO symbols and bitcode symbols that are unreferenced except
// by other bitcode objects.
unsigned IsUsedInRegularObj : 1;
// If this flag is true and the symbol has protected or default visibility, it
// will appear in .dynsym. This flag is set by interposable DSO symbols in
// executables, by most symbols in DSOs and executables built with
// --export-dynamic, and by dynamic lists.
unsigned ExportDynamic : 1;
// True if this symbol is specified by --trace-symbol option.
unsigned Traced : 1;
bool includeInDynsym() const;
// True when the recorded binding is STB_WEAK.
bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; }
// This field is used to store the Symbol's SymbolBody. This instantiation of
// AlignedCharArrayUnion gives us a struct with a char array field that is
// large and aligned enough to store any derived class of SymbolBody. We
// assume that the size and alignment of ELF64LE symbols is sufficient for any
// ELFT, and we verify this with the static_asserts in replaceBody.
llvm::AlignedCharArrayUnion<
DefinedBitcode, DefinedCommon, DefinedRegular<llvm::object::ELF64LE>,
DefinedSynthetic<llvm::object::ELF64LE>, Undefined,
SharedSymbol<llvm::object::ELF64LE>, LazyArchive, LazyObject>
Body;
// Views the raw storage in Body as the SymbolBody that was constructed
// there (see replaceBody).
SymbolBody *body() { return reinterpret_cast<SymbolBody *>(Body.buffer); }
// Const overload; forwards to the non-const accessor.
const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); }
};
// Emits the --trace-symbol log message for Sym; defined elsewhere.
void printTraceSymbol(Symbol *Sym);
// Constructs a new body of type T inside S->Body, forwarding Arg... to T's
// constructor. The new object is created with placement new directly over the
// existing storage; no destructor is run for whatever was stored there
// before.
template <typename T, typename... ArgT>
void replaceBody(Symbol *S, ArgT &&... Arg) {
// Compile-time proof that T fits into the storage reserved in Symbol.
static_assert(sizeof(T) <= sizeof(S->Body), "Body too small");
static_assert(llvm::AlignOf<T>::Alignment <=
llvm::AlignOf<decltype(S->Body)>::Alignment,
"Body not aligned enough");
// The static_cast only compiles when T derives from SymbolBody, so this line
// acts as a compile-time type check; the comparison itself is of null
// pointers.
assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr &&
"Not a SymbolBody");
new (S->Body.buffer) T(std::forward<ArgT>(Arg)...);
// Print out a log message if --trace-symbol was specified.
// This is for debugging.
if (S->Traced)
printTraceSymbol(S);
}
// Returns the Symbol whose Body field holds this SymbolBody. The address is
// recovered by stepping back from this object by the offset of Body within
// Symbol. Must not be called on local symbols.
inline Symbol *SymbolBody::symbol() {
  assert(!isLocal());
  char *BodyBytes = reinterpret_cast<char *>(this);
  char *SymbolBytes = BodyBytes - offsetof(Symbol, Body);
  return reinterpret_cast<Symbol *>(SymbolBytes);
}
} // namespace elf
} // namespace lld
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,114 @@
//===- Target.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_TARGET_H
#define LLD_ELF_TARGET_H
#include "InputSection.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ELF.h"
#include <memory>
namespace lld {
namespace elf {
class InputFile;
class SymbolBody;
// Interface through which the linker queries and applies target-specific
// behavior: relocation classification, GOT/PLT layout and contents, and
// relocation relaxation. getRelExpr() and relocateOne() are pure virtual and
// must be provided by every target; the remaining hooks have base-class
// definitions. A single instance is reachable through the global Target
// pointer declared after this class.
class TargetInfo {
public:
virtual bool isTlsInitialExecRel(uint32_t Type) const;
virtual bool isTlsLocalDynamicRel(uint32_t Type) const;
virtual bool isTlsGlobalDynamicRel(uint32_t Type) const;
// Maps a relocation type to the one to use for the dynamic relocation; the
// default is the identity mapping.
virtual uint32_t getDynRel(uint32_t Type) const { return Type; }
// The default GOT-PLT writers emit nothing.
virtual void writeGotPltHeader(uint8_t *Buf) const {}
virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {};
virtual uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const;
// If lazy binding is supported, the first entry of the PLT has code
// to call the dynamic linker to resolve PLT entries the first time
// they are called. This function writes that code.
virtual void writePltHeader(uint8_t *Buf) const {}
// Writes one PLT entry; the default emits nothing.
virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {}
// Returns true if a relocation only uses the low bits of a value such that
// all those bits are in the same page. For example, if the relocation
// only uses the low 12 bits in a system with 4k pages. If this is true, the
// bits will always have the same value at runtime and we don't have to emit
// a dynamic relocation.
virtual bool usesOnlyLowPageBits(uint32_t Type) const;
// Decide whether a Thunk is needed for the relocation from File
// targeting S. Returns one of:
// Expr if there is no Thunk required
// R_THUNK_ABS if thunk is required and expression is absolute
// R_THUNK_PC if thunk is required and expression is pc rel
// R_THUNK_PLT_PC if thunk is required to PLT entry and expression is pc rel
virtual RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType,
const InputFile &File,
const SymbolBody &S) const;
virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const = 0;
virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0;
virtual ~TargetInfo();
unsigned TlsGdRelaxSkip = 1;
unsigned PageSize = 4096;
// On freebsd x86_64 the first page cannot be mmaped.
// On linux that is controlled by vm.mmap_min_addr. At least on some x86_64
// installs that is 65536, so the first 16 pages cannot be used.
// Given that, the smallest value that can be used in here is 0x10000.
// If using 2MB pages, the smallest page aligned address that works is
// 0x200000, but it looks like every OS uses 4k pages for executables.
uint64_t DefaultImageBase = 0x10000;
// Target-specific relocation type numbers. None of these has a default
// initializer here.
// NOTE(review): presumably each target's constructor assigns the ones it
// uses - confirm in Target.cpp.
uint32_t CopyRel;
uint32_t GotRel;
uint32_t PltRel;
uint32_t RelativeRel;
uint32_t IRelativeRel;
uint32_t TlsDescRel;
uint32_t TlsGotRel;
uint32_t TlsModuleIndexRel;
uint32_t TlsOffsetRel;
// Entry and header sizes of the GOT, GOT-PLT and PLT. These also have no
// default initializer.
unsigned GotEntrySize;
unsigned GotPltEntrySize;
unsigned PltEntrySize;
unsigned PltHeaderSize;
// At least on x86_64 positions 1 and 2 are used by the first plt entry
// to support lazy loading.
unsigned GotPltHeaderEntriesNum = 3;
// Set to 0 for variant 2
unsigned TcbSize = 0;
virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
RelExpr Expr) const;
// Relaxation hooks. The TLS ones are named after the access models involved:
// Gd = global dynamic, Ld = local dynamic, Ie = initial exec, Le = local exec.
virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
virtual void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
virtual void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
};
// Returns a name for relocation type Type.
// NOTE(review): presumably the name depends on the configured target
// machine - confirm in Target.cpp.
StringRef getRelName(uint32_t Type);
// NOTE(review): presumably returns the PPC64 TOC base address - confirm in
// Target.cpp.
uint64_t getPPC64TocBase();
// NOTE(review): looks like the offset of the MIPS gp value from the start of
// the GOT - confirm against its users.
const unsigned MipsGPOffset = 0x7ff0;
// The TargetInfo instance used by the link.
// NOTE(review): presumably set from createTarget() during start-up - confirm
// in the driver.
extern TargetInfo *Target;
TargetInfo *createTarget();
}
}
#endif

View File

@ -0,0 +1,268 @@
//===- Thunks.cpp --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This file contains Thunk subclasses.
//
// A thunk is a small piece of code written after an input section
// which is used to jump between "incompatible" functions
// such as MIPS PIC and non-PIC or ARM non-Thumb and Thumb functions.
//
// If a jump target is too far and its address doesn't fit to a
// short jump instruction, we need to create a thunk too, but we
// haven't supported it yet.
//
// i386 and x86-64 don't need thunks.
//
//===---------------------------------------------------------------------===//
#include "Thunks.h"
#include "Error.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "Target.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
namespace lld {
namespace elf {
namespace {
// Specific ARM Thunk implementations. The naming convention is:
// Source State, TargetState, Target Requirement, ABS or PI, Range
//
// ARM-to-Thumb thunk using absolute addressing. Its writeTo() emits three
// 4-byte instructions (movw, movt, bx), hence a size of 12 bytes.
template <class ELFT>
class ARMToThumbV7ABSLongThunk final : public Thunk<ELFT> {
public:
ARMToThumbV7ABSLongThunk(const SymbolBody &Dest,
const InputSection<ELFT> &Owner)
: Thunk<ELFT>(Dest, Owner) {}
uint32_t size() const override { return 12; }
void writeTo(uint8_t *Buf) const override;
};
// ARM-to-Thumb thunk using position-independent (pc-relative) addressing.
// Its writeTo() emits four 4-byte instructions (movw, movt, add, bx), hence a
// size of 16 bytes.
template <class ELFT> class ARMToThumbV7PILongThunk final : public Thunk<ELFT> {
public:
ARMToThumbV7PILongThunk(const SymbolBody &Dest,
const InputSection<ELFT> &Owner)
: Thunk<ELFT>(Dest, Owner) {}
uint32_t size() const override { return 16; }
void writeTo(uint8_t *Buf) const override;
};
// Thumb-to-ARM thunk using absolute addressing. Its writeTo() emits two
// 4-byte instructions (movw, movt) followed by a 2-byte bx, hence a size of
// 10 bytes.
template <class ELFT>
class ThumbToARMV7ABSLongThunk final : public Thunk<ELFT> {
public:
ThumbToARMV7ABSLongThunk(const SymbolBody &Dest,
const InputSection<ELFT> &Owner)
: Thunk<ELFT>(Dest, Owner) {}
uint32_t size() const override { return 10; }
void writeTo(uint8_t *Buf) const override;
};
// Thumb-to-ARM thunk using position-independent (pc-relative) addressing.
// Its writeTo() emits two 4-byte instructions (movw, movt) followed by two
// 2-byte instructions (add, bx), hence a size of 12 bytes.
template <class ELFT> class ThumbToARMV7PILongThunk final : public Thunk<ELFT> {
public:
ThumbToARMV7PILongThunk(const SymbolBody &Dest,
const InputSection<ELFT> &Owner)
: Thunk<ELFT>(Dest, Owner) {}
uint32_t size() const override { return 12; }
void writeTo(uint8_t *Buf) const override;
};
// MIPS LA25 thunk
// Its writeTo() emits four 32-bit words (lui, j, addiu, nop), hence a size of
// 16 bytes.
template <class ELFT> class MipsThunk final : public Thunk<ELFT> {
public:
MipsThunk(const SymbolBody &Dest, const InputSection<ELFT> &Owner)
: Thunk<ELFT>(Dest, Owner) {}
uint32_t size() const override { return 16; }
void writeTo(uint8_t *Buf) const override;
};
} // anonymous namespace
// ARM Target Thunks
// Returns the address an ARM thunk has to transfer control to: the PLT entry
// of S when S has one, otherwise the address of S itself.
template <class ELFT> static uint64_t getARMThunkDestVA(const SymbolBody &S) {
  if (S.isInPlt())
    return S.getPltVA<ELFT>();
  return S.getVA<ELFT>();
}
// Emits the absolute ARM-to-Thumb thunk into Buf: the instruction template is
// copied first, then the movw/movt immediates are patched with the low and
// high halves of the destination address.
template <class ELFT>
void ARMToThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const uint8_t Code[] = {
      0x00, 0xc0, 0x00, 0xe3, // movw ip,:lower16:S
      0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S
      0x1c, 0xff, 0x2f, 0xe1, // bx   ip
  };
  uint8_t *MovwLoc = Buf;
  uint8_t *MovtLoc = Buf + 4;
  uint64_t DestVA = getARMThunkDestVA<ELFT>(this->Destination);

  memcpy(Buf, Code, sizeof(Code));
  Target->relocateOne(MovwLoc, R_ARM_MOVW_ABS_NC, DestVA);
  Target->relocateOne(MovtLoc, R_ARM_MOVT_ABS, DestVA);
}
// Emits the absolute Thumb-to-ARM thunk into Buf: the instruction template is
// copied first, then the movw/movt immediates are patched with the low and
// high halves of the destination address.
template <class ELFT>
void ThumbToARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const uint8_t Code[] = {
      0x40, 0xf2, 0x00, 0x0c, // movw ip, :lower16:S
      0xc0, 0xf2, 0x00, 0x0c, // movt ip, :upper16:S
      0x60, 0x47,             // bx   ip
  };
  uint8_t *MovwLoc = Buf;
  uint8_t *MovtLoc = Buf + 4;
  uint64_t DestVA = getARMThunkDestVA<ELFT>(this->Destination);

  memcpy(Buf, Code, sizeof(Code));
  Target->relocateOne(MovwLoc, R_ARM_THM_MOVW_ABS_NC, DestVA);
  Target->relocateOne(MovtLoc, R_ARM_THM_MOVT_ABS, DestVA);
}
// Emits the position-independent ARM-to-Thumb thunk into Buf.
//
// The thunk loads a pc-relative displacement into ip with movw/movt, adds pc
// and branches. The add sits at L1 = P + 8 and, in ARM state, reads pc as the
// address of the instruction plus 8, so ip must hold S - (P + 16).
//
// Both movw and movt must be patched from that one displacement: movw takes
// its low 16 bits and movt its high 16 bits. Deriving the two halves from
// different values (as was done before, with S - P - 12 for movt) yields a
// wrong upper half whenever the 4-byte difference carries across bit 16.
template <class ELFT>
void ARMToThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const uint8_t Data[] = {
      0xf0, 0xcf, 0x0f, 0xe3, // P:  movw ip,:lower16:S - (P + (L1-P) + 8)
      0x00, 0xc0, 0x40, 0xe3, //     movt ip,:upper16:S - (P + (L1-P) + 8)
      0x0f, 0xc0, 0x8c, 0xe0, // L1: add  ip, ip, pc
      0x1c, 0xff, 0x2f, 0xe1, //     bx   ip
  };
  uint64_t S = getARMThunkDestVA<ELFT>(this->Destination);
  uint64_t P = this->getVA();
  // Displacement from the value pc has at L1 to the destination.
  uint64_t Offset = S - P - 16;
  memcpy(Buf, Data, sizeof(Data));
  Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, Offset);
  Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, Offset);
}
// Emits the position-independent Thumb-to-ARM thunk into Buf.
//
// The thunk loads a pc-relative displacement into ip (r12) with movw/movt,
// adds pc and branches. The add sits at L1 = P + 8 and, in Thumb state, reads
// pc as the address of the instruction plus 4, so ip must hold S - (P + 12).
//
// Both movw and movt must be patched from that one displacement: movw takes
// its low 16 bits and movt its high 16 bits. Deriving the two halves from
// different values (as was done before, with S - P - 8 for movt) yields a
// wrong upper half whenever the 4-byte difference carries across bit 16.
template <class ELFT>
void ThumbToARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const uint8_t Data[] = {
      0x4f, 0xf6, 0xf4, 0x7c, // P:  movw ip,:lower16:S - (P + (L1-P) + 4)
      0xc0, 0xf2, 0x00, 0x0c, //     movt ip,:upper16:S - (P + (L1-P) + 4)
      0xfc, 0x44,             // L1: add  r12, pc
      0x60, 0x47,             //     bx   r12
  };
  uint64_t S = getARMThunkDestVA<ELFT>(this->Destination);
  uint64_t P = this->getVA();
  // Displacement from the value pc has at L1 to the destination.
  uint64_t Offset = S - P - 12;
  memcpy(Buf, Data, sizeof(Data));
  Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, Offset);
  Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, Offset);
}
// Write MIPS LA25 thunk code to call PIC function from the non-PIC one.
//
// The sequence loads the address of the destination into $25 (lui/addiu,
// patched through the HI16/LO16 relocations) and jumps to it with `j`.
//
// The `j` instruction only has a 26-bit target field, holding bits 27..2 of
// the destination; the upper address bits come from the address of the delay
// slot at run time. The field is masked so that a destination at or above
// 0x10000000 cannot spill into the opcode bits of the instruction word.
template <class ELFT> void MipsThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const endianness E = ELFT::TargetEndianness;
  uint64_t S = this->Destination.template getVA<ELFT>();
  uint32_t JumpIndex = (S >> 2) & 0x03ffffff;
  write32<E>(Buf, 0x3c190000);                 // lui   $25, %hi(func)
  write32<E>(Buf + 4, 0x08000000 | JumpIndex); // j     func
  write32<E>(Buf + 8, 0x27390000);             // addiu $25, $25, %lo(func)
  write32<E>(Buf + 12, 0x00000000);            // nop
  Target->relocateOne(Buf, R_MIPS_HI16, S);
  Target->relocateOne(Buf + 8, R_MIPS_LO16, S);
}
// Records the destination symbol D and the owning section O. Offset is taken
// from O at construction time as getThunkOff() + getThunksSize().
// NOTE(review): presumably this places the new thunk right after the thunks
// already added to O, so it must be constructed before O.addThunk() is called
// for it - confirm in InputSection.
template <class ELFT>
Thunk<ELFT>::Thunk(const SymbolBody &D, const InputSection<ELFT> &O)
: Destination(D), Owner(O), Offset(O.getThunkOff() + O.getThunksSize()) {}
// Returns the sum of the owning section's output section address, the owning
// section's offset within that output section, and this thunk's Offset.
template <class ELFT> typename ELFT::uint Thunk<ELFT>::getVA() const {
  auto OwnerVA = Owner.OutSec->getVA() + Owner.OutSecOff;
  return OwnerVA + Offset;
}
// Out-of-line definition of the virtual destructor; it has nothing to release.
template <class ELFT> Thunk<ELFT>::~Thunk() {}
// Creates a thunk for Thumb-ARM interworking.
//
// The relocation type selects the direction: ARM branch relocations get an
// ARM-to-Thumb thunk, Thumb branch relocations get a Thumb-to-ARM thunk.
// Config->Pic selects the position-independent variant over the absolute one.
// The thunk is allocated from the allocator of the file that contains IS.
// Any other relocation type is a fatal error.
template <class ELFT>
static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S,
                                   InputSection<ELFT> &IS) {
  BumpPtrAllocator &Alloc = IS.getFile()->Alloc;
  const bool UsePI = Config->Pic;

  switch (Reloc) {
  case R_ARM_PC24:
  case R_ARM_PLT32:
  case R_ARM_JUMP24:
    if (!UsePI)
      return new (Alloc) ARMToThumbV7ABSLongThunk<ELFT>(S, IS);
    return new (Alloc) ARMToThumbV7PILongThunk<ELFT>(S, IS);
  case R_ARM_THM_JUMP19:
  case R_ARM_THM_JUMP24:
    if (!UsePI)
      return new (Alloc) ThumbToARMV7ABSLongThunk<ELFT>(S, IS);
    return new (Alloc) ThumbToARMV7PILongThunk<ELFT>(S, IS);
  default:
    break;
  }
  fatal("unrecognized relocation type");
}
template <class ELFT>
static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection<ELFT> &IS) {
// Only one Thunk supported per symbol.
if (S.hasThunk<ELFT>())
return;
// ARM Thunks are added to the same InputSection as the relocation. This
// isn't strictly necessary but it makes it more likely that a limited range
// branch can reach the Thunk, and it makes Thunks to the PLT section easier
Thunk<ELFT> *T = createThunkArm(Reloc, S, IS);
IS.addThunk(T);
if (auto *Sym = dyn_cast<DefinedRegular<ELFT>>(&S))
Sym->ThunkData = T;
else if (auto *Sym = dyn_cast<SharedSymbol<ELFT>>(&S))
Sym->ThunkData = T;
else
fatal("symbol not DefinedRegular or Shared");
}
// Creates a MIPS LA25 thunk for S and records it in S. Does nothing when S
// already has a thunk, because only one thunk per symbol is supported.
// Unlike the ARM case, the thunk is owned by the InputSection that defines S;
// IS only supplies the allocator. RelocType is not consulted.
template <class ELFT>
static void addThunkMips(uint32_t RelocType, SymbolBody &S,
                         InputSection<ELFT> &IS) {
  if (!S.hasThunk<ELFT>()) {
    auto *Def = cast<DefinedRegular<ELFT>>(&S);
    auto *DefSec = cast<InputSection<ELFT>>(Def->Section);
    auto *LA25 = new (IS.getFile()->Alloc) MipsThunk<ELFT>(S, *DefSec);
    DefSec->addThunk(LA25);
    Def->ThunkData = LA25;
  }
}
// Entry point for thunk creation: dispatches on the configured machine type.
// Only ARM and MIPS are supported; reaching this function for any other
// machine is a programming error.
template <class ELFT>
void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &IS) {
  switch (Config->EMachine) {
  case EM_ARM:
    addThunkARM<ELFT>(RelocType, S, IS);
    return;
  case EM_MIPS:
    addThunkMips<ELFT>(RelocType, S, IS);
    return;
  default:
    llvm_unreachable("add Thunk only supported for ARM and Mips");
  }
}
// Explicit instantiations for the four ELF flavours (32/64-bit, little/big
// endian). addThunk and the Thunk member functions are defined in this file
// only, so these are needed for other translation units to link against them.
template void addThunk<ELF32LE>(uint32_t, SymbolBody &,
InputSection<ELF32LE> &);
template void addThunk<ELF32BE>(uint32_t, SymbolBody &,
InputSection<ELF32BE> &);
template void addThunk<ELF64LE>(uint32_t, SymbolBody &,
InputSection<ELF64LE> &);
template void addThunk<ELF64BE>(uint32_t, SymbolBody &,
InputSection<ELF64BE> &);
template class Thunk<ELF32LE>;
template class Thunk<ELF32BE>;
template class Thunk<ELF64LE>;
template class Thunk<ELF64BE>;
} // namespace elf
} // namespace lld

View File

@ -0,0 +1,56 @@
//===- Thunks.h --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_THUNKS_H
#define LLD_ELF_THUNKS_H
#include "Relocations.h"
namespace lld {
namespace elf {
class SymbolBody;
template <class ELFT> class InputSection;
// Class to describe an instance of a Thunk.
// A Thunk is a code-sequence inserted by the linker in between a caller and
// the callee. The relocation to the callee is redirected to the Thunk, which
// after executing transfers control to the callee. Typical uses of Thunks
// include transferring control from non-pi to pi and changing state on
// targets like ARM.
//
// Thunks can be created for DefinedRegular and Shared Symbols. The Thunk
// is stored in a field of the Symbol Destination.
// Thunks to be written to an InputSection are recorded by the InputSection.
template <class ELFT> class Thunk {
typedef typename ELFT::uint uintX_t;
public:
Thunk(const SymbolBody &Destination, const InputSection<ELFT> &Owner);
virtual ~Thunk();
// Size of the thunk code in bytes. The base implementation reports 0.
virtual uint32_t size() const { return 0; }
// Writes the thunk code to Buf. The base implementation writes nothing.
virtual void writeTo(uint8_t *Buf) const {}
// Address of the thunk, derived from Owner's output location and Offset.
uintX_t getVA() const;
protected:
// The symbol the thunk transfers control to.
const SymbolBody &Destination;
// The input section the thunk is attached to.
const InputSection<ELFT> &Owner;
// Offset of the thunk relative to Owner.
uint64_t Offset;
};
// For a Relocation to symbol S from InputSection Src, create a Thunk and
// update the fields of S and the InputSection that the Thunk body will be
// written to. At present there are implementations for ARM and Mips Thunks.
// RelocType is the type of the relocation that requires the Thunk.
template <class ELFT>
void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &Src);
} // namespace elf
} // namespace lld
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,38 @@
//===- Writer.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_WRITER_H
#define LLD_ELF_WRITER_H
#include <memory>
namespace llvm {
class StringRef;
}
namespace lld {
namespace elf {
// Forward declarations; the full definitions live in other headers.
template <class ELFT> class InputSectionBase;
template <class ELFT> class ObjectFile;
template <class ELFT> class SymbolTable;
// NOTE(review): presumably writes the output file from the resolved symbol
// table - confirm in Writer.cpp.
template <class ELFT> void writeResult(SymbolTable<ELFT> *Symtab);
// NOTE(review): presumably the liveness-marking pass used for section garbage
// collection - confirm in its definition.
template <class ELFT> void markLive();
// Returns the name of the output section that input section S goes into.
template <class ELFT>
llvm::StringRef getOutputSectionName(InputSectionBase<ELFT> *S);
// NOTE(review): presumably reports that section IS of File was discarded -
// confirm in Writer.cpp.
template <class ELFT>
void reportDiscarded(InputSectionBase<ELFT> *IS,
const std::unique_ptr<elf::ObjectFile<ELFT>> &File);
}
}
#endif

View File

@ -0,0 +1,62 @@
==============================================================================
lld License
==============================================================================
University of Illinois/NCSA
Open Source License
Copyright (c) 2011-2016 by the contributors listed in CREDITS.TXT
All rights reserved.
Developed by:
LLVM Team
University of Illinois at Urbana-Champaign
http://llvm.org
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* Neither the names of the LLVM Team, University of Illinois at
Urbana-Champaign, nor the names of its contributors may be used to
endorse or promote products derived from this Software without specific
prior written permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
==============================================================================
The lld software contains code written by third parties. Such software will
have its own individual LICENSE.TXT file in the directory in which it appears.
This file will describe the copyrights, license, and restrictions which apply
to that code.
The disclaimer of warranty in the University of Illinois Open Source License
applies to all code in the lld Distribution, and nothing in any of the
other licenses gives permission to use the names of the LLVM Team or the
University of Illinois to endorse or promote products derived from this
Software.
The following pieces of software have additional or alternate copyrights,
licenses, and/or restrictions:
Program Directory
------- ---------
<none yet>

View File

@ -0,0 +1,10 @@
LLVM Linker (lld)
==============================
This directory and its subdirectories contain source code for the LLVM Linker, a
modular cross-platform linker which is built as part of the LLVM compiler
infrastructure project.
lld is open source software. You may freely distribute it under the terms of
the license agreement found in LICENSE.TXT.

View File

@ -0,0 +1,51 @@
//===- lld/Config/Version.h - LLD Version Number ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines version macros and version-related utility functions
/// for lld.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_VERSION_H
#define LLD_VERSION_H
#include "lld/Config/Version.inc"
#include "llvm/ADT/StringRef.h"
#include <string>
/// \brief Helper macro for LLD_VERSION_STRING.
/// Turns its (already expanded) argument into a string literal.
#define LLD_MAKE_VERSION_STRING2(X) #X
/// \brief Helper macro for LLD_VERSION_STRING.
/// The extra level of indirection makes sure X and Y are macro-expanded
/// before they are joined with a '.' and stringized.
#define LLD_MAKE_VERSION_STRING(X, Y) LLD_MAKE_VERSION_STRING2(X.Y)
/// \brief A string that describes the lld version number, e.g., "1.0".
#define LLD_VERSION_STRING \
LLD_MAKE_VERSION_STRING(LLD_VERSION_MAJOR, LLD_VERSION_MINOR)
namespace lld {
/// \brief Retrieves the repository path (e.g., Subversion path) that
/// identifies the particular lld branch, tag, or trunk from which this
/// lld was built.
llvm::StringRef getLLDRepositoryPath();
/// \brief Retrieves the repository revision number (or identifier) from which
/// this lld was built.
llvm::StringRef getLLDRevision();
/// \brief Retrieves the full repository version that is an amalgamation of
/// the information in getLLDRepositoryPath() and getLLDRevision().
std::string getLLDRepositoryVersion();
/// \brief Retrieves a string representing the complete lld version.
llvm::StringRef getLLDVersion();
}
#endif // LLD_VERSION_H

View File

@ -0,0 +1,5 @@
// Template for the generated lld/Config/Version.inc. Every @NAME@ placeholder
// is replaced by the value of the build-system variable of the same name when
// the header is generated.
// NOTE(review): presumably generated by CMake's configure_file - confirm in
// the CMakeLists.txt that references this template.
//
// The version macros expand to bare tokens; the revision and repository
// macros expand to string literals.
#define LLD_VERSION @LLD_VERSION@
#define LLD_VERSION_MAJOR @LLD_VERSION_MAJOR@
#define LLD_VERSION_MINOR @LLD_VERSION_MINOR@
#define LLD_REVISION_STRING "@LLD_REVISION@"
#define LLD_REPOSITORY_STRING "@LLD_REPOSITORY@"

View File

@ -0,0 +1,43 @@
//===- Core/AbsoluteAtom.h - An absolute Atom -----------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_ABSOLUTE_ATOM_H
#define LLD_CORE_ABSOLUTE_ATOM_H
#include "lld/Core/Atom.h"
namespace lld {
/// An AbsoluteAtom has no content.
/// It exists to represent content at fixed addresses in memory.
class AbsoluteAtom : public Atom {
public:
/// The value of this atom; supplied by the concrete subclass.
/// NOTE(review): presumably the fixed address the atom stands for - confirm
/// against the implementations.
virtual uint64_t value() const = 0;
/// scope - The visibility of this atom to other atoms. C static functions
/// have scope scopeTranslationUnit. Regular C functions have scope
/// scopeGlobal. Functions compiled with visibility=hidden have scope
/// scopeLinkageUnit so they can be seen by other atoms being linked but not
/// by the OS loader.
virtual Scope scope() const = 0;
/// Type-query hook for isa<>/cast<>/dyn_cast<>: an Atom is an AbsoluteAtom
/// exactly when its definition kind is definitionAbsolute.
static bool classof(const Atom *a) {
return a->definition() == definitionAbsolute;
}
/// Trivial overload for objects already known to be AbsoluteAtoms.
static bool classof(const AbsoluteAtom *) { return true; }
protected:
/// Only subclasses can be instantiated; tags the Atom base as
/// definitionAbsolute.
AbsoluteAtom() : Atom(definitionAbsolute) {}
};
} // namespace lld
#endif // LLD_CORE_ABSOLUTE_ATOM_H

View File

@ -0,0 +1,47 @@
//===- Core/ArchiveLibraryFile.h - Models static library ------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_ARCHIVE_LIBRARY_FILE_H
#define LLD_CORE_ARCHIVE_LIBRARY_FILE_H
#include "lld/Core/File.h"
#include <set>
namespace lld {
///
/// The ArchiveLibraryFile subclass of File is used to represent unix
/// static library archives. These libraries provide no atoms to the
/// initial set of atoms linked. Instead, the Resolver queries
/// ArchiveLibraryFile instances for specific symbol names using the
/// find() method. If the archive contains an object file which has a
/// DefinedAtom whose scope is not translationUnit, then the File for
/// that entire object file is returned.
///
class ArchiveLibraryFile : public File {
public:
/// classof - Type test used for casting: a File is an ArchiveLibraryFile
/// exactly when its kind() is kindArchiveLibrary.
static bool classof(const File *f) {
return f->kind() == kindArchiveLibrary;
}
/// Check if any member of the archive contains an Atom with the
/// specified name and return the File object for that member, or nullptr.
virtual File *find(StringRef name) = 0;
/// parseAllMembers - Reports failure through the returned std::error_code.
/// NOTE(review): presumably parses every archive member and appends the
/// resulting File objects to \p result; confirm against implementations.
virtual std::error_code
parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0;
protected:
/// only subclasses of ArchiveLibraryFile can be instantiated
ArchiveLibraryFile(StringRef path) : File(path, kindArchiveLibrary) {}
};
} // namespace lld
#endif // LLD_CORE_ARCHIVE_LIBRARY_FILE_H

View File

@ -0,0 +1,129 @@
//===- Core/Atom.h - A node in linking graph ------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_ATOM_H
#define LLD_CORE_ATOM_H
#include "lld/Core/LLVM.h"
namespace lld {
class File;
template<typename T>
class OwningAtomPtr;
///
/// The linker has a Graph Theory model of linking. An object file is seen
/// as a set of Atoms with References to other Atoms. Each Atom is a node
/// and each Reference is an edge. An Atom can be a DefinedAtom which has
/// content or a UndefinedAtom which is a placeholder and represents an
/// undefined symbol (extern declaration).
///
class Atom {
// OwningAtomPtr needs access to the protected destructor so that it can run
// it explicitly.
template<typename T> friend class OwningAtomPtr;
public:
/// Whether this atom is defined or a proxy for an undefined symbol
enum Definition {
definitionRegular, ///< Normal C/C++ function or global variable.
definitionAbsolute, ///< Asm-only (foo = 10). Not tied to any content.
definitionUndefined, ///< Only in .o files to model reference to undef.
definitionSharedLibrary ///< Only in shared libraries to model export.
};
/// The scope in which this atom is accessible to other atoms.
enum Scope {
scopeTranslationUnit, ///< Accessible only to atoms in the same translation
/// unit (e.g. a C static).
scopeLinkageUnit, ///< Accessible to atoms being linked but not visible
/// to runtime loader (e.g. visibility=hidden).
scopeGlobal ///< Accessible to all atoms and visible to runtime
/// loader (e.g. visibility=default).
};
/// file - returns the File that produced/owns this Atom
virtual const File& file() const = 0;
/// name - The name of the atom. For a function atom, it is the (mangled)
/// name of the function.
virtual StringRef name() const = 0;
/// definition - Whether this atom is a definition or represents an undefined
/// symbol.
Definition definition() const { return _definition; }
/// classof - Every Atom is an Atom, so the type test always succeeds.
static bool classof(const Atom *a) { return true; }
protected:
/// Atom is an abstract base class. Only subclasses can access constructor.
explicit Atom(Definition def) : _definition(def) {}
/// The memory for Atom objects is always managed by the owning File
/// object. Therefore, no one but the owning File object should call
/// delete on an Atom. In fact, some File objects may bulk allocate
/// an array of Atoms, so they cannot be individually deleted by anyone.
virtual ~Atom() {}
private:
/// Kind tag fixed at construction; subclasses' classof() tests compare it.
Definition _definition;
};
/// Class which owns an atom pointer and runs the atom destructor when the
/// owning pointer goes out of scope.
template<typename T>
class OwningAtomPtr {
private:
  // Move-only: two owners of one atom would run its destructor twice.
  OwningAtomPtr(const OwningAtomPtr &) = delete;
  void operator=(const OwningAtomPtr&) = delete;

public:
  /// Creates an empty owner that holds no atom.
  OwningAtomPtr() : atom(nullptr) { }

  /// Takes ownership of \p atom; its destructor runs when this owner dies.
  OwningAtomPtr(T *atom) : atom(atom) { }

  /// Runs the destructor of the owned atom, if any. The atom's memory is not
  /// freed here; only the destructor is invoked.
  ~OwningAtomPtr() {
    if (atom)
      runDestructor(atom);
  }

  /// Explicitly invokes the (virtual) Atom destructor on \p atom.
  void runDestructor(Atom *atom) {
    atom->~Atom();
  }

  /// Transfers ownership from \p ptr, leaving it empty.
  OwningAtomPtr(OwningAtomPtr &&ptr) : atom(ptr.atom) {
    ptr.atom = nullptr;
  }

  /// Destroys the currently owned atom (if any) and takes over the atom owned
  /// by \p ptr, leaving \p ptr empty.
  ///
  /// Self-assignment is a no-op. Without the guard the owned atom would be
  /// destroyed and the pointer reset to null.
  OwningAtomPtr &operator=(OwningAtomPtr&& ptr) {
    if (this != &ptr) {
      if (atom)
        runDestructor(atom);
      atom = ptr.atom;
      ptr.atom = nullptr;
    }
    return *this;
  }

  /// Returns a reference to the stored pointer without giving up ownership.
  T *const &get() const {
    return atom;
  }

  /// Mutable access to the stored pointer. Overwriting it through this
  /// reference does not run the destructor of the previous atom.
  T *&get() {
    return atom;
  }

  /// Gives up ownership: returns the stored pointer and leaves this owner
  /// empty, so the destructor will not be run for the returned atom.
  T *release() {
    auto *v = atom;
    atom = nullptr;
    return v;
  }

private:
  T *atom;
};
} // namespace lld
#endif // LLD_CORE_ATOM_H

View File

@ -0,0 +1,378 @@
//===- Core/DefinedAtom.h - An Atom with content --------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_DEFINED_ATOM_H
#define LLD_CORE_DEFINED_ATOM_H
#include "lld/Core/Atom.h"
#include "lld/Core/Reference.h"
#include "lld/Core/LLVM.h"
#include "llvm/Support/ErrorHandling.h"
namespace lld {
class File;
/// \brief The fundamental unit of linking.
///
/// A C function or global variable is an atom. An atom has content and
/// attributes. The content of a function atom is the instructions that
/// implement the function. The content of a global variable atom is its
/// initial bytes.
///
/// Here are some example attribute sets for common atoms. If a particular
/// attribute is not listed, the default values are: definition=regular,
/// sectionChoice=basedOnContent, scope=translationUnit, merge=no,
/// deadStrip=normal, interposable=no
///
/// C function: void foo() {} <br>
/// name=foo, type=code, perm=r_x, scope=global
///
/// C static function: static void func() {} <br>
/// name=func, type=code, perm=r_x
///
/// C global variable: int count = 1; <br>
/// name=count, type=data, perm=rw_, scope=global
///
/// C tentative definition: int bar; <br>
/// name=bar, type=zerofill, perm=rw_, scope=global,
/// merge=asTentative, interposable=yesAndRuntimeWeak
///
/// Uninitialized C static variable: static int stuff; <br>
/// name=stuff, type=zerofill, perm=rw_
///
/// Weak C function: __attribute__((weak)) void foo() {} <br>
/// name=foo, type=code, perm=r_x, scope=global, merge=asWeak
///
/// Hidden C function: __attribute__((visibility("hidden"))) void foo() {}<br>
/// name=foo, type=code, perm=r_x, scope=linkageUnit
///
/// No-dead-strip function: __attribute__((used)) void foo() {} <br>
/// name=foo, type=code, perm=r_x, scope=global, deadStrip=never
///
/// Non-inlined C++ inline method: inline void Foo::doit() {} <br>
/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
/// mergeDupes=asWeak
///
/// Non-inlined C++ inline method whose address is taken:
/// inline void Foo::doit() {} <br>
/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
/// mergeDupes=asAddressedWeak
///
/// literal c-string: "hello" <br>
/// name="" type=cstring, perm=r__, scope=linkageUnit
///
/// literal double: 1.234 <br>
/// name="" type=literal8, perm=r__, scope=linkageUnit
///
/// constant: { 1,2,3 } <br>
/// name="" type=constant, perm=r__, scope=linkageUnit
///
/// Pointer to initializer function: <br>
/// name="" type=initializer, perm=rw_l,
/// sectionChoice=customRequired
///
/// C function placed in custom section: __attribute__((section("__foo")))
/// void foo() {} <br>
/// name=foo, type=code, perm=r_x, scope=global,
/// sectionChoice=customRequired, customSectionName=__foo
///
class DefinedAtom : public Atom {
public:
/// Whether uses of this atom may be bound directly or must be indirected.
enum Interposable {
interposeNo, // linker can directly bind uses of this atom
interposeYes, // linker must indirect (through GOT) uses
interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final
// linked image
};
/// How the linker handles several atoms that share the same name.
enum Merge {
mergeNo, // Another atom with same name is error
mergeAsTentative, // Is ANSI C tentative definition, can be coalesced
mergeAsWeak, // Is C++ inline definition that was not inlined,
// but address was not taken, so atom can be hidden
// by linker
mergeAsWeakAndAddressUsed, // Is C++ inline definition whose
// address was taken.
mergeSameNameAndSize, // Another atom with different size is error
mergeByLargestSection, // Choose an atom whose section is the largest.
mergeByContent, // Merge with other constants with same content.
};
/// The kind of content an atom holds. Entries tagged [Darwin] are described
/// by their own comments as Darwin-specific.
enum ContentType {
typeUnknown, // for use with definitionUndefined
typeMachHeader, // atom representing mach_header [Darwin]
typeCode, // executable code
typeResolver, // function which returns address of target
typeBranchIsland, // linker created for large binaries
typeBranchShim, // linker created to switch thumb mode
typeStub, // linker created for calling external function
typeStubHelper, // linker created for initial stub binding
typeConstant, // a read-only constant
typeCString, // a zero terminated UTF8 C string
typeUTF16String, // a zero terminated UTF16 string
typeCFI, // a FDE or CIE from dwarf unwind info
typeLSDA, // extra unwinding info
typeLiteral4, // a four-byte read-only constant
typeLiteral8, // an eight-byte read-only constant
typeLiteral16, // a sixteen-byte read-only constant
typeData, // read-write data
typeDataFast, // allow data to be quickly accessed
typeZeroFill, // zero-fill data
typeZeroFillFast, // allow zero-fill data to be quickly accessed
typeConstData, // read-only data after dynamic linker is done
typeObjC1Class, // ObjC1 class [Darwin]
typeLazyPointer, // pointer through which a stub jumps
typeLazyDylibPointer, // pointer through which a stub jumps [Darwin]
typeNonLazyPointer, // pointer to external symbol
typeCFString, // NS/CFString object [Darwin]
typeGOT, // pointer to external symbol
typeInitializerPtr, // pointer to initializer function
typeTerminatorPtr, // pointer to terminator function
typeCStringPtr, // pointer to UTF8 C string [Darwin]
typeObjCClassPtr, // pointer to ObjC class [Darwin]
typeObjC2CategoryList, // pointers to ObjC category [Darwin]
typeObjCImageInfo, // pointer to ObjC class [Darwin]
typeObjCMethodList, // pointer to ObjC method list [Darwin]
typeDTraceDOF, // runtime data for Dtrace [Darwin]
typeInterposingTuples, // tuples of interposing info for dyld [Darwin]
typeTempLTO, // temporary atom for bitcode reader
typeCompactUnwindInfo, // runtime data for unwinder [Darwin]
typeProcessedUnwindInfo,// compressed compact unwind info [Darwin]
typeThunkTLV, // thunk used to access a TLV [Darwin]
typeTLVInitialData, // initial data for a TLV [Darwin]
typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin]
typeTLVInitializerPtr, // pointer to thread local initializer [Darwin]
typeDSOHandle, // atom representing DSO handle [Darwin]
typeSectCreate, // Created via the -sectcreate option [Darwin]
};
// Permission bits for atoms and segments. The order of these values is
// important, because the layout pass may sort atoms by permission if other
// attributes are the same.
enum ContentPermissions {
perm___ = 0, // mapped as inaccessible
permR__ = 8, // mapped read-only
permRW_ = 8 + 2, // mapped readable and writable
permRW_L = 8 + 2 + 1, // initially mapped r/w, then made read-only
// loader writable
permR_X = 8 + 4, // mapped readable and executable
permRWX = 8 + 2 + 4, // mapped readable and writable and executable
permUnknown = 16 // unknown or invalid permissions
};
/// Whether the atom dictates the section it is placed in.
enum SectionChoice {
sectionBasedOnContent, // linker infers final section based on content
sectionCustomPreferred, // linker may place in specific section
sectionCustomRequired // linker must place in specific section
};
/// Constraints on dead stripping of the atom.
enum DeadStripKind {
deadStripNormal, // linker may dead strip this atom
deadStripNever, // linker must never dead strip this atom
deadStripAlways // linker must remove this atom if unused
};
enum DynamicExport {
/// \brief The linker may or may not export this atom dynamically depending
/// on the output type and other context of the link.
dynamicExportNormal,
/// \brief The linker will always export this atom dynamically.
dynamicExportAlways,
};
// Attributes describe a code model used by the atom.
enum CodeModel {
codeNA, // no specific code model
// MIPS code models
codeMipsPIC, // PIC function in a PIC / non-PIC mixed file
codeMipsMicro, // microMIPS instruction encoding
codeMipsMicroPIC, // microMIPS instruction encoding + PIC
codeMips16, // MIPS-16 instruction encoding
// ARM code models
codeARMThumb, // ARM Thumb instruction set
codeARM_a, // $a-like mapping symbol (for ARM code)
codeARM_d, // $d-like mapping symbol (for data)
codeARM_t, // $t-like mapping symbol (for Thumb code)
};
/// Alignment constraint expressed as a (value, modulus) pair.
/// NOTE(review): presumably an address satisfies the constraint when
/// address % value == modulus; confirm the units of value with callers.
/// The constructor takes ints but stores them in 16-bit fields, so the
/// arguments are narrowed to uint16_t.
struct Alignment {
Alignment(int v, int m = 0) : value(v), modulus(m) {}
uint16_t value;
uint16_t modulus;
/// Two alignments are equal when both value and modulus match.
bool operator==(const Alignment &rhs) const {
return (value == rhs.value) && (modulus == rhs.modulus);
}
};
/// \brief returns a value for the order of this Atom within its file.
///
/// This is used by the linker to order the layout of Atoms so that the
/// resulting image is stable and reproducible.
virtual uint64_t ordinal() const = 0;
/// \brief the number of bytes of space this atom's content will occupy in the
/// final linked image.
///
/// For a function atom, it is the number of bytes of code in the function.
virtual uint64_t size() const = 0;
/// \brief The size of the section from which the atom is instantiated.
///
/// Merge::mergeByLargestSection is defined in terms of section size
/// and not in terms of atom size, so we need this function separate
/// from size(). The default implementation returns 0.
virtual uint64_t sectionSize() const { return 0; }
/// \brief The visibility of this atom to other atoms.
///
/// C static functions have scope scopeTranslationUnit. Regular C functions
/// have scope scopeGlobal. Functions compiled with visibility=hidden have
/// scope scopeLinkageUnit so they can be seen by other atoms being linked but
/// not by the OS loader.
virtual Scope scope() const = 0;
/// \brief Whether the linker should use direct or indirect access to this
/// atom.
virtual Interposable interposable() const = 0;
/// \brief how the linker should handle if multiple atoms have the same name.
virtual Merge merge() const = 0;
/// \brief The type of this atom, such as code or data.
virtual ContentType contentType() const = 0;
/// \brief The alignment constraints on how this atom must be laid out in the
/// final linked image (e.g. 16-byte aligned).
virtual Alignment alignment() const = 0;
/// \brief Whether this atom must be in a specially named section in the final
/// linked image, or if the linker can infer the section based on the
/// contentType().
virtual SectionChoice sectionChoice() const = 0;
/// \brief If sectionChoice() != sectionBasedOnContent, then this returns the
/// name of the section the atom should be placed into.
virtual StringRef customSectionName() const = 0;
/// \brief constraints on whether the linker may dead strip away this atom.
virtual DeadStripKind deadStrip() const = 0;
/// \brief Under which conditions should this atom be dynamically exported.
/// The default implementation returns dynamicExportNormal.
virtual DynamicExport dynamicExport() const {
return dynamicExportNormal;
}
/// \brief Code model used by the atom. The default implementation returns
/// codeNA.
virtual CodeModel codeModel() const { return codeNA; }
/// \brief Returns the OS memory protections required for this atom's content
/// at runtime.
///
/// A function atom is R_X, a global variable is RW_, and a read-only constant
/// is R__.
virtual ContentPermissions permissions() const;
/// \brief returns a reference to the raw (unrelocated) bytes of this Atom's
/// content.
virtual ArrayRef<uint8_t> rawContent() const = 0;
/// This class abstracts iterating over the sequence of References
/// in an Atom. Concrete instances of DefinedAtom must implement
/// the derefIterator() and incrementIterator() methods.
class reference_iterator {
public:
/// \p it is an opaque position that only the owning atom \p a interprets.
reference_iterator(const DefinedAtom &a, const void *it)
: _atom(a), _it(it) { }
const Reference *operator*() const {
return _atom.derefIterator(_it);
}
const Reference *operator->() const {
return _atom.derefIterator(_it);
}
/// Iterators compare equal when their opaque positions match; the owning
/// atom is not compared.
bool operator==(const reference_iterator &other) const {
return _it == other._it;
}
bool operator!=(const reference_iterator &other) const {
return !(*this == other);
}
/// Pre-increment: asks the owning atom to advance the opaque position.
reference_iterator &operator++() {
_atom.incrementIterator(_it);
return *this;
}
private:
const DefinedAtom &_atom;
const void *_it;
};
/// \brief Returns an iterator to the beginning of this Atom's References.
virtual reference_iterator begin() const = 0;
/// \brief Returns an iterator to the end of this Atom's References.
virtual reference_iterator end() const = 0;
/// Adds a reference to this atom.
///
/// The default implementation is llvm_unreachable; subclasses that permit
/// adding references must override it.
virtual void addReference(Reference::KindNamespace ns,
Reference::KindArch arch,
Reference::KindValue kindValue, uint64_t off,
const Atom *target, Reference::Addend a) {
llvm_unreachable("Subclass does not permit adding references");
}
/// classof - Type test used for casting: an Atom is a DefinedAtom exactly
/// when its definition() is definitionRegular.
static bool classof(const Atom *a) {
return a->definition() == definitionRegular;
}
/// Utility for deriving permissions from content type
static ContentPermissions permissions(ContentType type);
/// Utility function to check if the atom occupies file space
///
/// Returns false only for the zero-fill content types (typeZeroFill,
/// typeZeroFillFast and typeTLVInitialZeroFill).
bool occupiesDiskSpace() const {
ContentType atomContentType = contentType();
return !(atomContentType == DefinedAtom::typeZeroFill ||
atomContentType == DefinedAtom::typeZeroFillFast ||
atomContentType == DefinedAtom::typeTLVInitialZeroFill);
}
/// Utility function to check if relocations in this atom to other defined
/// atoms can be implicitly generated, and so we don't need to explicitly
/// emit those relocations.
///
/// Currently true only for atoms whose content type is typeCFI.
bool relocsToDefinedCanBeImplicit() const {
ContentType atomContentType = contentType();
return atomContentType == typeCFI;
}
// Returns true if lhs should be placed before rhs in the final output.
static bool compareByPosition(const DefinedAtom *lhs,
const DefinedAtom *rhs);
protected:
// DefinedAtom is an abstract base class. Only subclasses can access
// constructor.
DefinedAtom() : Atom(definitionRegular) { }
~DefinedAtom() override = default;
/// \brief Returns a pointer to the Reference object that the abstract
/// iterator "points" to.
virtual const Reference *derefIterator(const void *iter) const = 0;
/// \brief Adjusts the abstract iterator to "point" to the next Reference
/// object for this Atom.
virtual void incrementIterator(const void *&iter) const = 0;
};
} // end namespace lld
#endif

View File

@ -0,0 +1,68 @@
//===- Error.h - system_error extensions for lld ----------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This declares a new error_category for the lld library.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_ERROR_H
#define LLD_CORE_ERROR_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include <system_error>
namespace lld {
const std::error_category &YamlReaderCategory();
/// Error conditions reported under YamlReaderCategory().
///
/// Enumerators start at 1 on purpose: std::error_code treats a value() of 0
/// as "no error", so a zero-valued enumerator would convert to an error_code
/// that tests false.
enum class YamlReaderError {
  unknown_keyword = 1,
  illegal_value
};
/// Wraps a YamlReaderError in a std::error_code that belongs to
/// YamlReaderCategory(); the enumerator's integer value becomes the code.
inline std::error_code make_error_code(YamlReaderError e) {
  const int rawCode = static_cast<int>(e);
  return std::error_code(rawCode, YamlReaderCategory());
}
/// Creates an error_code object that has associated with it an arbitrary
/// error message. The value() of the error_code will always be non-zero
/// but its value is meaningless. The message() will be (a copy of) the
/// supplied error string.
/// Note: Once ErrorOr<> is updated to work with errors other than error_code,
/// this can be updated to return some other kind of error.
std::error_code make_dynamic_error_code(StringRef msg);
/// Generic error.
///
/// For errors that don't require their own specific sub-error (most errors)
/// this class can be used to describe the error via a string message.
class GenericError : public llvm::ErrorInfo<GenericError> {
public:
// NOTE(review): presumably the type-identity anchor that llvm::ErrorInfo
// expects from each error class; it is defined out of line.
static char ID;
/// Constructs the error from \p Msg (defined out of line).
GenericError(Twine Msg);
/// Returns the stored message string.
const std::string &getMessage() const { return Msg; }
/// Writes a description of this error to \p OS (defined out of line).
void log(llvm::raw_ostream &OS) const override;
/// Converts this error to a std::error_code that carries the same message,
/// built with make_dynamic_error_code().
std::error_code convertToErrorCode() const override {
return make_dynamic_error_code(getMessage());
}
private:
std::string Msg;
};
} // end namespace lld
namespace std {
// Registers lld::YamlReaderError as an error-code enum, which lets it convert
// implicitly to std::error_code (through lld::make_error_code).
template <> struct is_error_code_enum<lld::YamlReaderError> : std::true_type {};
}
#endif

View File

@ -0,0 +1,278 @@
//===- Core/File.h - A Container of Atoms ---------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_FILE_H
#define LLD_CORE_FILE_H
#include "lld/Core/AbsoluteAtom.h"
#include "lld/Core/DefinedAtom.h"
#include "lld/Core/SharedLibraryAtom.h"
#include "lld/Core/UndefinedAtom.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <functional>
#include <memory>
#include <mutex>
#include <vector>
namespace lld {
class LinkingContext;
/// Every Atom is owned by some File. A common scenario is for a single
/// object file (.o) to be parsed by some reader and produce a single
/// File object that represents the content of that object file.
///
/// To iterate through the Atoms in a File there are four methods that
/// return collections. For instance to iterate through all the DefinedAtoms
/// in a File object use:
/// for (const DefinedAtom *atom : file->defined()) {
/// }
///
/// The Atom objects in a File are owned by the File object. The Atom objects
/// are destroyed when the File object is destroyed.
class File {
public:
  virtual ~File();

  /// \brief Kinds of files that are supported.
  enum Kind {
    kindErrorObject,          ///< an error object file (.o)
    kindNormalizedObject,     ///< a normalized file (.o)
    kindMachObject,           ///< a MachO object file (.o)
    kindCEntryObject,         ///< a file for CEntries
    kindHeaderObject,         ///< a file for file headers
    kindEntryObject,          ///< a file for the entry
    kindUndefinedSymsObject,  ///< a file for undefined symbols
    kindStubHelperObject,     ///< a file for stub helpers
    kindResolverMergedObject, ///< the resolver merged file.
    kindSectCreateObject,     ///< a sect create object file (.o)
    kindSharedLibrary,        ///< shared library (.so)
    kindArchiveLibrary        ///< archive (.a)
  };

  /// \brief Returns file kind. Needed for dyn_cast<> on File objects.
  Kind kind() const {
    return _kind;
  }

  /// This returns the path to the file which was used to create this object
  /// (e.g. "/tmp/foo.o"). If the file is a member of an archive file, the
  /// returned string includes the archive file name.
  ///
  /// The combined "archive(member)" string is built on first use and cached
  /// in _archiveMemberPath; the returned StringRef points into that cache.
  StringRef path() const {
    if (_archivePath.empty())
      return _path;
    if (_archiveMemberPath.empty())
      _archiveMemberPath = (_archivePath + "(" + _path + ")").str();
    return _archiveMemberPath;
  }

  /// Returns the path of the archive file name if this file is instantiated
  /// from an archive file. Otherwise returns the empty string.
  StringRef archivePath() const { return _archivePath; }

  /// Records the archive this file was instantiated from.
  ///
  /// The string cached by path() is discarded so that it is rebuilt from the
  /// new archive path; otherwise path() would keep returning the old name.
  void setArchivePath(StringRef path) {
    _archivePath = path;
    _archiveMemberPath.clear();
  }

  /// Returns the path name of this file. It doesn't include archive file name.
  StringRef memberPath() const { return _path; }

  /// Returns the command line order of the file.
  uint64_t ordinal() const {
    assert(_ordinal != UINT64_MAX);
    return _ordinal;
  }

  /// Returns true/false depending on whether an ordinal has been set.
  bool hasOrdinal() const { return (_ordinal != UINT64_MAX); }

  /// Sets the command line order of the file.
  void setOrdinal(uint64_t ordinal) const { _ordinal = ordinal; }

  /// Returns the ordinal for the next atom to be defined in this file.
  uint64_t getNextAtomOrdinalAndIncrement() const {
    return _nextAtomOrdinal++;
  }

  /// For allocating any objects owned by this File.
  llvm::BumpPtrAllocator &allocator() const {
    return _allocator;
  }

  /// The type of atom mutable container.
  template <typename T> using AtomVector = std::vector<OwningAtomPtr<T>>;

  /// The range type for the atoms.
  ///
  /// A non-owning view over an AtomVector. Iterating it yields raw atom
  /// pointers; owning_ptrs() exposes the underlying OwningAtomPtr elements.
  template <typename T> class AtomRange {
  public:
    AtomRange(AtomVector<T> &v) : _v(v) {}
    // The view stores a mutable reference, so constness is cast away here.
    AtomRange(const AtomVector<T> &v) : _v(const_cast<AtomVector<T> &>(v)) {}

    typedef std::pointer_to_unary_function<const OwningAtomPtr<T>&,
                                           const T*> ConstDerefFn;

    typedef std::pointer_to_unary_function<OwningAtomPtr<T>&, T*> DerefFn;

    typedef llvm::mapped_iterator<typename AtomVector<T>::const_iterator,
                                  ConstDerefFn> ConstItTy;
    typedef llvm::mapped_iterator<typename AtomVector<T>::iterator,
                                  DerefFn> ItTy;

    /// Maps an owning pointer to the raw (const) atom pointer it holds.
    static const T* DerefConst(const OwningAtomPtr<T> &p) {
      return p.get();
    }

    /// Maps an owning pointer to the raw atom pointer it holds.
    static T* Deref(OwningAtomPtr<T> &p) {
      return p.get();
    }

    ConstItTy begin() const {
      return ConstItTy(_v.begin(), ConstDerefFn(DerefConst));
    }
    ConstItTy end() const {
      return ConstItTy(_v.end(), ConstDerefFn(DerefConst));
    }

    ItTy begin() {
      return ItTy(_v.begin(), DerefFn(Deref));
    }
    ItTy end() {
      return ItTy(_v.end(), DerefFn(Deref));
    }

    /// Range over the owning pointers themselves rather than the raw atoms.
    llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() {
      return llvm::make_range(_v.begin(), _v.end());
    }

    llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() const {
      return llvm::make_range(_v.begin(), _v.end());
    }

    bool empty() const {
      return _v.empty();
    }

    size_t size() const {
      return _v.size();
    }

    const OwningAtomPtr<T> &operator[](size_t idx) const {
      return _v[idx];
    }

    OwningAtomPtr<T> &operator[](size_t idx) {
      return _v[idx];
    }

  private:
    AtomVector<T> &_v;
  };

  /// \brief Must be implemented to return the AtomVector object for
  /// all DefinedAtoms in this File.
  virtual const AtomRange<DefinedAtom> defined() const = 0;

  /// \brief Must be implemented to return the AtomVector object for
  /// all UndefinedAtoms in this File.
  virtual const AtomRange<UndefinedAtom> undefined() const = 0;

  /// \brief Must be implemented to return the AtomVector object for
  /// all SharedLibraryAtoms in this File.
  virtual const AtomRange<SharedLibraryAtom> sharedLibrary() const = 0;

  /// \brief Must be implemented to return the AtomVector object for
  /// all AbsoluteAtoms in this File.
  virtual const AtomRange<AbsoluteAtom> absolute() const = 0;

  /// Drop all of the atoms owned by this file.  This will result in all of
  /// the atoms running their destructors.
  /// This is required because atoms may be allocated on a BumpPtrAllocator
  /// of a different file.  We need to destruct all atoms before any files.
  virtual void clearAtoms() = 0;

  /// \brief If a file is parsed using a different method than doParse(),
  /// one must use this method to set the last error status, so that
  /// doParse will not be called twice. Only YAML reader uses this
  /// (because YAML reader does not read blobs but structured data).
  void setLastError(std::error_code err) { _lastError = err; }

  /// Parses the file (defined out of line).
  std::error_code parse();

  // Usually each file owns a std::unique_ptr<MemoryBuffer>.
  // However, there's one special case. If a file is an archive file,
  // the archive file and its children all share the same memory buffer.
  // This method is used by the ArchiveFile to give its children
  // co-ownership of the buffer.
  void setSharedMemoryBuffer(std::shared_ptr<MemoryBuffer> mb) {
    _sharedMemoryBuffer = mb;
  }

protected:
  /// \brief only subclasses of File can be instantiated
  ///
  /// The ordinal starts as UINT64_MAX, which hasOrdinal() treats as "not
  /// set". \p p is kept as a StringRef, i.e. the characters are not copied.
  File(StringRef p, Kind kind)
    : _path(p), _kind(kind), _ordinal(UINT64_MAX),
      _nextAtomOrdinal(0) {}

  /// \brief Subclasses should override this method to parse the
  /// memory buffer passed to this file's constructor.
  virtual std::error_code doParse() { return std::error_code(); }

  // Shared static vectors, one per atom kind (defined out of line).
  // NOTE(review): presumably empty and returned by subclasses that have no
  // atoms of that kind; confirm against the definitions.
  static AtomVector<DefinedAtom> _noDefinedAtoms;
  static AtomVector<UndefinedAtom> _noUndefinedAtoms;
  static AtomVector<SharedLibraryAtom> _noSharedLibraryAtoms;
  static AtomVector<AbsoluteAtom> _noAbsoluteAtoms;
  mutable llvm::BumpPtrAllocator _allocator;

private:
  StringRef _path;
  std::string _archivePath;
  mutable std::string _archiveMemberPath; // cache filled lazily by path()
  Kind _kind;
  mutable uint64_t _ordinal;
  mutable uint64_t _nextAtomOrdinal;
  std::shared_ptr<MemoryBuffer> _sharedMemoryBuffer;
  llvm::Optional<std::error_code> _lastError;
  std::mutex _parseMutex;
};
/// An ErrorFile represents a file that doesn't exist.
/// If you try to parse a file which doesn't exist, an instance of this
/// class will be returned. Its parse method always returns an error.
/// This is useful to delay erroring on non-existent files, so that we
/// can unit test a driver using non-existing file paths.
class ErrorFile : public File {
public:
/// Remembers \p ec, the error that doParse() will report for \p path.
ErrorFile(StringRef path, std::error_code ec)
: File(path, kindErrorObject), _ec(ec) {}
/// Always returns the error code supplied at construction.
std::error_code doParse() override { return _ec; }
// The atom accessors below must never be called on an ErrorFile; each one
// is llvm_unreachable.
const AtomRange<DefinedAtom> defined() const override {
llvm_unreachable("internal error");
}
const AtomRange<UndefinedAtom> undefined() const override {
llvm_unreachable("internal error");
}
const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
llvm_unreachable("internal error");
}
const AtomRange<AbsoluteAtom> absolute() const override {
llvm_unreachable("internal error");
}
/// Nothing to clear: this class stores no atoms.
void clearAtoms() override {
}
private:
std::error_code _ec;
};
} // end namespace lld
#endif

View File

@ -0,0 +1,132 @@
//===- include/Core/Instrumentation.h - Instrumentation API ---------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provide an Instrumentation API that optionally uses VTune interfaces.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_INSTRUMENTATION_H
#define LLD_CORE_INSTRUMENTATION_H
#include "llvm/Support/Compiler.h"
#include <utility>
#ifdef LLD_HAS_VTUNE
# include <ittnotify.h>
#endif
namespace lld {
#ifdef LLD_HAS_VTUNE
/// \brief A unique global scope for instrumentation data.
///
/// Domains last for the lifetime of the application and cannot be destroyed.
/// Multiple Domains created with the same name represent the same domain.
class Domain {
public:
  /// Obtains the ITT domain handle for \p name via __itt_domain_createA().
  explicit Domain(const char *name) : _domain(__itt_domain_createA(name)) {}

  /// Implicit conversion so that a Domain can be passed straight to ITT calls.
  operator __itt_domain *() const { return _domain; }

  /// Member access on the underlying ITT domain.
  __itt_domain *operator->() const { return _domain; }

private:
  __itt_domain *_domain;
};
/// \brief A global reference to a string constant.
///
/// These are uniqued by the ITT runtime and cannot be deleted. They are not
/// specific to a domain.
///
/// Prefer reusing a single StringHandle over passing a ntbs when the same
/// string will be used often.
class StringHandle {
public:
  /// Obtains the ITT string handle for \p name via
  /// __itt_string_handle_createA().
  StringHandle(const char *name) : _handle(__itt_string_handle_createA(name)) {}

  /// Implicit conversion so that the handle can be passed straight to ITT
  /// calls.
  operator __itt_string_handle *() const { return _handle; }

private:
  __itt_string_handle *_handle;
};
/// \brief A task on a single thread. Nests within other tasks.
///
/// Each thread has its own task stack and tasks nest recursively on that stack.
/// A task cannot transfer threads.
///
/// SBRM is used to ensure task starts and ends are balanced. The lifetime of
/// a task is either the lifetime of this object, or until end is called.
class ScopedTask {
  /// Domain of the task this object still has to end; nullptr once the task
  /// has been ended or moved away.
  __itt_domain *_domain;

  ScopedTask(const ScopedTask &) = delete;
  ScopedTask &operator=(const ScopedTask &) = delete;

public:
  /// \brief Create a task in Domain \p d named \p s.
  ScopedTask(const Domain &d, const StringHandle &s) : _domain(d) {
    __itt_task_begin(d, __itt_null, __itt_null, s);
  }

  /// \brief Take over the task owned by \p other; \p other will no longer
  /// end it.
  ScopedTask(ScopedTask &&other) : _domain(other._domain) {
    other._domain = nullptr;
  }

  /// \brief End the task currently owned (if any), then take over the task
  /// owned by \p other.
  ///
  /// Ending the current task first keeps task begins and ends matched;
  /// simply overwriting _domain would drop the pending end. Self-assignment
  /// is a no-op.
  ScopedTask &operator=(ScopedTask &&other) {
    if (this != &other) {
      end();
      _domain = other._domain;
      other._domain = nullptr;
    }
    return *this;
  }

  /// \brief Prematurely end this task.
  ///
  /// Safe to call more than once: only the first call on an owned task
  /// reaches __itt_task_end().
  void end() {
    if (_domain)
      __itt_task_end(_domain);
    _domain = nullptr;
  }

  ~ScopedTask() { end(); }
};
/// \brief A specific point in time. Allows metadata to be associated.
class Marker {
public:
/// Emits a marker named \p s in domain \p d with __itt_scope_global scope.
/// The object itself stores nothing; constructing it is the whole effect.
Marker(const Domain &d, const StringHandle &s) {
__itt_marker(d, __itt_null, s, __itt_scope_global);
}
};
#else
// No-op stand-ins used when LLD_HAS_VTUNE is not defined. They accept the same
// constructor arguments as the VTune-backed classes but hold no state and do
// nothing.

/// Stateless placeholder for an instrumentation domain.
class Domain {
public:
  Domain(const char *) {}
};

/// Stateless placeholder for an interned string.
class StringHandle {
public:
  StringHandle(const char *) {}
};

/// Stateless placeholder for a scoped task; end() does nothing.
class ScopedTask {
public:
  ScopedTask(const Domain &, const StringHandle &) {}
  void end() {}
};

/// Stateless placeholder for a point-in-time marker.
class Marker {
public:
  Marker(const Domain &, const StringHandle &) {}
};
#endif
/// Returns the process-wide "org.llvm.lld" Domain, created on first use as a
/// function-local static.
inline const Domain &getDefaultDomain() {
  static Domain lldDomain("org.llvm.lld");
  const Domain &shared = lldDomain;
  return shared;
}
} // end namespace lld.
#endif

View File

@ -0,0 +1,83 @@
//===--- LLVM.h - Import various common LLVM datatypes ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file forward declares and imports various common LLVM datatypes that
// lld wants to use unqualified.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_LLVM_H
#define LLD_CORE_LLVM_H
// This should be the only #include, force #includes of all the others on
// clients.
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Casting.h"
#include <utility>
// Forward declarations of the LLVM types that are imported into namespace lld
// by the using-declarations in this header. Only the names are introduced
// here; per the note above the #includes, clients needing a complete type
// must include the defining LLVM header themselves.
namespace llvm {
// ADT's.
class Error;
class StringRef;
class Twine;
class MemoryBuffer;
class MemoryBufferRef;
template<typename T> class ArrayRef;
template<unsigned InternalLen> class SmallString;
template<typename T, unsigned N> class SmallVector;
template<typename T> class SmallVectorImpl;
template<typename T>
struct SaveAndRestore;
template<typename T>
class ErrorOr;
template<typename T>
class Expected;
class raw_ostream;
// TODO: DenseMap, ...
}
// Import the LLVM names that lld uses unqualified. After including this
// header, code inside namespace lld can write e.g. StringRef or dyn_cast
// without the llvm:: prefix.
namespace lld {
// Casting operators.
using llvm::isa;
using llvm::cast;
using llvm::dyn_cast;
using llvm::dyn_cast_or_null;
using llvm::cast_or_null;
// ADT's.
using llvm::Error;
using llvm::StringRef;
using llvm::Twine;
using llvm::MemoryBuffer;
using llvm::MemoryBufferRef;
using llvm::ArrayRef;
using llvm::SmallString;
using llvm::SmallVector;
using llvm::SmallVectorImpl;
using llvm::SaveAndRestore;
using llvm::ErrorOr;
using llvm::Expected;
using llvm::raw_ostream;
} // end namespace lld.
namespace std {
/// Specialization of std::hash for llvm::StringRef, so a StringRef can be
/// used where a std::hash-based hasher is expected. The hash is whatever
/// llvm::hash_value() produces for the string.
template <> struct hash<llvm::StringRef> {
  size_t operator()(const llvm::StringRef &str) const {
    return llvm::hash_value(str);
  }
};
} // end namespace std
#endif

View File

@ -0,0 +1,251 @@
//===- lld/Core/LinkingContext.h - Linker Target Info Interface -----------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_LINKING_CONTEXT_H
#define LLD_CORE_LINKING_CONTEXT_H
#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "lld/Core/Node.h"
#include "lld/Core/Reference.h"
#include "lld/Core/Reader.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
namespace lld {
class PassManager;
class File;
class Writer;
class Node;
class SharedLibraryFile;
/// \brief The LinkingContext class encapsulates "what and how" to link.
///
/// The base class LinkingContext contains the options needed by core linking.
/// Subclasses of LinkingContext have additional options needed by specific
/// Writers.
///
/// The class is abstract: its constructor is protected and several members
/// (demangle(), createImplicitFiles(), addPasses(), finalizeInputFiles(),
/// handleLoadedFile(), writer() and validateImpl()) are pure virtual.
class LinkingContext {
public:
virtual ~LinkingContext();
/// \name Methods needed by core linking
/// @{
/// Name of symbol linker should use as "entry point" to program,
/// usually "main" or "start".
virtual StringRef entrySymbolName() const { return _entrySymbolName; }
/// Whether core linking should remove Atoms not reachable by following
/// References from the entry point Atom or from all global scope Atoms
/// if globalsAreDeadStripRoots() is true.
bool deadStrip() const { return _deadStrip; }
/// Only used if deadStrip() returns true. Means all global scope Atoms
/// should be marked live (along with all Atoms they reference). Usually
/// this method returns false for main executables, but true for dynamic
/// shared libraries.
bool globalsAreDeadStripRoots() const { return _globalsAreDeadStripRoots; }
/// Only used if deadStrip() returns true. This method returns the names
/// of DefinedAtoms that should be marked live (along with all Atoms they
/// reference). Only Atoms with scope scopeLinkageUnit or scopeGlobal can
/// be kept live using this method.
const std::vector<StringRef> &deadStripRoots() const {
return _deadStripRoots;
}
/// Add the given symbol name to the dead strip root set. Only used if
/// deadStrip() returns true.
///
/// Asserts that \p symbolName is not empty. The name is stored as a
/// StringRef, so only the reference is recorded, not a copy of the text.
void addDeadStripRoot(StringRef symbolName) {
assert(!symbolName.empty() && "Empty symbol cannot be a dead strip root");
_deadStripRoots.push_back(symbolName);
}
/// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
/// SharedLibraryAtom for the link to be successful. This method controls
/// whether core linking prints out a list of remaining UndefinedAtoms.
///
/// \todo This should be a method core linking calls with a list of the
/// UndefinedAtoms so that different drivers can format the error message
/// as needed.
bool printRemainingUndefines() const { return _printRemainingUndefines; }
/// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
/// SharedLibraryAtom for the link to be successful. This method controls
/// whether core linking considers remaining undefines to be an error.
bool allowRemainingUndefines() const { return _allowRemainingUndefines; }
/// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
/// SharedLibraryAtom for the link to be successful. This method controls
/// whether core linking considers remaining undefines from the shared library
/// to be an error.
bool allowShlibUndefines() const { return _allowShlibUndefines; }
/// If true, core linking will write the path to each input file to stdout
/// (i.e. llvm::outs()) as it is used. This is used to implement the -t
/// linker option.
///
/// \todo This should be a method core linking calls so that drivers can
/// format the line as needed.
bool logInputFiles() const { return _logInputFiles; }
/// Parts of LLVM use global variables which are bound to command line
/// options (see llvm::cl::Options). This method returns "command line"
/// options which are used to configure LLVM's command line settings.
/// For instance the -debug-only XXX option can be used to dynamically
/// trace different parts of LLVM and lld.
const std::vector<const char *> &llvmOptions() const { return _llvmOptions; }
/// @}
/// \name Methods used by Drivers to configure TargetInfo
/// @{
/// Set the path returned by outputPath(). Only the StringRef is stored; the
/// text it refers to is not copied here.
void setOutputPath(StringRef str) { _outputPath = str; }
// Set the entry symbol name. You may also need to call addDeadStripRoot() for
// the symbol if your platform supports dead-stripping, so that the symbol
// will not be removed from the output.
void setEntrySymbolName(StringRef name) {
_entrySymbolName = name;
}
/// Setters for the flags read by the accessors of the same name above.
void setDeadStripping(bool enable) { _deadStrip = enable; }
void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; }
void setPrintRemainingUndefines(bool print) {
_printRemainingUndefines = print;
}
void setAllowRemainingUndefines(bool allow) {
_allowRemainingUndefines = allow;
}
void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; }
void setLogInputFiles(bool log) { _logInputFiles = log; }
/// Append one entry to the list returned by llvmOptions(). The pointer is
/// stored as given; the string is not copied.
void appendLLVMOption(const char *opt) { _llvmOptions.push_back(opt); }
/// Access the list of input Nodes owned by this context.
std::vector<std::unique_ptr<Node>> &getNodes() { return _nodes; }
const std::vector<std::unique_ptr<Node>> &getNodes() const { return _nodes; }
/// This method adds undefined symbols specified by the -u option to
/// the list of undefined symbols known to the linker. This option essentially
/// forces an undefined symbol to be created. You may also need to call
/// addDeadStripRoot() for the symbol if your platform supports dead
/// stripping, so that the symbol will not be removed from the output.
void addInitialUndefinedSymbol(StringRef symbolName) {
_initialUndefinedSymbols.push_back(symbolName);
}
/// Iterators for symbols that appear on the command line.
typedef std::vector<StringRef> StringRefVector;
typedef StringRefVector::iterator StringRefVectorIter;
typedef StringRefVector::const_iterator StringRefVectorConstIter;
/// Create linker internal files containing atoms for the linker to include
/// during link. Flavors can override this function in their LinkingContext
/// to add more internal files. These internal files are positioned before
/// the actual input files.
virtual void createInternalFiles(std::vector<std::unique_ptr<File> > &) const;
/// Return the list of undefined symbols that are specified in the
/// linker command line, using the -u option.
ArrayRef<StringRef> initialUndefinedSymbols() const {
return _initialUndefinedSymbols;
}
/// After all set* methods are called, the Driver calls this method
/// to validate that there are no missing options or invalid combinations
/// of options. If there is a problem, a description of the problem
/// is written to the supplied stream.
///
/// \returns true if there is an error with the current settings.
bool validate(raw_ostream &diagnostics);
/// Formats symbol name for use in error messages.
virtual std::string demangle(StringRef symbolName) const = 0;
/// @}
/// \name Methods used by Driver::link()
/// @{
/// Returns the file system path to which the linked output should be written.
///
/// \todo To support in-memory linking, we need an abstraction that allows
/// the linker to write to an in-memory buffer.
StringRef outputPath() const { return _outputPath; }
/// Accessor for Registry object embedded in LinkingContext.
const Registry &registry() const { return _registry; }
Registry &registry() { return _registry; }
/// This method is called by core linking to give the Writer a chance
/// to add file format specific "files" to set of files to be linked. This is
/// how file format specific atoms can be added to the link.
virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) = 0;
/// This method is called by core linking to build the list of Passes to be
/// run on the merged/linked graph of all input files.
virtual void addPasses(PassManager &pm) = 0;
/// Calls through to the writeFile() method on the specified Writer.
///
/// \param linkedFile This is the merged/linked graph of all input file Atoms.
virtual llvm::Error writeFile(const File &linkedFile) const;
/// Return the next ordinal and increment it.
///
/// The method is const but modifies the mutable _nextOrdinal counter. No
/// locking is done here.
virtual uint64_t getNextOrdinalAndIncrement() const { return _nextOrdinal++; }
// This function is called just before the Resolver kicks in.
// Derived classes may use it to change the list of input files.
virtual void finalizeInputFiles() = 0;
/// Callback invoked for each file the Resolver decides we are going to load.
/// This can be used to update context state based on the file, and emit
/// errors for any differences between the context state and a loaded file.
/// For example, we can error if we try to load a file which is a different
/// arch from that being linked.
virtual llvm::Error handleLoadedFile(File &file) = 0;
/// @}
protected:
LinkingContext(); // Must be subclassed
/// Abstract method to lazily instantiate the Writer.
virtual Writer &writer() const = 0;
/// Method to create an internal file for the entry symbol
virtual std::unique_ptr<File> createEntrySymbolFile() const;
std::unique_ptr<File> createEntrySymbolFile(StringRef filename) const;
/// Method to create an internal file for an undefined symbol
virtual std::unique_ptr<File> createUndefinedSymbolFile() const;
std::unique_ptr<File> createUndefinedSymbolFile(StringRef filename) const;
// Option storage. The defaults below are the values in effect until a driver
// calls the corresponding setter.
StringRef _outputPath;
StringRef _entrySymbolName;
bool _deadStrip = false;
bool _globalsAreDeadStripRoots = false;
bool _printRemainingUndefines = true;
bool _allowRemainingUndefines = false;
bool _logInputFiles = false;
bool _allowShlibUndefines = false;
std::vector<StringRef> _deadStripRoots;
std::vector<const char *> _llvmOptions;
StringRefVector _initialUndefinedSymbols;
std::vector<std::unique_ptr<Node>> _nodes;
// Mutable so that const member functions can use them.
mutable llvm::BumpPtrAllocator _allocator;
mutable uint64_t _nextOrdinal = 0;
Registry _registry;
private:
/// Validate the subclass bits. Only called by validate.
virtual bool validateImpl(raw_ostream &diagnostics) = 0;
};
} // end namespace lld
#endif

View File

@ -0,0 +1,74 @@
//===- lld/Core/Node.h - Input file class ---------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// The classes in this file represent inputs to the linker.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_NODE_H
#define LLD_CORE_NODE_H
#include "lld/Core/File.h"
#include "llvm/Option/ArgList.h"
#include <memory>
#include <vector>
namespace lld {
// Base class for the entries of the linker's input list. A Node is either a
// FileNode or a meta node that carries information about the input file list
// itself. GroupEnd is currently the only meta node.
class Node {
public:
  enum class Kind { File, GroupEnd };

  explicit Node(Kind type) : _kind{type} {}
  virtual ~Node() = default;

  // Which concrete kind of node this is; used by the classof() functions of
  // the subclasses.
  virtual Kind kind() const { return _kind; }

private:
  Kind _kind;
};
// Marker node for --end-group. getSize() returns the number of files between
// the corresponding --start-group and this marker.
class GroupEnd : public Node {
  int _size;

public:
  explicit GroupEnd(int size) : Node(Kind::GroupEnd), _size(size) {}

  int getSize() const { return _size; }

  // Lets isa<>/cast<>/dyn_cast<> identify GroupEnd nodes.
  static bool classof(const Node *a) {
    const bool isGroupEnd = (a->kind() == Kind::GroupEnd);
    return isGroupEnd;
  }
};
// A Node that owns a single File.
class FileNode : public Node {
public:
  // Takes ownership of the given File.
  explicit FileNode(std::unique_ptr<File> f)
      : Node(Node::Kind::File), _file(std::move(f)) {}

  // Lets isa<>/cast<>/dyn_cast<> identify FileNode nodes.
  static bool classof(const Node *a) {
    const bool isFile = (a->kind() == Node::Kind::File);
    return isFile;
  }

  // Non-owning pointer to the contained File.
  File *getFile() { return _file.get(); }

protected:
  std::unique_ptr<File> _file;
};
} // namespace lld
#endif // LLD_CORE_NODE_H

View File

@ -0,0 +1,297 @@
//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_PARALLEL_H
#define LLD_CORE_PARALLEL_H
#include "lld/Core/Instrumentation.h"
#include "lld/Core/LLVM.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/thread.h"
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <stack>
#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
#include <concrt.h>
#include <ppl.h>
#endif
namespace lld {
/// \brief Allows one or more threads to wait on a potentially unknown number of
/// events.
///
/// A latch starts at \p count. inc() increments this, and dec() decrements it.
/// All calls to sync() will block while the count is not 0.
///
/// Calling dec() on a Latch with a count of 0 has undefined behavior.
class Latch {
  uint32_t _count;
  mutable std::mutex _condMut;
  mutable std::condition_variable _cond;

public:
  explicit Latch(uint32_t count = 0) : _count(count) {}

  /// Destruction blocks until the count is 0.
  ~Latch() { sync(); }

  /// Add one outstanding event.
  void inc() {
    std::lock_guard<std::mutex> guard(_condMut);
    _count += 1;
  }

  /// Remove one outstanding event; wakes every waiter when the count hits 0.
  void dec() {
    std::lock_guard<std::mutex> guard(_condMut);
    _count -= 1;
    if (_count == 0)
      _cond.notify_all();
  }

  /// Block until the count is 0.
  void sync() const {
    std::unique_lock<std::mutex> guard(_condMut);
    while (_count != 0)
      _cond.wait(guard);
  }
};
// Classes in this namespace are implementation details of this header.
namespace internal {
/// \brief An abstract class that takes closures and runs them asynchronously.
///
/// Concrete executors implement add(); which one is used is selected by the
/// preprocessor conditionals that follow in this header.
class Executor {
public:
virtual ~Executor() = default;
/// Hand \p func to the executor to be run.
virtual void add(std::function<void()> func) = 0;
};
#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
class SyncExecutor : public Executor {
public:
virtual void add(std::function<void()> func) {
func();
}
};
/// \brief Return the shared SyncExecutor, created on first use.
inline Executor *getDefaultExecutor() {
  static SyncExecutor instance;
  return &instance;
}
#elif defined(_MSC_VER)
/// \brief An Executor that runs tasks via ConcRT.
class ConcRTExecutor : public Executor {
  /// Heap-allocated wrapper handed to the scheduler as a plain pointer. It is
  /// constructed with placement new into memory from concurrency::Alloc and
  /// destroys and frees itself once the task has run.
  struct Taskish {
    Taskish(std::function<void()> task) : _task(std::move(task)) {}
    std::function<void()> _task;

    /// Scheduler entry point: run the wrapped closure, then release the
    /// wrapper that \p p points to.
    static void run(void *p) {
      Taskish *self = static_cast<Taskish *>(p);
      self->_task();
      // The object was created with placement new, so its destructor has to
      // be called explicitly. Freeing the raw memory alone would leak the
      // std::function and everything it captured.
      self->~Taskish();
      concurrency::Free(self);
    }
  };

public:
  /// Schedule \p func on the current ConcRT scheduler.
  virtual void add(std::function<void()> func) {
    Concurrency::CurrentScheduler::ScheduleTask(
        Taskish::run,
        new (concurrency::Alloc(sizeof(Taskish))) Taskish(std::move(func)));
  }
};
/// \brief Return the shared ConcRTExecutor, created on first use.
inline Executor *getDefaultExecutor() {
  static ConcRTExecutor instance;
  return &instance;
}
#else
/// \brief An implementation of an Executor that runs closures on a thread pool
/// in filo order.
///
/// Worker threads are detached. On destruction the executor sets the stop
/// flag, wakes all workers, and then blocks in the destructor of the _done
/// latch until every worker has left work().
class ThreadPoolExecutor : public Executor {
public:
  /// \param threadCount number of worker threads to run. A value of 0 is
  /// treated as 1: std::thread::hardware_concurrency() is allowed to return 0,
  /// and one worker is always started, so the latch must count at least one
  /// thread or its count would underflow when that worker exits.
  explicit ThreadPoolExecutor(unsigned threadCount =
                                  std::thread::hardware_concurrency())
      : _stop(false), _done(std::max(threadCount, 1u)) {
    threadCount = std::max(threadCount, 1u);
    // Spawn all but one of the threads in another thread as spawning threads
    // can take a while.
    std::thread([this, threadCount] {
      for (unsigned i = 1; i < threadCount; ++i)
        std::thread([this] { work(); }).detach();
      work();
    }).detach();
  }

  ~ThreadPoolExecutor() override {
    std::unique_lock<std::mutex> lock(_mutex);
    _stop = true;
    lock.unlock();
    _cond.notify_all();
    // Wait for ~Latch.
  }

  /// Push \p f onto the work stack and wake one worker.
  void add(std::function<void()> f) override {
    std::unique_lock<std::mutex> lock(_mutex);
    _workStack.push(std::move(f));
    lock.unlock();
    _cond.notify_one();
  }

private:
  /// Worker loop: pop and run tasks until the stop flag is set, then report
  /// completion to the _done latch. Tasks still on the stack when the stop
  /// flag is observed are not run.
  void work() {
    while (true) {
      std::unique_lock<std::mutex> lock(_mutex);
      _cond.wait(lock, [&] {
        return _stop || !_workStack.empty();
      });
      if (_stop)
        break;
      // Move the closure out before popping to avoid copying its captures.
      auto task = std::move(_workStack.top());
      _workStack.pop();
      // Run the task without holding the lock.
      lock.unlock();
      task();
    }
    _done.dec();
  }

  std::atomic<bool> _stop;
  std::stack<std::function<void()>> _workStack;
  std::mutex _mutex;
  std::condition_variable _cond;
  // Declared last so it is destroyed first: its destructor waits for the
  // workers while the mutex and condition variable above are still alive.
  Latch _done;
};
/// \brief Return the shared ThreadPoolExecutor, created on first use.
inline Executor *getDefaultExecutor() {
  static ThreadPoolExecutor instance;
  return &instance;
}
#endif
} // namespace internal
/// \brief Allows launching a number of tasks and waiting for them to finish
/// either explicitly via sync() or implicitly on destruction.
class TaskGroup {
Latch _latch;
public:
void spawn(std::function<void()> f) {
_latch.inc();
internal::getDefaultExecutor()->add([&, f] {
f();
_latch.dec();
});
}
void sync() const { _latch.sync(); }
};
#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
/// \brief Sort [start, end) with \p comp. In this configuration (threads
/// disabled) this simply calls std::sort.
///
/// \p Comp defaults to std::less over the iterator's value type, so the
/// comparator may be omitted for any random access iterator. A default
/// function argument alone cannot be used to deduce \p Comp, which is why the
/// default is on the template parameter.
template <class RandomAccessIterator,
          class Comp = std::less<
              typename std::iterator_traits<RandomAccessIterator>::value_type>>
void parallel_sort(RandomAccessIterator start, RandomAccessIterator end,
                   const Comp &comp = Comp()) {
  std::sort(start, end, comp);
}
#elif defined(_MSC_VER)
// Use ppl parallel_sort on Windows.
/// \brief Sort [start, end) with \p comp using concurrency::parallel_sort.
///
/// \p Comp defaults to std::less over the iterator's value type, so the
/// comparator may be omitted for any random access iterator. A default
/// function argument alone cannot be used to deduce \p Comp, which is why the
/// default is on the template parameter.
template <class RandomAccessIterator,
          class Comp = std::less<
              typename std::iterator_traits<RandomAccessIterator>::value_type>>
void parallel_sort(RandomAccessIterator start, RandomAccessIterator end,
                   const Comp &comp = Comp()) {
  concurrency::parallel_sort(start, end, comp);
}
#else
namespace detail {
/// Ranges with fewer elements than this are sorted sequentially with
/// std::sort instead of being split into parallel tasks.
const ptrdiff_t minParallelSize = 1024;
/// \brief Inclusive median.
///
/// Returns an iterator to the median, under \p comp, of the first, middle and
/// last elements of [start, end). The range must not be empty.
template <class RandomAccessIterator, class Comp>
RandomAccessIterator medianOf3(RandomAccessIterator start,
                               RandomAccessIterator end, const Comp &comp) {
  RandomAccessIterator mid = start + (std::distance(start, end) / 2);
  RandomAccessIterator last = end - 1;

  if (comp(*start, *last)) {
    // first < last: the median is whichever of the three lies in between.
    if (!comp(*mid, *last))
      return last;
    return comp(*start, *mid) ? mid : start;
  }

  // last <= first.
  if (!comp(*mid, *start))
    return start;
  return comp(*last, *mid) ? mid : last;
}
/// \brief Quick sort [start, end) with \p comp, sorting the lower partition
/// as a task in \p tg and the upper partition on the calling thread.
///
/// Falls back to std::sort when the range is shorter than
/// detail::minParallelSize or when \p depth reaches 0. The function returns
/// without waiting for the spawned tasks; the range is fully sorted only after
/// \p tg has been synced.
template <class RandomAccessIterator, class Comp>
void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end,
                         const Comp &comp, TaskGroup &tg, size_t depth) {
  // Small inputs, or an exhausted recursion budget: sort sequentially.
  const bool isSmall = std::distance(start, end) < detail::minParallelSize;
  if (isSmall || depth == 0) {
    std::sort(start, end, comp);
    return;
  }

  RandomAccessIterator last = end - 1;

  // Pick the pivot and park it in the last slot while partitioning.
  auto pivot = medianOf3(start, end, comp);
  std::swap(*last, *pivot);
  pivot = std::partition(start, last, [&comp, last](decltype(*start) v) {
    return comp(v, *last);
  });
  // Put the pivot between the two partitions.
  std::swap(*pivot, *last);

  // Lower partition as a task, upper partition right here.
  const size_t nextDepth = depth - 1;
  tg.spawn([=, &comp, &tg] {
    parallel_quick_sort(start, pivot, comp, tg, nextDepth);
  });
  parallel_quick_sort(pivot + 1, end, comp, tg, nextDepth);
}
}
/// \brief Sort [start, end) with \p comp using a parallel quick sort.
///
/// The recursion depth handed to detail::parallel_quick_sort is derived from
/// the length of the range. The local TaskGroup is destroyed on return, which
/// waits for all spawned sort tasks.
///
/// \p Comp defaults to std::less over the iterator's value type, so the
/// comparator may be omitted for any random access iterator. A default
/// function argument alone cannot be used to deduce \p Comp, which is why the
/// default is on the template parameter.
template <class RandomAccessIterator,
          class Comp = std::less<
              typename std::iterator_traits<RandomAccessIterator>::value_type>>
void parallel_sort(RandomAccessIterator start, RandomAccessIterator end,
                   const Comp &comp = Comp()) {
  TaskGroup tg;
  detail::parallel_quick_sort(start, end, comp, tg,
                              llvm::Log2_64(std::distance(start, end)) + 1);
}
#endif
/// \brief Convenience overload for pointer ranges: sorts [start, end) in
/// ascending order by forwarding to the three-argument parallel_sort with
/// std::less<T>.
template <class T> void parallel_sort(T *start, T *end) {
parallel_sort(start, end, std::less<T>());
}
#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
/// \brief Apply \p func to every element of [begin, end). In this
/// configuration (threads disabled) the elements are visited in order on the
/// calling thread.
template <class Iterator, class Func>
void parallel_for_each(Iterator begin, Iterator end, Func func) {
  for (Iterator it = begin; it != end; ++it)
    func(*it);
}
#elif defined(_MSC_VER)
// Use ppl parallel_for_each on Windows.
/// \brief Apply \p func to every element of [begin, end) by forwarding to
/// concurrency::parallel_for_each.
template <class Iterator, class Func>
void parallel_for_each(Iterator begin, Iterator end, Func func) {
concurrency::parallel_for_each(begin, end, func);
}
#else
/// \brief Apply \p func to every element of [begin, end), in chunks of 1024
/// elements.
///
/// Each full chunk is spawned as a task in a local TaskGroup; the remaining
/// tail is processed on the calling thread. The TaskGroup is destroyed on
/// return, which waits for all spawned chunks. \p func is shared by reference
/// between the tasks.
template <class Iterator, class Func>
void parallel_for_each(Iterator begin, Iterator end, Func func) {
  TaskGroup tg;
  const ptrdiff_t taskSize = 1024;

  for (; std::distance(begin, end) >= taskSize; begin += taskSize) {
    Iterator chunkEnd = begin + taskSize;
    tg.spawn([=, &func] { std::for_each(begin, chunkEnd, func); });
  }

  // Whatever is left is shorter than one chunk.
  std::for_each(begin, end, func);
}
#endif
} // end namespace lld
#endif // LLD_CORE_PARALLEL_H

View File

@ -0,0 +1,46 @@
//===------ Core/Pass.h - Base class for linker passes --------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_PASS_H
#define LLD_CORE_PASS_H
#include "lld/Core/Atom.h"
#include "lld/Core/File.h"
#include "lld/Core/Reference.h"
#include "llvm/Support/Error.h"
#include <vector>
namespace lld {
class SimpleFile;
/// Once the core linking is done (which resolves references, coalesces atoms
/// and produces a complete Atom graph), the linker runs a series of passes
/// on the Atom graph. The graph is modeled as a File, which means the pass
/// has access to all the atoms and to File level attributes. Each pass does
/// a particular transformation to the Atom graph or to the File attributes.
///
/// This is the abstract base class for all passes. A Pass does its
/// actual work in its perform() method. It can iterate over Atoms in the
/// graph using the *begin()/*end() atom iterators of the File. It can add
/// new Atoms to the graph using the File's addAtom() method.
class Pass {
public:
virtual ~Pass() { }
/// Do the actual work of the Pass.
///
/// \param mergedFile the merged Atom graph the pass operates on.
/// \returns an llvm::Error reporting the outcome of the pass.
virtual llvm::Error perform(SimpleFile &mergedFile) = 0;
protected:
// Only subclasses can be instantiated.
Pass() { }
};
} // namespace lld
#endif // LLD_CORE_PASS_H

View File

@ -0,0 +1,48 @@
//===- lld/Core/PassManager.h - Manage linker passes ----------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_PASS_MANAGER_H
#define LLD_CORE_PASS_MANAGER_H
#include "lld/Core/LLVM.h"
#include "lld/Core/Pass.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <vector>
namespace lld {
class SimpleFile;
class Pass;
/// \brief Owns and runs a collection of passes.
///
/// This class is currently just a container for passes and a way to run them.
///
/// In the future this should handle timing pass runs, running parallel passes,
/// and validate/satisfy pass dependencies.
class PassManager {
public:
  /// Take ownership of \p pass and schedule it after the passes added so far.
  void add(std::unique_ptr<Pass> pass) { _passes.push_back(std::move(pass)); }

  /// Run every pass on \p file in the order the passes were added. Stops at
  /// the first pass that reports an error and returns that error.
  llvm::Error runOnFile(SimpleFile &file) {
    for (auto &current : _passes) {
      llvm::Error failure = current->perform(file);
      if (failure)
        return failure;
    }
    return llvm::Error();
  }

private:
  /// \brief Passes in the order they should run.
  std::vector<std::unique_ptr<Pass>> _passes;
};
} // end namespace lld
#endif

View File

@ -0,0 +1,157 @@
//===- lld/Core/Reader.h - Abstract File Format Reading Interface ---------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_READER_H
#define LLD_CORE_READER_H
#include "lld/Core/LLVM.h"
#include "lld/Core/Reference.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/YAMLTraits.h"
#include <functional>
#include <memory>
#include <vector>
using llvm::sys::fs::file_magic;
namespace llvm {
namespace yaml {
class IO;
}
}
namespace lld {
class File;
class LinkingContext;
class MachOLinkingContext;
/// \brief An abstract class for reading object files, library files, and
/// executable files.
///
/// Each file format (e.g. mach-o, etc) has a concrete subclass of Reader.
class Reader {
public:
virtual ~Reader() {}
/// Sniffs the file to determine if this Reader can parse it.
/// The method is called with:
/// 1) the file_magic enumeration returned by identify_magic()
/// 2) the whole file content buffer if the above is not enough.
///
/// \returns true if this Reader can parse the file.
virtual bool canParse(file_magic magic, MemoryBufferRef mb) const = 0;
/// \brief Parse a supplied buffer (already filled with the contents of a
/// file) and create a File object.
/// The resulting File object takes ownership of the MemoryBuffer.
///
/// \returns the new File, or an error code wrapped in the ErrorOr.
virtual ErrorOr<std::unique_ptr<File>>
loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &) const = 0;
};
/// \brief An abstract class for handling alternate yaml representations
/// of object files.
///
/// The YAML syntax allows "tags" which are used to specify the type of
/// the YAML node. In lld, top level YAML documents can be in many YAML
/// representations (e.g mach-o encoded as yaml, etc). A tag is used to
/// specify which representation is used in the following YAML document.
/// To work, there must be a YamlIOTaggedDocumentHandler registered that
/// handles each tag type.
class YamlIOTaggedDocumentHandler {
public:
virtual ~YamlIOTaggedDocumentHandler();
/// This method is called on each registered YamlIOTaggedDocumentHandler
/// until one returns true. If the subclass handles tag type !xyz, then
/// this method should call io.mapTag("!xyz") to see if that is the current
/// document type, and if so, process the rest of the document using
/// YAML I/O, then convert the result into an lld::File* and return it
/// through \p f.
virtual bool handledDocTag(llvm::yaml::IO &io, const lld::File *&f) const = 0;
};
/// A registry to hold the list of currently registered Readers and
/// tables which map Reference kind values to strings.
/// The linker does not directly invoke Readers. Instead, it registers
/// Readers based on its configuration and command line options, then calls
/// the Registry object to parse files.
class Registry {
public:
Registry();
/// Walk the list of registered Readers and find one that can parse the
/// supplied file and parse it.
ErrorOr<std::unique_ptr<File>>
loadFile(std::unique_ptr<MemoryBuffer> mb) const;
/// Walk the list of registered kind tables to convert a Reference Kind
/// name to a value. The result is returned through \p ns, \p a and
/// \p value.
bool referenceKindFromString(StringRef inputStr, Reference::KindNamespace &ns,
Reference::KindArch &a,
Reference::KindValue &value) const;
/// Walk the list of registered kind tables to convert a Reference Kind
/// value to a string. The result is returned through the StringRef
/// parameter.
bool referenceKindToString(Reference::KindNamespace ns, Reference::KindArch a,
Reference::KindValue value, StringRef &) const;
/// Walk the list of registered tag handlers and have the one that handles
/// the current document type process the yaml into an lld::File*.
bool handleTaggedDoc(llvm::yaml::IO &io, const lld::File *&file) const;
// These methods are called to dynamically add support for various file
// formats. The methods are also implemented in the appropriate lib*.a
// library, so that the code for handling a format is only linked in, if this
// method is used. Any options that a Reader might need must be passed
// as parameters to the addSupport*() method.
void addSupportArchives(bool logLoading);
void addSupportYamlFiles();
void addSupportMachOObjects(MachOLinkingContext &);
/// To convert between kind values and names, the registry walks the list
/// of registered kind tables. Each table is a zero terminated array of
/// KindStrings elements.
struct KindStrings {
Reference::KindValue value;
StringRef name;
};
/// A Reference Kind value is a tuple of <namespace, arch, value>. All
/// entries in a conversion table have the same <namespace, arch>. The
/// array then contains the value/name pairs.
void addKindTable(Reference::KindNamespace ns, Reference::KindArch arch,
const KindStrings array[]);
private:
/// One registered kind table together with the <namespace, arch> pair that
/// all of its entries share. Only a pointer to the table is kept.
struct KindEntry {
Reference::KindNamespace ns;
Reference::KindArch arch;
const KindStrings *array;
};
/// Register a Reader or a YAML tag handler; the Registry takes ownership.
void add(std::unique_ptr<Reader>);
void add(std::unique_ptr<YamlIOTaggedDocumentHandler>);
std::vector<std::unique_ptr<Reader>> _readers;
std::vector<std::unique_ptr<YamlIOTaggedDocumentHandler>> _yamlHandlers;
std::vector<KindEntry> _kindEntries;
};
// Utilities for building a KindString table. For instance:
// static const Registry::KindStrings table[] = {
// LLD_KIND_STRING_ENTRY(R_VAX_ADDR16),
// LLD_KIND_STRING_ENTRY(R_VAX_DATA16),
// LLD_KIND_STRING_END
// };
// LLD_KIND_STRING_ENTRY(name) expands to an initializer pairing the value of
// `name` with its spelling as a string literal. LLD_KIND_STRING_END expands to
// the terminating entry: value 0 with an empty name.
#define LLD_KIND_STRING_ENTRY(name) { name, #name }
#define LLD_KIND_STRING_END { 0, "" }
} // end namespace lld
#endif

View File

@ -0,0 +1,119 @@
//===- Core/References.h - A Reference to Another Atom --------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_REFERENCES_H
#define LLD_CORE_REFERENCES_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringSwitch.h"
namespace lld {
class Atom;
///
/// The linker has a Graph Theory model of linking. An object file is seen
/// as a set of Atoms with References to other Atoms. Each Atom is a node
/// and each Reference is an edge.
///
/// For example if a function contains a call site to "malloc" 40 bytes into
/// the Atom, then the function Atom will have a Reference of: offsetInAtom=40,
/// kind=callsite, target=malloc, addend=0.
///
/// Besides supporting traditional "relocations", references are also used
/// for forcing layout (one atom must follow another), marking data-in-code
/// (jump tables or ARM constants), etc.
///
/// The "kind" of a reference is a tuple of <namespace, arch, value>. This
/// enables us to re-use existing relocation types defined for various
/// file formats and architectures.
///
/// References and atoms form a directed graph. The dead-stripping pass
/// traverses them starting from dead-strip root atoms to garbage collect
/// unreachable ones.
///
/// References of any kind are considered as directed edges. In addition to
/// that, references of some kinds are considered as bidirected edges.
class Reference {
public:
/// Which universe defines the kindValue().
enum class KindNamespace {
all = 0,
testing = 1,
mach_o = 2,
};
KindNamespace kindNamespace() const { return (KindNamespace)_kindNamespace; }
void setKindNamespace(KindNamespace ns) { _kindNamespace = (uint8_t)ns; }
// Which architecture the kind value is for.
enum class KindArch { all, AArch64, ARM, x86, x86_64};
KindArch kindArch() const { return (KindArch)_kindArch; }
void setKindArch(KindArch a) { _kindArch = (uint8_t)a; }
/// The value part of the <namespace, arch, value> kind tuple.
typedef uint16_t KindValue;
KindValue kindValue() const { return _kindValue; }
/// setKindValue() is needed because during linking, some optimizations may
/// change the codegen and hence the reference kind.
void setKindValue(KindValue value) {
_kindValue = value;
}
/// KindValues used with KindNamespace::all and KindArch::all.
enum {
// kindLayoutAfter is treated as a bidirected edge by the dead-stripping
// pass.
kindLayoutAfter = 1,
kindAssociate,
};
// A value to be added to the value of a target
typedef int64_t Addend;
/// If the reference is a fixup in the Atom, then this returns the
/// byte offset into the Atom's content to do the fix up.
virtual uint64_t offsetInAtom() const = 0;
/// Returns the atom this reference refers to.
virtual const Atom *target() const = 0;
/// During linking, the linker may merge graphs which coalesces some nodes
/// (i.e. Atoms). To switch the target of a reference, this method is called.
virtual void setTarget(const Atom *) = 0;
/// Some relocations require a symbol and a value (e.g. foo + 4).
virtual Addend addend() const = 0;
/// During linking, some optimizations may change addend value.
virtual void setAddend(Addend) = 0;
/// Returns target specific attributes of the reference.
/// The default implementation returns 0.
virtual uint32_t tag() const { return 0; }
protected:
/// Reference is an abstract base class. Only subclasses can use constructor.
Reference(KindNamespace ns, KindArch a, KindValue value)
: _kindValue(value), _kindNamespace((uint8_t)ns), _kindArch((uint8_t)a) {}
/// The memory for Reference objects is always managed by the owning File
/// object. Therefore, no one but the owning File object should call
/// delete on an Reference. In fact, some File objects may bulk allocate
/// an array of References, so they cannot be individually deleted by anyone.
virtual ~Reference() {}
// The kind tuple. The namespace and arch enumerations are stored as uint8_t
// and cast back to their enum types by the accessors above.
KindValue _kindValue;
uint8_t _kindNamespace;
uint8_t _kindArch;
};
} // namespace lld
#endif // LLD_CORE_REFERENCES_H

View File

@ -0,0 +1,106 @@
//===- Core/Resolver.h - Resolves Atom References -------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_RESOLVER_H
#define LLD_CORE_RESOLVER_H
#include "lld/Core/ArchiveLibraryFile.h"
#include "lld/Core/File.h"
#include "lld/Core/SharedLibraryFile.h"
#include "lld/Core/Simple.h"
#include "lld/Core/SymbolTable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/ErrorOr.h"
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace lld {
class Atom;
class LinkingContext;
/// \brief Merges all input object files into a single graph.
///
/// The merged graph is accumulated in an internal MergedFile and is handed
/// out by resultFile().
class Resolver {
public:
  Resolver(LinkingContext &ctx)
      : _ctx(ctx),
        _result(new MergedFile()) {}

  // InputFiles::Handler methods
  void doDefinedAtom(OwningAtomPtr<DefinedAtom> atom);
  bool doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom);
  void doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom);
  void doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom);

  /// Handles a file: adds the atoms of the file that the resolver is
  /// currently processing.
  llvm::Expected<bool> handleFile(File &);

  /// Handles an archive library file.
  llvm::Expected<bool> handleArchiveFile(File &);

  /// Handles a shared library file.
  llvm::Error handleSharedLibrary(File &);

  /// \brief Performs the work of merging and resolving.
  // NOTE(review): the meaning of the bool result is not visible in this
  // header; confirm against the implementation.
  bool resolve();

  /// Hands ownership of the merged result to the caller. The internal pointer
  /// is moved from, so it no longer owns the file afterwards.
  std::unique_ptr<SimpleFile> resultFile() { return std::move(_result); }

private:
  /// Callback type accepted by forEachUndefines(); it receives a StringRef.
  using UndefCallback = std::function<llvm::Expected<bool>(StringRef)>;

  bool undefinesAdded(int begin, int end);
  File *getFile(int &index);

  /// \brief The main function that iterates over the files to resolve.
  bool resolveUndefines();
  void updateReferences();
  void deadStripOptimize();
  bool checkUndefines();
  void removeCoalescedAwayAtoms();
  llvm::Expected<bool> forEachUndefines(File &file, UndefCallback callback);
  void markLive(const Atom *atom);

  /// The file type used for the merged result; it is constructed with the
  /// path "<linker-internal>" and the kind kindResolverMergedObject.
  class MergedFile : public SimpleFile {
  public:
    MergedFile()
        : SimpleFile("<linker-internal>", kindResolverMergedObject) {}
    void addAtoms(llvm::MutableArrayRef<OwningAtomPtr<Atom>> atoms);
  };

  LinkingContext &_ctx;
  SymbolTable _symbolTable;
  std::vector<OwningAtomPtr<Atom>> _atoms;
  std::set<const Atom *> _deadStripRoots;
  llvm::DenseSet<const Atom *> _liveAtoms;
  llvm::DenseSet<const Atom *> _deadAtoms;
  std::unique_ptr<MergedFile> _result;
  std::unordered_multimap<const Atom *, const Atom *> _reverseRef;

  // --start-group and --end-group
  std::vector<File *> _files;
  std::map<File *, bool> _newUndefinesAdded;

  // List of undefined symbols.
  std::vector<StringRef> _undefines;

  // For every archive/shared library file, the position in _undefines where
  // its search starts. The symbols in front of that position were already
  // looked up in the file and looking them up again can never succeed, so
  // the search for undefined symbols in such a file begins at its start
  // position to save time.
  std::map<File *, size_t> _undefineIndex;
};
} // namespace lld
#endif // LLD_CORE_RESOLVER_H

View File

@ -0,0 +1,53 @@
//===- Core/SharedLibraryAtom.h - A Shared Library Atom -------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_SHARED_LIBRARY_ATOM_H
#define LLD_CORE_SHARED_LIBRARY_ATOM_H
#include "lld/Core/Atom.h"
namespace lld {
/// A SharedLibraryAtom carries no content. It stands for a symbol whose
/// binding is deferred until runtime.
class SharedLibraryAtom : public Atom {
public:
  /// Classification of the symbol that the atom represents.
  enum class Type : uint32_t {
    Unknown,
    Code,
    Data,
  };

  /// Returns the name under which the shared library is loaded at runtime.
  /// On Darwin that is the LC_DYLIB_LOAD dylib name.
  virtual StringRef loadName() const = 0;

  /// Returns whether the shared library symbol may be missing at runtime; if
  /// it may, the loader should silently resolve the symbol's address to
  /// nullptr.
  virtual bool canBeNullAtRuntime() const = 0;

  /// Returns the Type of the symbol.
  virtual Type type() const = 0;

  /// Returns the size of the symbol.
  virtual uint64_t size() const = 0;

  /// LLVM-style RTTI support: an Atom is a SharedLibraryAtom exactly when its
  /// definition() is definitionSharedLibrary.
  static bool classof(const Atom *a) {
    return a->definition() == definitionSharedLibrary;
  }

  /// LLVM-style RTTI support: trivially true for a SharedLibraryAtom.
  static bool classof(const SharedLibraryAtom *) { return true; }

protected:
  // Only subclasses may construct or destroy a SharedLibraryAtom.
  SharedLibraryAtom() : Atom(definitionSharedLibrary) {}
  ~SharedLibraryAtom() override = default;
};
} // namespace lld
#endif // LLD_CORE_SHARED_LIBRARY_ATOM_H

View File

@ -0,0 +1,70 @@
//===- Core/SharedLibraryFile.h - Models shared libraries as Atoms --------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_SHARED_LIBRARY_FILE_H
#define LLD_CORE_SHARED_LIBRARY_FILE_H
#include "lld/Core/File.h"
namespace lld {
///
/// SharedLibraryFile is the File subclass that represents a dynamic shared
/// library which is being linked against.
///
class SharedLibraryFile : public File {
public:
  /// LLVM-style RTTI support: a File is a SharedLibraryFile exactly when its
  /// kind() is kindSharedLibrary.
  static bool classof(const File *f) {
    return f->kind() == kindSharedLibrary;
  }

  /// Looks up a symbol exported by the shared library. When the library
  /// exports a symbol with the given name, a SharedLibraryAtom representing
  /// that exported symbol is returned; otherwise nullptr is returned.
  virtual OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const = 0;

  /// Returns the install name.
  virtual StringRef getDSOName() const = 0;

  /// Returns the defined atoms stored in this file.
  const AtomRange<DefinedAtom> defined() const override {
    return _definedAtoms;
  }

  /// Returns the undefined atoms stored in this file.
  const AtomRange<UndefinedAtom> undefined() const override {
    return _undefinedAtoms;
  }

  /// Returns the shared library atoms stored in this file.
  const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
    return _sharedLibraryAtoms;
  }

  /// Returns the absolute atoms stored in this file.
  const AtomRange<AbsoluteAtom> absolute() const override {
    return _absoluteAtoms;
  }

  /// Empties all four atom vectors.
  void clearAtoms() override {
    _definedAtoms.clear();
    _undefinedAtoms.clear();
    _sharedLibraryAtoms.clear();
    _absoluteAtoms.clear();
  }

protected:
  /// The constructor is protected, so only subclasses of SharedLibraryFile
  /// can be instantiated.
  explicit SharedLibraryFile(StringRef path)
      : File(path, kindSharedLibrary) {}

  // Storage behind the accessors above; available to subclasses.
  AtomVector<DefinedAtom> _definedAtoms;
  AtomVector<UndefinedAtom> _undefinedAtoms;
  AtomVector<SharedLibraryAtom> _sharedLibraryAtoms;
  AtomVector<AbsoluteAtom> _absoluteAtoms;
};
} // namespace lld
#endif // LLD_CORE_SHARED_LIBRARY_FILE_H

View File

@ -0,0 +1,324 @@
//===- lld/Core/Simple.h - Simple implementations of Atom and File --------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provide simple implementations for Atoms and File.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_SIMPLE_H
#define LLD_CORE_SIMPLE_H
#include "lld/Core/AbsoluteAtom.h"
#include "lld/Core/Atom.h"
#include "lld/Core/DefinedAtom.h"
#include "lld/Core/File.h"
#include "lld/Core/Reference.h"
#include "lld/Core/SharedLibraryAtom.h"
#include "lld/Core/UndefinedAtom.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
namespace lld {
class SimpleFile : public File {
public:
SimpleFile(StringRef path, File::Kind kind)
: File(path, kind) {}
~SimpleFile() override {
_defined.clear();
_undefined.clear();
_shared.clear();
_absolute.clear();
}
void addAtom(DefinedAtom &a) {
_defined.push_back(OwningAtomPtr<DefinedAtom>(&a));
}
void addAtom(UndefinedAtom &a) {
_undefined.push_back(OwningAtomPtr<UndefinedAtom>(&a));
}
void addAtom(SharedLibraryAtom &a) {
_shared.push_back(OwningAtomPtr<SharedLibraryAtom>(&a));
}
void addAtom(AbsoluteAtom &a) {
_absolute.push_back(OwningAtomPtr<AbsoluteAtom>(&a));
}
void addAtom(const Atom &atom) {
if (auto *p = dyn_cast<DefinedAtom>(&atom)) {
addAtom(const_cast<DefinedAtom &>(*p));
} else if (auto *p = dyn_cast<UndefinedAtom>(&atom)) {
addAtom(const_cast<UndefinedAtom &>(*p));
} else if (auto *p = dyn_cast<SharedLibraryAtom>(&atom)) {
addAtom(const_cast<SharedLibraryAtom &>(*p));
} else if (auto *p = dyn_cast<AbsoluteAtom>(&atom)) {
addAtom(const_cast<AbsoluteAtom &>(*p));
} else {
llvm_unreachable("atom has unknown definition kind");
}
}
void removeDefinedAtomsIf(std::function<bool(const DefinedAtom *)> pred) {
auto &atoms = _defined;
auto newEnd = std::remove_if(atoms.begin(), atoms.end(),
[&pred](OwningAtomPtr<DefinedAtom> &p) {
return pred(p.get());
});
atoms.erase(newEnd, atoms.end());
}
const AtomRange<DefinedAtom> defined() const override { return _defined; }
const AtomRange<UndefinedAtom> undefined() const override {
return _undefined;
}
const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
return _shared;
}
const AtomRange<AbsoluteAtom> absolute() const override {
return _absolute;
}
void clearAtoms() override {
_defined.clear();
_undefined.clear();
_shared.clear();
_absolute.clear();
}
private:
AtomVector<DefinedAtom> _defined;
AtomVector<UndefinedAtom> _undefined;
AtomVector<SharedLibraryAtom> _shared;
AtomVector<AbsoluteAtom> _absolute;
};
/// A Reference that stores target, offset and addend as plain members. It
/// also carries next/prev links so that it can be kept in an intrusive list.
class SimpleReference : public Reference {
public:
  /// Creates a reference of the given kind at offset \p off which points to
  /// \p t with addend \p a. The list links start out as nullptr.
  SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch,
                  Reference::KindValue value, uint64_t off, const Atom *t,
                  Reference::Addend a)
      : Reference(ns, arch, value),
        _target(t),
        _offsetInAtom(off),
        _addend(a),
        _next(nullptr),
        _prev(nullptr) {}

  /// Creates a reference with the "all" namespace and architecture, kind
  /// value 0, offset 0, addend 0 and no target.
  SimpleReference()
      : SimpleReference(Reference::KindNamespace::all,
                        Reference::KindArch::all, 0, 0, nullptr, 0) {}

  uint64_t offsetInAtom() const override { return _offsetInAtom; }

  /// Returns the target; asserts that a target has been set.
  const Atom *target() const override {
    assert(_target);
    return _target;
  }

  Addend addend() const override { return _addend; }
  void setAddend(Addend a) override { _addend = a; }
  void setTarget(const Atom *newAtom) override { _target = newAtom; }

  // Accessors for the intrusive list links.
  SimpleReference *getNext() const { return _next; }
  SimpleReference *getPrev() const { return _prev; }
  void setNext(SimpleReference *n) { _next = n; }
  void setPrev(SimpleReference *p) { _prev = p; }

private:
  const Atom *_target;
  uint64_t _offsetInAtom;
  Addend _addend;
  SimpleReference *_next;
  SimpleReference *_prev;
};
} // end namespace lld
// ilist will lazily create a sentinel (so end() can return a node past the
// end of the list). We need this trait so that the sentinel is allocated
// via the BumpPtrAllocator.
namespace llvm {
/// Sentinel traits for lists of lld::SimpleReference. The sentinel node is
/// created lazily and is placed in the BumpPtrAllocator that was registered
/// through setAllocator(), so it is never deleted individually.
template <>
struct ilist_sentinel_traits<lld::SimpleReference> {
  ilist_sentinel_traits() : _allocator(nullptr) {}

  /// Registers the allocator that the sentinel is created from. This must be
  /// called before the list needs its sentinel for the first time.
  void setAllocator(llvm::BumpPtrAllocator *alloc) { _allocator = alloc; }

  /// Allocates a default-constructed SimpleReference from the registered
  /// allocator to serve as the sentinel.
  lld::SimpleReference *createSentinel() const {
    // The allocator is nullptr until setAllocator() has run; dereferencing it
    // in that state would be undefined behavior, so state the precondition.
    assert(_allocator && "setAllocator() must be called before the list is used");
    return new (*_allocator) lld::SimpleReference();
  }

  /// Nothing to do: the sentinel's memory belongs to the allocator.
  static void destroySentinel(lld::SimpleReference *) {}

  /// A list starts without a head; the sentinel is created on demand by
  /// ensureHead().
  static lld::SimpleReference *provideInitialHead() { return nullptr; }

  /// Returns the sentinel. When the list has no head yet, the sentinel is
  /// created, becomes the head, is linked as its own predecessor and gets a
  /// null successor. Otherwise the sentinel is the predecessor of the head.
  lld::SimpleReference *ensureHead(lld::SimpleReference *&head) const {
    if (!head) {
      head = createSentinel();
      noteHead(head, head);
      ilist_traits<lld::SimpleReference>::setNext(head, nullptr);
      return head;
    }
    return ilist_traits<lld::SimpleReference>::getPrev(head);
  }

  /// Records the sentinel as the predecessor of the new head.
  void noteHead(lld::SimpleReference *newHead,
                lld::SimpleReference *sentinel) const {
    ilist_traits<lld::SimpleReference>::setPrev(newHead, sentinel);
  }

private:
  mutable llvm::BumpPtrAllocator *_allocator;
};
} // end namespace llvm
namespace lld {
/// A DefinedAtom with default answers for most attributes. Its references
/// are kept in an intrusive list whose nodes are allocated from the owning
/// file's allocator.
class SimpleDefinedAtom : public DefinedAtom {
public:
  /// Creates an atom that belongs to \p f. The atom takes the file's next
  /// atom ordinal, and its reference list is bound to the file's allocator.
  explicit SimpleDefinedAtom(const File &f)
      : _file(f), _ordinal(f.getNextAtomOrdinalAndIncrement()) {
    _references.setAllocator(&f.allocator());
  }

  /// Detaches the reference nodes without deleting them; they were allocated
  /// from the file's allocator (see addReference()).
  ~SimpleDefinedAtom() override {
    _references.clearAndLeakNodesUnsafely();
  }

  const File &file() const override { return _file; }

  /// Returns an empty name by default.
  StringRef name() const override { return StringRef(); }

  uint64_t ordinal() const override { return _ordinal; }

  Scope scope() const override { return DefinedAtom::scopeLinkageUnit; }

  Interposable interposable() const override {
    return DefinedAtom::interposeNo;
  }

  Merge merge() const override { return DefinedAtom::mergeNo; }

  Alignment alignment() const override { return 1; }

  SectionChoice sectionChoice() const override {
    return DefinedAtom::sectionBasedOnContent;
  }

  /// Returns an empty custom section name by default.
  StringRef customSectionName() const override { return StringRef(); }

  DeadStripKind deadStrip() const override {
    return DefinedAtom::deadStripNormal;
  }

  /// Returns an iterator whose opaque state is the address of the first node
  /// in the reference list.
  DefinedAtom::reference_iterator begin() const override {
    const void *it = reinterpret_cast<const void *>(&*_references.begin());
    return reference_iterator(*this, it);
  }

  /// Returns an iterator whose opaque state is the address of the node that
  /// the list's end() iterator designates.
  DefinedAtom::reference_iterator end() const override {
    const void *it = reinterpret_cast<const void *>(&*_references.end());
    return reference_iterator(*this, it);
  }

  /// Interprets the opaque iterator state as a pointer to a Reference.
  const Reference *derefIterator(const void *it) const override {
    return reinterpret_cast<const Reference *>(it);
  }

  /// Advances the opaque iterator state to the next node of the list.
  void incrementIterator(const void *&it) const override {
    const SimpleReference *node = reinterpret_cast<const SimpleReference *>(it);
    const SimpleReference *next = node->getNext();
    it = reinterpret_cast<const void *>(next);
  }

  /// Appends a reference with the given kind, offset, target and addend. The
  /// node is allocated from the owning file's allocator. \p target must not
  /// be null.
  void addReference(Reference::KindNamespace ns,
                    Reference::KindArch arch,
                    Reference::KindValue kindValue, uint64_t off,
                    const Atom *target, Reference::Addend a) override {
    assert(target && "trying to create reference to nothing");
    auto node = new (_file.allocator())
        SimpleReference(ns, arch, kindValue, off, target, a);
    _references.push_back(node);
  }

  /// Sort references in a canonical order: by offset, then by kind namespace,
  /// kind architecture and kind value. References that compare equal on all
  /// of these keys keep the relative order they had before the call.
  void sortReferences() const {
    // Cannot sort a linked list, so move elements into a temporary vector,
    // sort the vector, then reconstruct the list.
    llvm::SmallVector<SimpleReference *, 16> elements;
    for (SimpleReference &node : _references) {
      elements.push_back(&node);
    }
    // A stable sort is used so that the result for references with equal
    // keys does not depend on the standard library's sort algorithm.
    std::stable_sort(elements.begin(), elements.end(),
        [] (const SimpleReference *lhs, const SimpleReference *rhs) -> bool {
          uint64_t lhsOffset = lhs->offsetInAtom();
          uint64_t rhsOffset = rhs->offsetInAtom();
          if (rhsOffset != lhsOffset)
            return (lhsOffset < rhsOffset);
          if (rhs->kindNamespace() != lhs->kindNamespace())
            return (lhs->kindNamespace() < rhs->kindNamespace());
          if (rhs->kindArch() != lhs->kindArch())
            return (lhs->kindArch() < rhs->kindArch());
          return (lhs->kindValue() < rhs->kindValue());
        });
    // Unlink every node (without deleting it) and relink in sorted order.
    _references.clearAndLeakNodesUnsafely();
    for (SimpleReference *node : elements) {
      _references.push_back(node);
    }
  }

  /// Overrides the ordinal that was assigned at construction.
  void setOrdinal(uint64_t ord) { _ordinal = ord; }

private:
  typedef llvm::ilist<SimpleReference> RefList;

  const File &_file;
  uint64_t _ordinal;
  // Mutable because sortReferences() is const but relinks the list.
  mutable RefList _references;
};
/// An UndefinedAtom that only records its owning file and its name, and
/// that reports canBeNullNever.
class SimpleUndefinedAtom : public UndefinedAtom {
public:
  /// Creates an undefined atom named \p name that belongs to \p f. The name
  /// must not be empty.
  SimpleUndefinedAtom(const File &f, StringRef name)
      : _file(f),
        _name(name) {
    assert(!name.empty() && "UndefinedAtoms must have a name");
  }

  ~SimpleUndefinedAtom() override = default;

  /// Returns the File that produced/owns this Atom.
  const File &file() const override { return _file; }

  /// Returns the name of the atom. For a function atom this is the (mangled)
  /// name of the function.
  StringRef name() const override { return _name; }

  /// Always reports canBeNullNever.
  CanBeNull canBeNull() const override {
    return UndefinedAtom::canBeNullNever;
  }

private:
  const File &_file;
  StringRef _name;
};
} // end namespace lld
#endif // LLD_CORE_SIMPLE_H

View File

@ -0,0 +1,106 @@
//===- Core/SymbolTable.h - Main Symbol Table -----------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_SYMBOL_TABLE_H
#define LLD_CORE_SYMBOL_TABLE_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include <cstring>
#include <map>
#include <vector>
namespace lld {
class AbsoluteAtom;
class Atom;
class DefinedAtom;
class LinkingContext;
class ResolverOptions;
class SharedLibraryAtom;
class UndefinedAtom;
/// \brief The SymbolTable class is in charge of coalescing atoms.
///
/// Every atom that can be coalesced by name or by content should be added.
/// When an atom has been coalesced away, replacement() yields the atom that
/// took its place.
class SymbolTable {
public:
  /// @brief Adds a defined atom to the symbol table.
  bool add(const DefinedAtom &);

  /// @brief Adds an undefined atom to the symbol table.
  bool add(const UndefinedAtom &);

  /// @brief Adds a shared library atom to the symbol table.
  bool add(const SharedLibraryAtom &);

  /// @brief Adds an absolute atom to the symbol table.
  bool add(const AbsoluteAtom &);

  /// @brief Checks whether the name is in the symbol table and, if it is,
  /// that its atom is not an UndefinedAtom.
  bool isDefined(StringRef sym);

  /// @brief Returns the atom registered under the given name (or nullptr).
  const Atom *findByName(StringRef sym);

  /// @brief Returns a vector of the remaining UndefinedAtoms.
  std::vector<const UndefinedAtom *> undefines();

  /// Returns a vector of the tentative definitions.
  std::vector<StringRef> tentativeDefinitions();

  /// @brief Adds an atom to the replacement table.
  void addReplacement(const Atom *replaced, const Atom *replacement);

  /// @brief Returns the replacement when the atom has been coalesced away,
  /// otherwise the atom itself.
  const Atom *replacement(const Atom *);

  /// @brief Returns true when the atom has been coalesced away.
  bool isCoalescedAway(const Atom *);

private:
  using AtomToAtom = llvm::DenseMap<const Atom *, const Atom *>;

  /// DenseMap key traits for StringRef keys. The empty key is a
  /// default-constructed StringRef and the tombstone key is the
  /// one-character string " ".
  struct StringRefMappingInfo {
    static StringRef getEmptyKey() { return StringRef(); }
    static StringRef getTombstoneKey() { return StringRef(" ", 1); }
    static unsigned getHashValue(StringRef const val) {
      return llvm::HashString(val);
    }
    static bool isEqual(StringRef const lhs, StringRef const rhs) {
      return lhs == rhs;
    }
  };
  using NameToAtom =
      llvm::DenseMap<StringRef, const Atom *, StringRefMappingInfo>;

  /// DenseSet key traits for DefinedAtom pointers. The empty key is nullptr
  /// and the tombstone key is the pointer value -1; hashing and equality are
  /// defined out of line.
  struct AtomMappingInfo {
    static const DefinedAtom *getEmptyKey() { return nullptr; }
    static const DefinedAtom *getTombstoneKey() {
      return reinterpret_cast<DefinedAtom *>(-1);
    }
    static unsigned getHashValue(const DefinedAtom *const Val);
    static bool isEqual(const DefinedAtom *const LHS,
                        const DefinedAtom *const RHS);
  };
  using AtomContentSet = llvm::DenseSet<const DefinedAtom *, AtomMappingInfo>;

  bool addByName(const Atom &);
  bool addByContent(const DefinedAtom &);

  AtomToAtom _replacedAtoms;
  NameToAtom _nameTable;
  AtomContentSet _contentTable;
};
} // namespace lld
#endif // LLD_CORE_SYMBOL_TABLE_H

View File

@ -0,0 +1,17 @@
include/lld/Core
~~~~~~~~~~~~~~~~
* The yaml reader/writer interfaces should be changed to return
an explanatory string if there is an error. The existing error_code
abstraction only works for returning low level OS errors. It does not
work for describing formatting issues.
* We need to design a diagnostics interface. It would be nice to share code
with Clang_ where possible.
* We need to add more attributes to File. In particular, we need cpu
and OS information (like target triples). We should also provide explicit
support for `LLVM IR module flags metadata`__.
.. __: http://llvm.org/docs/LangRef.html#module_flags
.. _Clang: http://clang.llvm.org/docs/InternalsManual.html#Diagnostics

View File

@ -0,0 +1,68 @@
//===- Core/UndefinedAtom.h - An Undefined Atom ---------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_UNDEFINED_ATOM_H
#define LLD_CORE_UNDEFINED_ATOM_H
#include "lld/Core/Atom.h"
namespace lld {
/// An UndefinedAtom carries no content. It is a placeholder for an atom that
/// is expected to show up later.
class UndefinedAtom : public Atom {
public:
  /// Tells whether this undefined symbol has to be resolved or may simply
  /// evaluate to nullptr. This concept is often called "weak", but that term
  /// is overloaded and means other things as well.
  enum CanBeNull {
    /// Normal symbols must be resolved at build time.
    canBeNullNever,

    /// The symbol may be missing at runtime, in which case it evaluates to
    /// nullptr. That is, the static linker still has to find a definition
    /// (usually in some shared library), but at runtime the dynamic loader
    /// allows the symbol to be missing and resolves it to nullptr.
    ///
    /// On Darwin this is generated using a function prototype with
    /// __attribute__((weak_import)).
    /// On linux this is generated using a function prototype with
    /// __attribute__((weak)).
    /// On Windows this feature is not supported.
    canBeNullAtRuntime,

    /// The symbol may be missing at build time. That is, the static linker
    /// does not report an error when no definition for the symbol is found
    /// at build time. Instead, it builds an executable that lets the dynamic
    /// loader find the symbol at runtime.
    /// This feature is supported on neither Darwin nor Windows.
    /// On linux this is generated using a function prototype with
    /// __attribute__((weak)).
    canBeNullAtBuildtime
  };

  /// Returns the CanBeNull policy of this symbol.
  virtual CanBeNull canBeNull() const = 0;

  /// LLVM-style RTTI support: an Atom is an UndefinedAtom exactly when its
  /// definition() is definitionUndefined.
  static bool classof(const Atom *a) {
    return a->definition() == definitionUndefined;
  }

  /// LLVM-style RTTI support: trivially true for an UndefinedAtom.
  static bool classof(const UndefinedAtom *) { return true; }

protected:
  // Only subclasses may construct or destroy an UndefinedAtom.
  UndefinedAtom() : Atom(definitionUndefined) {}
  ~UndefinedAtom() override = default;
};
} // namespace lld
#endif // LLD_CORE_UNDEFINED_ATOM_H

View File

@ -0,0 +1,47 @@
//===- lld/Core/Writer.h - Abstract File Format Interface -----------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_WRITER_H
#define LLD_CORE_WRITER_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <vector>
namespace lld {
class File;
class LinkingContext;
class MachOLinkingContext;
/// \brief Abstract base class for writing object files, shared library files
/// and executable files. Every file format (e.g. mach-o, etc) provides a
/// concrete subclass of Writer.
class Writer {
public:
  virtual ~Writer();

  /// \brief Writes the supplied File object out as a file at \p path.
  virtual llvm::Error writeFile(const File &linkedFile, StringRef path) = 0;

  /// \brief Hook that Core Linking calls so that the Writer gets a chance to
  /// add file format specific "files" to the set of files to be linked. This
  /// is the way file format specific atoms can be added to the link. The
  /// default implementation adds nothing.
  virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) {}

protected:
  // The constructor is protected: only concrete subclasses can be
  // instantiated.
  Writer();
};
// Factory functions that return a Writer owned by the caller.
// NOTE(review): judging by the names these create the mach-o and the YAML
// writer respectively; the implementations are not visible here.
std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &);
std::unique_ptr<Writer> createWriterYAML(const LinkingContext &);
} // end namespace lld
#endif

Some files were not shown because too many files have changed in this diff Show More