Vendor import of lld release_39 branch r276489:

https://llvm.org/svn/llvm-project/lld/branches/release_39@276489
This commit is contained in:
dim 2016-07-23 20:48:50 +00:00
parent 637003c885
commit 34a412d2e3
1684 changed files with 31087 additions and 79052 deletions

View File

@ -1,4 +1,4 @@
{
"project_id" : "lld",
"conduit_uri" : "http://reviews.llvm.org/"
"conduit_uri" : "https://reviews.llvm.org/"
}

View File

@ -94,7 +94,7 @@ endmacro(add_lld_library)
add_subdirectory(lib)
add_subdirectory(tools)
add_subdirectory(tools/lld)
if (LLVM_INCLUDE_TESTS)
add_subdirectory(test)

View File

@ -10,6 +10,7 @@ add_lld_library(lldCOFF
Error.cpp
ICF.cpp
InputFiles.cpp
Librarian.cpp
MarkLive.cpp
ModuleDef.cpp
PDB.cpp
@ -28,6 +29,8 @@ add_lld_library(lldCOFF
Target
Option
Support
LINK_LIBS ${PTHREAD_LIB}
)
add_dependencies(lldCOFF COFFOptionsTableGen)

View File

@ -34,10 +34,7 @@ SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H)
// Initialize SectionName.
File->getCOFFObj()->getSectionName(Header, SectionName);
// Bit [20:24] contains section alignment. Both 0 and 1 mean alignment 1.
unsigned Shift = (Header->Characteristics >> 20) & 0xF;
if (Shift > 0)
Align = uint32_t(1) << (Shift - 1);
Align = Header->getAlignment();
// Only COMDAT sections are subject to dead-stripping.
Live = !isCOMDAT();
@ -64,7 +61,7 @@ void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym,
case IMAGE_REL_AMD64_SECTION: add16(Off, Sym->getSectionIndex()); break;
case IMAGE_REL_AMD64_SECREL: add32(Off, Sym->getSecrel()); break;
default:
error("Unsupported relocation type");
fatal("unsupported relocation type");
}
}
@ -79,7 +76,7 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym,
case IMAGE_REL_I386_SECTION: add16(Off, Sym->getSectionIndex()); break;
case IMAGE_REL_I386_SECREL: add32(Off, Sym->getSecrel()); break;
default:
error("Unsupported relocation type");
fatal("unsupported relocation type");
}
}
@ -123,7 +120,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym,
case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break;
case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break;
default:
error("Unsupported relocation type");
fatal("unsupported relocation type");
}
}
@ -310,7 +307,7 @@ void SEHTableChunk::writeTo(uint8_t *Buf) const {
BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) {
// Block header consists of 4 byte page RVA and 4 byte block size.
// Each entry is 2 bytes. The last entry may be padding.
Data.resize(align((End - Begin) * 2 + 8, 4));
Data.resize(alignTo((End - Begin) * 2 + 8, 4));
uint8_t *P = Data.data();
write32le(P, Page);
write32le(P + 4, Data.size());

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Object/COFF.h"
#include <atomic>
#include <utility>
#include <vector>
namespace lld {
@ -138,6 +139,7 @@ class SectionChunk : public Chunk {
SectionChunk(ObjectFile *File, const coff_section *Header);
static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
size_t getSize() const override { return Header->SizeOfRawData; }
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *Buf) const override;
bool hasData() const override;
uint32_t getPermissions() const override;
@ -186,8 +188,6 @@ class SectionChunk : public Chunk {
uint32_t Checksum = 0;
private:
ArrayRef<uint8_t> getContents() const;
// A file this chunk was created from.
ObjectFile *File;
@ -295,7 +295,7 @@ class LocalImportChunk : public Chunk {
// functions. x86-only.
class SEHTableChunk : public Chunk {
public:
explicit SEHTableChunk(std::set<Defined *> S) : Syms(S) {}
explicit SEHTableChunk(std::set<Defined *> S) : Syms(std::move(S)) {}
size_t getSize() const override { return Syms.size() * 4; }
void writeTo(uint8_t *Buf) const override;
@ -326,10 +326,6 @@ class Baserel {
uint8_t Type;
};
inline uint64_t align(uint64_t Value, uint64_t Align) {
return llvm::RoundUpToAlignment(Value, Align);
}
} // namespace coff
} // namespace lld

View File

@ -106,11 +106,15 @@ struct Configuration {
// Used for /merge:from=to (e.g. /merge:.rdata=.text)
std::map<StringRef, StringRef> Merge;
// Used for /section=.name,{DEKPRSW} to set section attributes.
std::map<StringRef, uint32_t> Section;
// Options for manifest files.
ManifestKind Manifest = SideBySide;
int ManifestID = 1;
StringRef ManifestDependency;
bool ManifestUAC = true;
std::vector<std::string> ManifestInput;
StringRef ManifestLevel = "'asInvoker'";
StringRef ManifestUIAccess = "'false'";
StringRef ManifestFile;

View File

@ -45,7 +45,7 @@ class HintNameChunk : public Chunk {
size_t getSize() const override {
// Starts with 2 byte Hint field, followed by a null-terminated string,
// ends with 0 or 1 byte padding.
return align(Name.size() + 3, 2);
return alignTo(Name.size() + 3, 2);
}
void writeTo(uint8_t *Buf) const override {

View File

@ -14,6 +14,7 @@
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Driver/Driver.h"
#include "llvm/ADT/Optional.h"
#include "llvm/LibDriver/LibDriver.h"
#include "llvm/Option/Arg.h"
@ -40,27 +41,28 @@ namespace coff {
Configuration *Config;
LinkerDriver *Driver;
void link(llvm::ArrayRef<const char *> Args) {
bool link(llvm::ArrayRef<const char *> Args) {
Configuration C;
LinkerDriver D;
Config = &C;
Driver = &D;
return Driver->link(Args);
Driver->link(Args);
return true;
}
// Drop directory components and replace extension with ".exe".
// Drop directory components and replace extension with ".exe" or ".dll".
static std::string getOutputPath(StringRef Path) {
auto P = Path.find_last_of("\\/");
StringRef S = (P == StringRef::npos) ? Path : Path.substr(P + 1);
return (S.substr(0, S.rfind('.')) + ".exe").str();
const char* E = Config->DLL ? ".dll" : ".exe";
return (S.substr(0, S.rfind('.')) + E).str();
}
// Opens a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
MemoryBufferRef LinkerDriver::openFile(StringRef Path) {
auto MBOrErr = MemoryBuffer::getFile(Path);
error(MBOrErr, Twine("Could not open ") + Path);
std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
std::unique_ptr<MemoryBuffer> MB =
check(MemoryBuffer::getFile(Path), "could not open " + Path);
MemoryBufferRef MBRef = MB->getMemBufferRef();
OwningMBs.push_back(std::move(MB)); // take ownership
return MBRef;
@ -116,12 +118,16 @@ void LinkerDriver::parseDirectives(StringRef S) {
case OPT_nodefaultlib:
Config->NoDefaultLibs.insert(doFindLib(Arg->getValue()));
break;
case OPT_section:
parseSection(Arg->getValue());
break;
case OPT_editandcontinue:
case OPT_fastfail:
case OPT_guardsym:
case OPT_throwingnew:
break;
default:
error(Twine(Arg->getSpelling()) + " is not allowed in .drectve");
fatal(Arg->getSpelling() + " is not allowed in .drectve");
}
}
}
@ -246,7 +252,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
// We call our own implementation of lib.exe that understands bitcode files.
if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) {
if (llvm::libDriverMain(ArgsArr.slice(1)) != 0)
error("lib failed");
fatal("lib failed");
return;
}
@ -268,7 +274,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
}
if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end())
error("no input files.");
fatal("no input files");
// Construct search path list.
SearchPaths.push_back("");
@ -295,7 +301,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
// Handle /noentry
if (Args.hasArg(OPT_noentry)) {
if (!Args.hasArg(OPT_dll))
error("/noentry must be specified with /dll");
fatal("/noentry must be specified with /dll");
Config->NoEntry = true;
}
@ -308,7 +314,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
// Handle /fixed
if (Args.hasArg(OPT_fixed)) {
if (Args.hasArg(OPT_dynamicbase))
error("/fixed must not be specified with /dynamicbase");
fatal("/fixed must not be specified with /dynamicbase");
Config->Relocatable = false;
Config->DynamicBase = false;
}
@ -382,17 +388,17 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
StringRef OptLevel = StringRef(S).substr(7);
if (OptLevel.getAsInteger(10, Config->LTOOptLevel) ||
Config->LTOOptLevel > 3)
error("/opt:lldlto: invalid optimization level: " + OptLevel);
fatal("/opt:lldlto: invalid optimization level: " + OptLevel);
continue;
}
if (StringRef(S).startswith("lldltojobs=")) {
StringRef Jobs = StringRef(S).substr(11);
if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0)
error("/opt:lldltojobs: invalid job count: " + Jobs);
fatal("/opt:lldltojobs: invalid job count: " + Jobs);
continue;
}
if (S != "ref" && S != "lbr" && S != "nolbr")
error(Twine("/opt: unknown option: ") + S);
fatal("/opt: unknown option: " + S);
}
}
@ -404,6 +410,10 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
for (auto *Arg : Args.filtered(OPT_merge))
parseMerge(Arg->getValue());
// Handle /section
for (auto *Arg : Args.filtered(OPT_section))
parseSection(Arg->getValue());
// Handle /manifest
if (auto *Arg = Args.getLastArg(OPT_manifest_colon))
parseManifest(Arg->getValue());
@ -420,6 +430,10 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
if (auto *Arg = Args.getLastArg(OPT_manifestfile))
Config->ManifestFile = Arg->getValue();
// Handle /manifestinput
for (auto *Arg : Args.filtered(OPT_manifestinput))
Config->ManifestInput.push_back(Arg->getValue());
// Handle miscellaneous boolean flags.
if (Args.hasArg(OPT_allowbind_no))
Config->AllowBind = false;
@ -485,7 +499,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
continue;
}
if (Config->Machine != MT)
error(Twine(File->getShortName()) + ": machine type " + machineToStr(MT) +
fatal(File->getShortName() + ": machine type " + machineToStr(MT) +
" conflicts with " + machineToStr(Config->Machine));
}
if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
@ -520,7 +534,7 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
// infer that from user-defined entry name.
StringRef S = findDefaultEntry();
if (S.empty())
error("entry point must be defined");
fatal("entry point must be defined");
Config->Entry = addUndefined(S);
if (Config->Verbose)
llvm::outs() << "Entry name inferred: " << S << "\n";
@ -627,14 +641,14 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) {
Config->Subsystem = inferSubsystem();
if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN)
error("subsystem must be defined");
fatal("subsystem must be defined");
}
// Handle /safeseh.
if (Args.hasArg(OPT_safeseh))
for (ObjectFile *File : Symtab.ObjectFiles)
if (!File->SEHCompat)
error("/safeseh: " + File->getName() + " is not compatible with SEH");
fatal("/safeseh: " + File->getName() + " is not compatible with SEH");
// Windows specific -- when we are creating a .dll file, we also
// need to create a .lib file.
@ -668,7 +682,8 @@ void LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) {
if (auto *Arg = Args.getLastArg(OPT_lldmap)) {
std::error_code EC;
llvm::raw_fd_ostream Out(Arg->getValue(), EC, OpenFlags::F_Text);
error(EC, "Could not create the symbol map");
if (EC)
fatal(EC, "could not create the symbol map");
Symtab.printMap(Out);
}
// Call exit to avoid calling destructors.

View File

@ -34,9 +34,6 @@ using llvm::COFF::WindowsSubsystem;
using llvm::Optional;
class InputFile;
// Entry point of the COFF linker.
void link(llvm::ArrayRef<const char *> Args);
// Implemented in MarkLive.cpp.
void markLive(const std::vector<Chunk *> &Chunks);
@ -136,6 +133,7 @@ void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
void parseAlternateName(StringRef);
void parseMerge(StringRef);
void parseSection(StringRef);
// Parses a string in the form of "EMBED[,=<integer>]|NO".
void parseManifest(StringRef Arg);
@ -163,7 +161,6 @@ void checkFailIfMismatch(StringRef Arg);
std::unique_ptr<MemoryBuffer>
convertResToCOFF(const std::vector<MemoryBufferRef> &MBs);
void touchFile(StringRef Path);
void createPDB(StringRef Path);
// Create enum with OPT_xxx values for each option in Options.td

View File

@ -19,15 +19,12 @@
#include "Symbols.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
@ -53,7 +50,8 @@ class Executor {
void run() {
ErrorOr<std::string> ExeOrErr = llvm::sys::findProgramByName(Prog);
error(ExeOrErr, Twine("unable to find ") + Prog + " in PATH: ");
if (auto EC = ExeOrErr.getError())
fatal(EC, "unable to find " + Prog + " in PATH: ");
const char *Exe = Saver.save(*ExeOrErr);
Args.insert(Args.begin(), Exe);
Args.push_back(nullptr);
@ -61,7 +59,7 @@ class Executor {
for (const char *S : Args)
if (S)
llvm::errs() << S << " ";
error("failed");
fatal("ExecuteAndWait failed");
}
}
@ -85,7 +83,7 @@ MachineTypes getMachineType(StringRef S) {
.Default(IMAGE_FILE_MACHINE_UNKNOWN);
if (MT != IMAGE_FILE_MACHINE_UNKNOWN)
return MT;
error(Twine("unknown /machine argument: ") + S);
fatal("unknown /machine argument: " + S);
}
StringRef machineToStr(MachineTypes MT) {
@ -106,9 +104,9 @@ void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size) {
StringRef S1, S2;
std::tie(S1, S2) = Arg.split(',');
if (S1.getAsInteger(0, *Addr))
error(Twine("invalid number: ") + S1);
fatal("invalid number: " + S1);
if (Size && !S2.empty() && S2.getAsInteger(0, *Size))
error(Twine("invalid number: ") + S2);
fatal("invalid number: " + S2);
}
// Parses a string in the form of "<integer>[.<integer>]".
@ -117,10 +115,10 @@ void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) {
StringRef S1, S2;
std::tie(S1, S2) = Arg.split('.');
if (S1.getAsInteger(0, *Major))
error(Twine("invalid number: ") + S1);
fatal("invalid number: " + S1);
*Minor = 0;
if (!S2.empty() && S2.getAsInteger(0, *Minor))
error(Twine("invalid number: ") + S2);
fatal("invalid number: " + S2);
}
// Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]".
@ -140,7 +138,7 @@ void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
.Case("windows", IMAGE_SUBSYSTEM_WINDOWS_GUI)
.Default(IMAGE_SUBSYSTEM_UNKNOWN);
if (*Sys == IMAGE_SUBSYSTEM_UNKNOWN)
error(Twine("unknown subsystem: ") + SysStr);
fatal("unknown subsystem: " + SysStr);
if (!Ver.empty())
parseVersion(Ver, Major, Minor);
}
@ -151,10 +149,10 @@ void parseAlternateName(StringRef S) {
StringRef From, To;
std::tie(From, To) = S.split('=');
if (From.empty() || To.empty())
error(Twine("/alternatename: invalid argument: ") + S);
fatal("/alternatename: invalid argument: " + S);
auto It = Config->AlternateNames.find(From);
if (It != Config->AlternateNames.end() && It->second != To)
error(Twine("/alternatename: conflicts: ") + S);
fatal("/alternatename: conflicts: " + S);
Config->AlternateNames.insert(It, std::make_pair(From, To));
}
@ -164,7 +162,7 @@ void parseMerge(StringRef S) {
StringRef From, To;
std::tie(From, To) = S.split('=');
if (From.empty() || To.empty())
error(Twine("/merge: invalid argument: ") + S);
fatal("/merge: invalid argument: " + S);
auto Pair = Config->Merge.insert(std::make_pair(From, To));
bool Inserted = Pair.second;
if (!Inserted) {
@ -175,6 +173,47 @@ void parseMerge(StringRef S) {
}
}
// Translates a section attribute string into the bitwise OR of the matching
// IMAGE_SCN_MEM_* flags. Letters are matched case-insensitively:
//   d = discardable, e = execute, k = not cached, p = not paged,
//   r = read, s = shared, w = write.
// Any other character is reported through fatal().
static uint32_t parseSectionAttributes(StringRef S) {
  uint32_t Flags = 0;
  const std::string Lowered = S.lower();
  for (char Ch : Lowered) {
    if (Ch == 'd')
      Flags |= IMAGE_SCN_MEM_DISCARDABLE;
    else if (Ch == 'e')
      Flags |= IMAGE_SCN_MEM_EXECUTE;
    else if (Ch == 'k')
      Flags |= IMAGE_SCN_MEM_NOT_CACHED;
    else if (Ch == 'p')
      Flags |= IMAGE_SCN_MEM_NOT_PAGED;
    else if (Ch == 'r')
      Flags |= IMAGE_SCN_MEM_READ;
    else if (Ch == 's')
      Flags |= IMAGE_SCN_MEM_SHARED;
    else if (Ch == 'w')
      Flags |= IMAGE_SCN_MEM_WRITE;
    else
      fatal("/section: invalid argument: " + S);
  }
  return Flags;
}
// Parses a /section option argument. The text before the first comma is the
// section name and the text after it is the attribute string; both must be
// non-empty. The resulting attribute flags are recorded in Config->Section
// under the section name.
void parseSection(StringRef S) {
  std::pair<StringRef, StringRef> Parts = S.split(',');
  StringRef SectionName = Parts.first;
  StringRef AttrText = Parts.second;
  if (SectionName.empty() || AttrText.empty())
    fatal("/section: invalid argument: " + S);
  Config->Section[SectionName] = parseSectionAttributes(AttrText);
}
// Parses a string in the form of "EMBED[,=<integer>]|NO".
// Results are directly written to Config.
void parseManifest(StringRef Arg) {
@ -183,16 +222,16 @@ void parseManifest(StringRef Arg) {
return;
}
if (!Arg.startswith_lower("embed"))
error(Twine("Invalid option ") + Arg);
fatal("invalid option " + Arg);
Config->Manifest = Configuration::Embed;
Arg = Arg.substr(strlen("embed"));
if (Arg.empty())
return;
if (!Arg.startswith_lower(",id="))
error(Twine("Invalid option ") + Arg);
fatal("invalid option " + Arg);
Arg = Arg.substr(strlen(",id="));
if (Arg.getAsInteger(0, Config->ManifestID))
error(Twine("Invalid option ") + Arg);
fatal("invalid option " + Arg);
}
// Parses a string in the form of "level=<string>|uiAccess=<string>|NO".
@ -216,7 +255,7 @@ void parseManifestUAC(StringRef Arg) {
std::tie(Config->ManifestUIAccess, Arg) = Arg.split(" ");
continue;
}
error(Twine("Invalid option ") + Arg);
fatal("invalid option " + Arg);
}
}
@ -240,10 +279,19 @@ static void quoteAndPrint(raw_ostream &Out, StringRef S) {
}
}
// Create a manifest file contents.
static std::string createManifestXml() {
std::string S;
llvm::raw_string_ostream OS(S);
// Create the default manifest file as a temporary file.
static std::string createDefaultXml() {
// Create a temporary file.
SmallString<128> Path;
if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path))
fatal(EC, "cannot create a temporary file");
// Open the temporary file for writing.
std::error_code EC;
llvm::raw_fd_ostream OS(Path, EC, sys::fs::F_Text);
if (EC)
fatal(EC, "failed to open " + Path);
// Emit the XML. Note that we do *not* verify that the XML attributes are
// syntactically correct. This is intentional for link.exe compatibility.
OS << "<?xml version=\"1.0\" standalone=\"yes\"?>\n"
@ -267,21 +315,57 @@ static std::string createManifestXml() {
}
}
OS << "</assembly>\n";
OS.flush();
return S;
OS.close();
return StringRef(Path);
}
// Returns the contents of the file at Path as a string. A failure to open
// the file is reported through check() with the message
// "could not open <Path>".
static std::string readFile(StringRef Path) {
  std::unique_ptr<MemoryBuffer> Buf =
      check(MemoryBuffer::getFile(Path), "could not open " + Path);
  StringRef Contents = Buf->getBuffer();
  return Contents.str();
}
// Returns the manifest XML as a string.
//
// The default manifest is first written to a temporary file by
// createDefaultXml(). If no /MANIFESTINPUT files were given, the contents of
// that file are returned unchanged. Otherwise mt.exe is run to merge the
// default manifest with the user-supplied ones into a second temporary file,
// and the contents of that file are returned.
static std::string createManifestXml() {
  // Create the default manifest file. Register it for removal immediately so
  // the temporary file is deleted on every return path, including the early
  // return below (previously it was left behind in that case).
  std::string Path1 = createDefaultXml();
  FileRemover Remover1(Path1);
  if (Config->ManifestInput.empty())
    return readFile(Path1);

  // If manifest files are supplied by the user using /MANIFESTINPUT
  // option, we need to merge them with the default manifest.
  SmallString<128> Path2;
  if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path2))
    fatal(EC, "cannot create a temporary file");
  FileRemover Remover2(Path2);

  // Invoke mt.exe with the default manifest first, followed by each
  // user-supplied manifest, writing the merged result to Path2.
  Executor E("mt.exe");
  E.add("/manifest");
  E.add(Path1);
  for (StringRef Filename : Config->ManifestInput) {
    E.add("/manifest");
    E.add(Filename);
  }
  E.add("/nologo");
  E.add("/out:" + StringRef(Path2));
  E.run();
  return readFile(Path2);
}
// Create a resource file containing a manifest XML.
std::unique_ptr<MemoryBuffer> createManifestRes() {
// Create a temporary file for the resource script file.
SmallString<128> RCPath;
std::error_code EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath);
error(EC, "cannot create a temporary file");
if (auto EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath))
fatal(EC, "cannot create a temporary file");
FileRemover RCRemover(RCPath);
// Open the temporary file for writing.
std::error_code EC;
llvm::raw_fd_ostream Out(RCPath, EC, sys::fs::F_Text);
error(EC, Twine("failed to open ") + RCPath);
if (EC)
fatal(EC, "failed to open " + RCPath);
// Write resource script to the RC file.
Out << "#define LANG_ENGLISH 9\n"
@ -296,8 +380,8 @@ std::unique_ptr<MemoryBuffer> createManifestRes() {
// Create output resource file.
SmallString<128> ResPath;
EC = sys::fs::createTemporaryFile("tmp", "res", ResPath);
error(EC, "cannot create a temporary file");
if (auto EC = sys::fs::createTemporaryFile("tmp", "res", ResPath))
fatal(EC, "cannot create a temporary file");
Executor E("rc.exe");
E.add("/fo");
@ -305,18 +389,17 @@ std::unique_ptr<MemoryBuffer> createManifestRes() {
E.add("/nologo");
E.add(RCPath.str());
E.run();
ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = MemoryBuffer::getFile(ResPath);
error(Ret, Twine("Could not open ") + ResPath);
return std::move(*Ret);
return check(MemoryBuffer::getFile(ResPath), "could not open " + ResPath);
}
void createSideBySideManifest() {
std::string Path = Config->ManifestFile;
if (Path == "")
Path = (Twine(Config->OutputFile) + ".manifest").str();
Path = Config->OutputFile + ".manifest";
std::error_code EC;
llvm::raw_fd_ostream Out(Path, EC, llvm::sys::fs::F_Text);
error(EC, "failed to create manifest");
if (EC)
fatal(EC, "failed to create manifest");
Out << createManifestXml();
}
@ -380,7 +463,7 @@ Export parseExport(StringRef Arg) {
return E;
err:
error(Twine("invalid /export: ") + Arg);
fatal("invalid /export: " + Arg);
}
static StringRef undecorate(StringRef Sym) {
@ -398,7 +481,7 @@ void fixupExports() {
if (E.Ordinal == 0)
continue;
if (!Ords.insert(E.Ordinal).second)
error("duplicate export ordinal: " + E.Name);
fatal("duplicate export ordinal: " + E.Name);
}
for (Export &E : Config->Exports) {
@ -459,11 +542,11 @@ void checkFailIfMismatch(StringRef Arg) {
StringRef K, V;
std::tie(K, V) = Arg.split('=');
if (K.empty() || V.empty())
error(Twine("/failifmismatch: invalid argument: ") + Arg);
fatal("/failifmismatch: invalid argument: " + Arg);
StringRef Existing = Config->MustMatch[K];
if (!Existing.empty() && V != Existing)
error(Twine("/failifmismatch: mismatch detected: ") + Existing + " and " +
V + " for key " + K);
fatal("/failifmismatch: mismatch detected: " + Existing + " and " + V +
" for key " + K);
Config->MustMatch[K] = V;
}
@ -473,8 +556,8 @@ std::unique_ptr<MemoryBuffer>
convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) {
// Create an output file path.
SmallString<128> Path;
if (llvm::sys::fs::createTemporaryFile("resource", "obj", Path))
error("Could not create temporary file");
if (auto EC = llvm::sys::fs::createTemporaryFile("resource", "obj", Path))
fatal(EC, "could not create temporary file");
// Execute cvtres.exe.
Executor E("cvtres.exe");
@ -485,170 +568,7 @@ convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) {
for (MemoryBufferRef MB : MBs)
E.add(MB.getBufferIdentifier());
E.run();
ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = MemoryBuffer::getFile(Path);
error(Ret, Twine("Could not open ") + Path);
return std::move(*Ret);
}
static std::string writeToTempFile(StringRef Contents) {
SmallString<128> Path;
int FD;
if (llvm::sys::fs::createTemporaryFile("tmp", "def", FD, Path)) {
llvm::errs() << "failed to create a temporary file\n";
return "";
}
llvm::raw_fd_ostream OS(FD, /*shouldClose*/ true);
OS << Contents;
return Path.str();
}
void touchFile(StringRef Path) {
int FD;
std::error_code EC = sys::fs::openFileForWrite(Path, FD, sys::fs::F_Append);
error(EC, "failed to create a file");
sys::Process::SafelyCloseFileDescriptor(FD);
}
static std::string getImplibPath() {
if (!Config->Implib.empty())
return Config->Implib;
SmallString<128> Out = StringRef(Config->OutputFile);
sys::path::replace_extension(Out, ".lib");
return Out.str();
}
static std::unique_ptr<MemoryBuffer> createEmptyImportLibrary() {
std::string S = (Twine("LIBRARY \"") +
llvm::sys::path::filename(Config->OutputFile) + "\"\n")
.str();
std::string Path1 = writeToTempFile(S);
std::string Path2 = getImplibPath();
llvm::FileRemover Remover1(Path1);
llvm::FileRemover Remover2(Path2);
Executor E("lib.exe");
E.add("/nologo");
E.add("/machine:" + machineToStr(Config->Machine));
E.add(Twine("/def:") + Path1);
E.add(Twine("/out:") + Path2);
E.run();
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
MemoryBuffer::getFile(Path2, -1, false);
error(BufOrErr, Twine("Failed to open ") + Path2);
return MemoryBuffer::getMemBufferCopy((*BufOrErr)->getBuffer());
}
static std::vector<NewArchiveIterator>
readMembers(const object::Archive &Archive) {
std::vector<NewArchiveIterator> V;
for (const auto &ChildOrErr : Archive.children()) {
error(ChildOrErr, "Archive::Child::getName failed");
const object::Archive::Child C(*ChildOrErr);
ErrorOr<StringRef> NameOrErr = C.getName();
error(NameOrErr, "Archive::Child::getName failed");
V.emplace_back(C, *NameOrErr);
}
return V;
}
// This class creates short import files which is described in
// PE/COFF spec 7. Import Library Format.
class ShortImportCreator {
public:
ShortImportCreator(object::Archive *A, StringRef S) : Parent(A), DLLName(S) {}
NewArchiveIterator create(StringRef Sym, uint16_t Ordinal,
ImportNameType NameType, bool isData) {
size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs
size_t Size = sizeof(object::ArchiveMemberHeader) +
sizeof(coff_import_header) + ImpSize;
char *Buf = Alloc.Allocate<char>(Size);
memset(Buf, 0, Size);
char *P = Buf;
// Write archive member header
auto *Hdr = reinterpret_cast<object::ArchiveMemberHeader *>(P);
P += sizeof(*Hdr);
sprintf(Hdr->Name, "%-12s", "dummy");
sprintf(Hdr->LastModified, "%-12d", 0);
sprintf(Hdr->UID, "%-6d", 0);
sprintf(Hdr->GID, "%-6d", 0);
sprintf(Hdr->AccessMode, "%-8d", 0644);
sprintf(Hdr->Size, "%-10d", int(sizeof(coff_import_header) + ImpSize));
// Write short import library.
auto *Imp = reinterpret_cast<coff_import_header *>(P);
P += sizeof(*Imp);
Imp->Sig2 = 0xFFFF;
Imp->Machine = Config->Machine;
Imp->SizeOfData = ImpSize;
if (Ordinal > 0)
Imp->OrdinalHint = Ordinal;
Imp->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE);
Imp->TypeInfo |= NameType << 2;
// Write symbol name and DLL name.
memcpy(P, Sym.data(), Sym.size());
P += Sym.size() + 1;
memcpy(P, DLLName.data(), DLLName.size());
std::error_code EC;
object::Archive::Child C(Parent, Buf, &EC);
assert(!EC && "We created an invalid buffer");
return NewArchiveIterator(C, DLLName);
}
private:
BumpPtrAllocator Alloc;
object::Archive *Parent;
StringRef DLLName;
};
static ImportNameType getNameType(StringRef Sym, StringRef ExtName) {
if (Sym != ExtName)
return IMPORT_NAME_UNDECORATE;
if (Config->Machine == I386 && Sym.startswith("_"))
return IMPORT_NAME_NOPREFIX;
return IMPORT_NAME;
}
static std::string replace(StringRef S, StringRef From, StringRef To) {
size_t Pos = S.find(From);
assert(Pos != StringRef::npos);
return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str();
}
// Creates an import library for a DLL. In this function, we first
// create an empty import library using lib.exe and then adds short
// import files to that file.
void writeImportLibrary() {
std::unique_ptr<MemoryBuffer> Buf = createEmptyImportLibrary();
std::error_code EC;
object::Archive Archive(Buf->getMemBufferRef(), EC);
error(EC, "Error reading an empty import file");
std::vector<NewArchiveIterator> Members = readMembers(Archive);
std::string DLLName = llvm::sys::path::filename(Config->OutputFile);
ShortImportCreator ShortImport(&Archive, DLLName);
for (Export &E : Config->Exports) {
if (E.Private)
continue;
if (E.ExtName.empty()) {
Members.push_back(ShortImport.create(
E.SymbolName, E.Ordinal, getNameType(E.SymbolName, E.Name), E.Data));
} else {
Members.push_back(ShortImport.create(
replace(E.SymbolName, E.Name, E.ExtName), E.Ordinal,
getNameType(E.SymbolName, E.Name), E.Data));
}
}
std::string Path = getImplibPath();
std::pair<StringRef, std::error_code> Result =
writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU,
/*Deterministic*/ true, /*Thin*/ false);
error(Result.second, Twine("Failed to write ") + Path);
return check(MemoryBuffer::getFile(Path), "could not open " + Path);
}
// Create OptTable
@ -695,7 +615,7 @@ llvm::opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) {
}
if (MissingCount)
error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) +
fatal("missing arg value for \"" + Twine(Args.getArgString(MissingIndex)) +
"\", expected " + Twine(MissingCount) +
(MissingCount == 1 ? " argument." : " arguments."));
for (auto *Arg : Args.filtered(OPT_UNKNOWN))

View File

@ -10,20 +10,23 @@
#include "Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
namespace lld {
namespace coff {
void error(const Twine &Msg) {
void fatal(const Twine &Msg) {
llvm::errs() << Msg << "\n";
exit(1);
}
void error(std::error_code EC, const Twine &Prefix) {
if (!EC)
return;
error(Prefix + ": " + EC.message());
// Reports a fatal error whose text is "<Msg>: <EC.message()>" by forwarding
// to fatal(const Twine &).
// NOTE(review): the concatenation is passed directly as the call argument
// rather than stored in a local; presumably deliberate, since a Twine refers
// to its operands (here the temporary returned by EC.message()) instead of
// copying them -- confirm against the Twine documentation before refactoring.
void fatal(std::error_code EC, const Twine &Msg) {
fatal(Msg + ": " + EC.message());
}
// Reports Err as a fatal error: the error is moved into errorToErrorCode()
// and the resulting std::error_code is passed, together with Msg, to the
// std::error_code overload of fatal().
void fatal(llvm::Error &Err, const Twine &Msg) {
  std::error_code EC = errorToErrorCode(std::move(Err));
  fatal(EC, Msg);
}
} // namespace coff

View File

@ -11,15 +11,25 @@
#define LLD_COFF_ERROR_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/Error.h"
namespace lld {
namespace coff {
LLVM_ATTRIBUTE_NORETURN void error(const Twine &Msg);
void error(std::error_code EC, const Twine &Prefix);
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix);
LLVM_ATTRIBUTE_NORETURN void fatal(llvm::Error &Err, const Twine &Prefix);
template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) {
error(V.getError(), Prefix);
// Unwraps an ErrorOr<T>. If V holds an error code, it is reported through
// fatal() with Prefix (fatal() is declared noreturn); otherwise the contained
// value is returned by move.
template <class T> T check(ErrorOr<T> &&V, const Twine &Prefix) {
  std::error_code EC = V.getError();
  if (EC)
    fatal(EC, Prefix);
  return std::move(*V);
}
// Unwraps an Expected<T>. The error state is taken out of E; if it converts
// to true, it is reported through fatal() with Prefix (fatal() is declared
// noreturn). Otherwise the contained value is returned by move.
template <class T> T check(Expected<T> E, const Twine &Prefix) {
if (llvm::Error Err = E.takeError())
fatal(Err, Prefix);
return std::move(*E);
}
} // namespace coff

View File

@ -70,7 +70,7 @@ class ICF {
static bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
static bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
bool forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq);
bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
std::atomic<uint64_t> NextID = { 1 };
};
@ -148,7 +148,7 @@ bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
}
bool ICF::partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
bool R = false;
for (auto It = Begin;;) {
SectionChunk *Head = *It;
@ -171,7 +171,7 @@ bool ICF::forEachGroup(std::vector<SectionChunk *> &Chunks, Comparator Eq) {
auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
return SC->GroupID != Head->GroupID;
});
if (partition(It, Bound, Eq))
if (segregate(It, Bound, Eq))
R = true;
It = Bound;
}

View File

@ -8,30 +8,41 @@
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Config.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm-c/lto.h"
#include <cstring>
#include <system_error>
#include <utility>
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support::endian;
using llvm::Triple;
using llvm::support::ulittle32_t;
using llvm::sys::fs::file_magic;
using llvm::sys::fs::identify_magic;
namespace lld {
namespace coff {
int InputFile::NextIndex = 0;
llvm::LLVMContext BitcodeFile::Context;
// Returns the last element of a path, which is supposed to be a filename.
static StringRef getBasename(StringRef Path) {
@ -52,9 +63,7 @@ std::string InputFile::getShortName() {
void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
auto ArchiveOrErr = Archive::create(MB);
error(ArchiveOrErr, "Failed to parse static library");
File = std::move(*ArchiveOrErr);
File = check(Archive::create(MB), "failed to parse static library");
// Allocate a buffer for Lazy objects.
size_t NumSyms = File->getNumberOfSymbols();
@ -67,40 +76,38 @@ void ArchiveFile::parse() {
// Seen is a map from member files to boolean values. Initially
// all members are mapped to false, which indicates all these files
// are not read yet.
for (auto &ChildOrErr : File->children()) {
error(ChildOrErr, "Failed to parse static library");
const Archive::Child &Child = *ChildOrErr;
Error Err;
for (auto &Child : File->children(Err))
Seen[Child.getChildOffset()].clear();
}
if (Err)
fatal(Err, "failed to parse static library");
}
// Returns a buffer pointing to a member file containing a given symbol.
// This function is thread-safe.
MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) {
auto COrErr = Sym->getMember();
error(COrErr, Twine("Could not get the member for symbol ") + Sym->getName());
const Archive::Child &C = *COrErr;
const Archive::Child &C =
check(Sym->getMember(),
"could not get the member for symbol " + Sym->getName());
// Return an empty buffer if we have already returned the same buffer.
if (Seen[C.getChildOffset()].test_and_set())
return MemoryBufferRef();
ErrorOr<MemoryBufferRef> Ret = C.getMemoryBufferRef();
error(Ret, Twine("Could not get the buffer for the member defining symbol ") +
Sym->getName());
return *Ret;
return check(C.getMemoryBufferRef(),
"could not get the buffer for the member defining symbol " +
Sym->getName());
}
void ObjectFile::parse() {
// Parse a memory buffer as a COFF file.
auto BinOrErr = createBinary(MB);
error(BinOrErr, "Failed to parse object file");
std::unique_ptr<Binary> Bin = std::move(*BinOrErr);
std::unique_ptr<Binary> Bin =
check(createBinary(MB), "failed to parse object file");
if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
Bin.release();
COFFObj.reset(Obj);
} else {
error(Twine(getName()) + " is not a COFF file.");
fatal(getName() + " is not a COFF file");
}
// Read section and symbol tables.
@ -116,10 +123,10 @@ void ObjectFile::initializeChunks() {
for (uint32_t I = 1; I < NumSections + 1; ++I) {
const coff_section *Sec;
StringRef Name;
std::error_code EC = COFFObj->getSection(I, Sec);
error(EC, Twine("getSection failed: #") + Twine(I));
EC = COFFObj->getSectionName(Sec, Name);
error(EC, Twine("getSectionName failed: #") + Twine(I));
if (auto EC = COFFObj->getSection(I, Sec))
fatal(EC, "getSection failed: #" + Twine(I));
if (auto EC = COFFObj->getSectionName(Sec, Name))
fatal(EC, "getSectionName failed: #" + Twine(I));
if (Name == ".sxdata") {
SXData = Sec;
continue;
@ -149,14 +156,12 @@ void ObjectFile::initializeSymbols() {
uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
SymbolBodies.reserve(NumSymbols);
SparseSymbolBodies.resize(NumSymbols);
llvm::SmallVector<Undefined *, 8> WeakAliases;
llvm::SmallVector<std::pair<Undefined *, uint32_t>, 8> WeakAliases;
int32_t LastSectionNumber = 0;
for (uint32_t I = 0; I < NumSymbols; ++I) {
// Get a COFFSymbolRef object.
auto SymOrErr = COFFObj->getSymbol(I);
error(SymOrErr, Twine("broken object file: ") + getName());
COFFSymbolRef Sym = *SymOrErr;
COFFSymbolRef Sym =
check(COFFObj->getSymbol(I), "broken object file: " + getName());
const void *AuxP = nullptr;
if (Sym.getNumberOfAuxSymbols())
@ -167,8 +172,10 @@ void ObjectFile::initializeSymbols() {
if (Sym.isUndefined()) {
Body = createUndefined(Sym);
} else if (Sym.isWeakExternal()) {
Body = createWeakExternal(Sym, AuxP);
WeakAliases.push_back((Undefined *)Body);
Body = createUndefined(Sym);
uint32_t TagIndex =
static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex;
WeakAliases.emplace_back((Undefined *)Body, TagIndex);
} else {
Body = createDefined(Sym, AuxP, IsFirst);
}
@ -179,8 +186,8 @@ void ObjectFile::initializeSymbols() {
I += Sym.getNumberOfAuxSymbols();
LastSectionNumber = Sym.getSectionNumber();
}
for (Undefined *U : WeakAliases)
U->WeakAlias = SparseSymbolBodies[(uintptr_t)U->WeakAlias];
for (auto WeakAlias : WeakAliases)
WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second];
}
Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) {
@ -189,15 +196,6 @@ Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) {
return new (Alloc) Undefined(Name);
}
Undefined *ObjectFile::createWeakExternal(COFFSymbolRef Sym, const void *AuxP) {
StringRef Name;
COFFObj->getSymbolName(Sym, Name);
auto *U = new (Alloc) Undefined(Name);
auto *Aux = (const coff_aux_weak_external *)AuxP;
U->WeakAlias = (Undefined *)(uintptr_t)Aux->TagIndex;
return U;
}
Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
bool IsFirst) {
StringRef Name;
@ -219,11 +217,21 @@ Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
}
return new (Alloc) DefinedAbsolute(Name, Sym);
}
if (Sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG)
int32_t SectionNumber = Sym.getSectionNumber();
if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
return nullptr;
// Reserved sections numbers don't have contents.
if (llvm::COFF::isReservedSectionNumber(SectionNumber))
fatal("broken object file: " + getName());
// This symbol references a section which is not present in the section
// header.
if ((uint32_t)SectionNumber >= SparseChunks.size())
fatal("broken object file: " + getName());
// Nothing else to do without a section chunk.
auto *SC = cast_or_null<SectionChunk>(SparseChunks[Sym.getSectionNumber()]);
auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]);
if (!SC)
return nullptr;
@ -250,7 +258,7 @@ void ObjectFile::initializeSEH() {
ArrayRef<uint8_t> A;
COFFObj->getSectionContents(SXData, A);
if (A.size() % 4 != 0)
error(".sxdata must be an array of symbol table indices");
fatal(".sxdata must be an array of symbol table indices");
auto *I = reinterpret_cast<const ulittle32_t *>(A.data());
auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size());
for (; I != E; ++I)
@ -276,11 +284,11 @@ void ImportFile::parse() {
// Check if the total size is valid.
if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
error("broken import library");
fatal("broken import library");
// Read names and create an __imp_ symbol.
StringRef Name = StringAlloc.save(StringRef(Buf + sizeof(*Hdr)));
StringRef ImpName = StringAlloc.save(Twine("__imp_") + Name);
StringRef ImpName = StringAlloc.save("__imp_" + Name);
const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
DLLName = StringRef(NameStart);
StringRef ExtName;
@ -315,11 +323,10 @@ void BitcodeFile::parse() {
// Usually parse() is thread-safe, but bitcode file is an exception.
std::lock_guard<std::mutex> Lock(Mu);
ErrorOr<std::unique_ptr<LTOModule>> ModOrErr =
LTOModule::createFromBuffer(llvm::getGlobalContext(), MB.getBufferStart(),
MB.getBufferSize(), llvm::TargetOptions());
error(ModOrErr, "Could not create lto module");
M = std::move(*ModOrErr);
Context.enableDebugTypeODRUniquing();
ErrorOr<std::unique_ptr<LTOModule>> ModOrErr = LTOModule::createFromBuffer(
Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions());
M = check(std::move(ModOrErr), "could not create LTO module");
llvm::StringSaver Saver(Alloc);
for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) {

View File

@ -12,7 +12,8 @@
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
@ -103,7 +104,7 @@ class ArchiveFile : public InputFile {
// All symbols returned by ArchiveFiles are of Lazy type.
std::vector<SymbolBody *> &getSymbols() override {
llvm_unreachable("internal error");
llvm_unreachable("internal fatal");
}
private:
@ -147,7 +148,6 @@ class ObjectFile : public InputFile {
Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst);
Undefined *createUndefined(COFFSymbolRef Sym);
Undefined *createWeakExternal(COFFSymbolRef Sym, const void *Aux);
std::unique_ptr<COFFObjectFile> COFFObj;
llvm::BumpPtrAllocator Alloc;
@ -204,9 +204,10 @@ class BitcodeFile : public InputFile {
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
MachineTypes getMachineType() override;
std::unique_ptr<LTOModule> takeModule() { return std::move(M); }
static llvm::LLVMContext Context;
private:
void parse() override;

489
COFF/Librarian.cpp Normal file
View File

@ -0,0 +1,489 @@
//===- Librarian.cpp ------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains functions for the Librarian. The librarian creates and
// manages libraries of the Common Object File Format (COFF) object files. It
// primarily is used for creating static libraries and import libraries.
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "Symbols.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Path.h"
#include <vector>
using namespace lld::coff;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm;
// Reports whether the configured target machine (Config->Machine) is one of
// the 32-bit machine types handled here (ARMNT, I386). AMD64 yields false;
// any other machine type is treated as unreachable.
static bool is32bit() {
  const auto Machine = Config->Machine;
  if (Machine == IMAGE_FILE_MACHINE_AMD64)
    return false;
  if (Machine == IMAGE_FILE_MACHINE_ARMNT || Machine == IMAGE_FILE_MACHINE_I386)
    return true;
  llvm_unreachable("unsupported machine");
}
// Maps the configured target machine (Config->Machine) to the relocation type
// constant this file uses for the import descriptor's RVA fields:
// ADDR32NB on AMD64 and ARMNT, DIR32NB on I386. Any other machine type is
// treated as unreachable.
static uint16_t getImgRelRelocation() {
  const auto Machine = Config->Machine;
  if (Machine == IMAGE_FILE_MACHINE_AMD64)
    return IMAGE_REL_AMD64_ADDR32NB;
  if (Machine == IMAGE_FILE_MACHINE_ARMNT)
    return IMAGE_REL_ARM_ADDR32NB;
  if (Machine == IMAGE_FILE_MACHINE_I386)
    return IMAGE_REL_I386_DIR32NB;
  llvm_unreachable("unsupported machine");
}
// Appends the raw object representation of Data (sizeof(T) bytes) to the end
// of the byte vector B.
template <class T> void append(std::vector<uint8_t> &B, const T &Data) {
  const size_t OldSize = B.size();
  B.resize(OldSize + sizeof(T));
  memcpy(B.data() + OldSize, &Data, sizeof(T));
}
// Appends a COFF string table containing Strings to the end of B.
//
// The table starts with a 4-byte little-endian length that counts the length
// field itself, followed by each string as a NUL-terminated C string, in the
// order given. An empty Strings produces a table that consists of the length
// field alone (value 4).
static void writeStringTable(std::vector<uint8_t> &B,
                             ArrayRef<const std::string> Strings) {
  // The COFF string table consists of a 4-byte value which is the size of the
  // table, including the length field itself.  This value is followed by the
  // string content itself, which is an array of null-terminated C-style
  // strings.  The termination is important as they are referenced to by offset
  // by the symbol entity in the file format.
  const std::vector<uint8_t>::size_type Offset = B.size();
  std::vector<uint8_t>::size_type Pos = Offset + sizeof(uint32_t);

  // Reserve room for the length field up front. Without this, an empty
  // Strings would leave B unchanged and the backfill below would write past
  // the end of the vector.
  B.resize(Pos);

  for (const auto &S : Strings) {
    // resize() zero-fills, so the byte after the copied text is the NUL.
    B.resize(Pos + S.length() + 1);
    strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str());
    Pos += S.length() + 1;
  }

  // Backfill the length of the table now that it has been computed.
  support::ulittle32_t Length(B.size() - Offset);
  memcpy(&B[Offset], &Length, sizeof(Length));
}
static std::string getImplibPath() {
if (!Config->Implib.empty())
return Config->Implib;
SmallString<128> Out = StringRef(Config->OutputFile);
sys::path::replace_extension(Out, ".lib");
return Out.str();
}
// Chooses the import name type for a short import entry.
//   - Sym differs from ExtName               -> IMPORT_NAME_UNDECORATE
//   - same name, I386 target, leading "_"    -> IMPORT_NAME_NOPREFIX
//   - otherwise                              -> IMPORT_NAME
static ImportNameType getNameType(StringRef Sym, StringRef ExtName) {
  if (Sym == ExtName) {
    const bool HasI386Prefix = Config->Machine == I386 && Sym.startswith("_");
    return HasI386Prefix ? IMPORT_NAME_NOPREFIX : IMPORT_NAME;
  }
  return IMPORT_NAME_UNDECORATE;
}
// Returns a copy of S in which the first occurrence of From is replaced by To.
// From must occur in S; this is checked only by an assertion.
static std::string replace(StringRef S, StringRef From, StringRef To) {
  const size_t Pos = S.find(From);
  assert(Pos != StringRef::npos);
  const StringRef Head = S.substr(0, Pos);
  const StringRef Tail = S.substr(Pos + From.size());
  return (Twine(Head) + To + Tail).str();
}
// Name of the symbol defined by the NULL import descriptor object
// (createNullImportDescriptor) and referenced from the string table of the
// import descriptor object (createImportDescriptor).
static const std::string NullImportDescriptorSymbolName =
"__NULL_IMPORT_DESCRIPTOR";
namespace {
// This class constructs various small object files necessary to support linking
// symbols imported from a DLL. The contents are pretty strictly defined and
// nearly entirely static. The details of the structures files are defined in
// WINNT.h and the PE/COFF specification.
class ObjectFactory {
// Little-endian integer wrappers used for every on-disk field.
using u16 = support::ulittle16_t;
using u32 = support::ulittle32_t;
// Backing storage for the buffers handed out by createShortImport.
BumpPtrAllocator Alloc;
// Name of the DLL. This is a non-owning view of the string passed to the
// constructor.
StringRef DLLName;
// DLLName with its last four characters dropped.
StringRef Library;
// "__IMPORT_DESCRIPTOR_" followed by Library.
std::string ImportDescriptorSymbolName;
// "\x7f" followed by Library and "_NULL_THUNK_DATA".
std::string NullThunkSymbolName;
public:
// S is the DLL file name. The last four characters are dropped to form the
// library name -- presumably a ".dll"-style extension.
// NOTE(review): a name without a four-character extension is not handled
// here; confirm that callers always pass one.
// Library is initialized before the two symbol names that are built from it,
// so the member declaration order above matters.
ObjectFactory(StringRef S)
: DLLName(S), Library(S.drop_back(4)),
ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
// Creates an Import Descriptor. This is a small object file which contains a
// reference to the terminators and contains the library name (entry) for the
// import name table. It will force the linker to construct the necessary
// structure to import symbols from the DLL.
NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer);
// Creates a NULL import descriptor. This is a small object file which
// contains a NULL import descriptor. It is used to terminate the imports
// from a specific DLL.
NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer);
// Create a NULL Thunk Entry. This is a small object file which contains a
// NULL Import Address Table entry and a NULL Import Lookup Table Entry. It
// is used to terminate the IAT and ILT.
NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer);
// Create a short import file which is described in PE/COFF spec 7. Import
// Library Format.
NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
ImportNameType NameType, bool isData);
};
}
// Builds the import descriptor object file for DLLName into Buffer.
//
// The object is laid out as: COFF header, two section headers (.idata$2 and
// .idata$6), the .idata$2 contents (one zeroed import directory entry) and
// its three relocations, the .idata$6 contents (the NUL-terminated DLL name),
// a seven-entry symbol table, and the string table.
//
// The returned member wraps a MemoryBufferRef over Buffer's bytes, so Buffer
// presumably has to stay alive and unmodified for as long as the member is
// used.
NewArchiveMember
ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
  static const uint32_t NumberOfSections = 2;
  static const uint32_t NumberOfSymbols = 7;
  static const uint32_t NumberOfRelocations = 3;

  // COFF Header
  coff_file_header Header{
      u16(Config->Machine), u16(NumberOfSections), u32(0),
      u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
          // .idata$2
          sizeof(coff_import_directory_table_entry) +
          NumberOfRelocations * sizeof(coff_relocation) +
          // .idata$6
          (DLLName.size() + 1)),
      u32(NumberOfSymbols), u16(0),
      u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0),
  };
  append(Buffer, Header);

  // Section Header Table
  //
  // This table must not be a function-local static: it embeds
  // DLLName.size(), which differs per ObjectFactory instance, and a static
  // would keep the value computed on the first call.
  const coff_section SectionTable[NumberOfSections] = {
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'},
       u32(0),
       u32(0),
       u32(sizeof(coff_import_directory_table_entry)),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
           sizeof(coff_import_directory_table_entry)),
       u32(0),
       u16(NumberOfRelocations),
       u16(0),
       u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
           IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'},
       u32(0),
       u32(0),
       u32(DLLName.size() + 1),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
           sizeof(coff_import_directory_table_entry) +
           NumberOfRelocations * sizeof(coff_relocation)),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
           IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
  };
  append(Buffer, SectionTable);

  // .idata$2
  static const coff_import_directory_table_entry ImportDescriptor{
      u32(0), u32(0), u32(0), u32(0), u32(0),
  };
  append(Buffer, ImportDescriptor);

  // Relocations for .idata$2. The second field of each entry is a symbol
  // table index: 2 is .idata$6, 3 is .idata$4 and 4 is .idata$5 (see the
  // symbol table below).
  //
  // Not static either: the relocation type depends on Config->Machine, which
  // may differ between invocations in the same process.
  const coff_relocation RelocationTable[NumberOfRelocations] = {
      {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2),
       u16(getImgRelRelocation())},
      {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)),
       u32(3), u16(getImgRelRelocation())},
      {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)),
       u32(4), u16(getImgRelRelocation())},
  };
  append(Buffer, RelocationTable);

  // .idata$6
  auto S = Buffer.size();
  Buffer.resize(S + DLLName.size() + 1);
  memcpy(&Buffer[S], DLLName.data(), DLLName.size());
  Buffer[S + DLLName.size()] = '\0';

  // Symbol Table
  coff_symbol16 SymbolTable[NumberOfSymbols] = {
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_SECTION,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}},
       u32(0),
       u16(2),
       u16(0),
       IMAGE_SYM_CLASS_STATIC,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_SECTION,
       0},
      {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_SECTION,
       0},
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(0),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
  };
  // Symbols 0, 5 and 6 take their names from the string table. The offsets
  // follow the order in which writeStringTable emits the strings below, and
  // start after the 4-byte length field.
  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
      sizeof(uint32_t);
  reinterpret_cast<StringTableOffset &>(SymbolTable[5].Name).Offset =
      sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1;
  reinterpret_cast<StringTableOffset &>(SymbolTable[6].Name).Offset =
      sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 +
      NullImportDescriptorSymbolName.length() + 1;
  append(Buffer, SymbolTable);

  // String Table
  writeStringTable(Buffer,
                   {ImportDescriptorSymbolName, NullImportDescriptorSymbolName,
                    NullThunkSymbolName});

  StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
  return {MemoryBufferRef(F, DLLName)};
}
// Builds the NULL import descriptor object file into Buffer and returns an
// archive member that wraps Buffer's bytes under the name DLLName.
//
// The object is laid out as: COFF header, one section header (.idata$3), the
// .idata$3 contents (a single all-zero import directory entry), a one-entry
// symbol table, and a string table holding NullImportDescriptorSymbolName.
NewArchiveMember
ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
static const uint32_t NumberOfSections = 1;
static const uint32_t NumberOfSymbols = 1;
// COFF Header
// The fourth field is the sum of the sizes of everything appended before the
// symbol table: this header, the section table and the .idata$3 contents.
coff_file_header Header{
u16(Config->Machine), u16(NumberOfSections), u32(0),
u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
// .idata$3
sizeof(coff_import_directory_table_entry)),
u32(NumberOfSymbols), u16(0),
u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0),
};
append(Buffer, Header);
// Section Header Table
// Every value in this table is a compile-time constant.
static const coff_section SectionTable[NumberOfSections] = {
{{'.', 'i', 'd', 'a', 't', 'a', '$', '3'},
u32(0),
u32(0),
u32(sizeof(coff_import_directory_table_entry)),
u32(sizeof(coff_file_header) +
(NumberOfSections * sizeof(coff_section))),
u32(0),
u32(0),
u16(0),
u16(0),
u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
};
append(Buffer, SectionTable);
// .idata$3
// An all-zero directory entry.
static const coff_import_directory_table_entry ImportDescriptor{
u32(0), u32(0), u32(0), u32(0), u32(0),
};
append(Buffer, ImportDescriptor);
// Symbol Table
coff_symbol16 SymbolTable[NumberOfSymbols] = {
{{{0, 0, 0, 0, 0, 0, 0, 0}},
u32(0),
u16(1),
u16(0),
IMAGE_SYM_CLASS_EXTERNAL,
0},
};
// The symbol's name is stored in the string table, directly after the table's
// 4-byte length field.
reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
sizeof(uint32_t);
append(Buffer, SymbolTable);
// String Table
writeStringTable(Buffer, {NullImportDescriptorSymbolName});
// Wrap Buffer's bytes without copying them here.
StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
return {MemoryBufferRef(F, DLLName)};
}
// Builds the NULL thunk object file into Buffer and returns an archive member
// that wraps Buffer's bytes under the name DLLName.
//
// The object holds two sections, .idata$5 and .idata$4, each containing one
// all-zero thunk entry that terminates the import tables of this DLL, plus a
// single external symbol named NullThunkSymbolName.
//
// Thunk entries are pointer-sized: 4 bytes on 32-bit targets and 8 bytes on
// 64-bit targets, so both the entry size and the section alignment follow
// the target machine.
NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
  static const uint32_t NumberOfSections = 2;
  static const uint32_t NumberOfSymbols = 1;
  const uint32_t VASize = is32bit() ? 4 : 8;
  const uint32_t Alignment =
      is32bit() ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES;

  // COFF Header
  coff_file_header Header{
      u16(Config->Machine), u16(NumberOfSections), u32(0),
      u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
          // .idata$5
          VASize +
          // .idata$4
          VASize),
      u32(NumberOfSymbols), u16(0),
      u16(is32bit() ? IMAGE_FILE_32BIT_MACHINE : 0),
  };
  append(Buffer, Header);

  // Section Header Table
  //
  // Not static: the sizes, offsets and alignment depend on the target
  // machine, which may differ between invocations in the same process.
  const coff_section SectionTable[NumberOfSections] = {
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'},
       u32(0),
       u32(0),
       u32(VASize),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32(Alignment | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE)},
      {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'},
       u32(0),
       u32(0),
       u32(VASize),
       u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
           VASize),
       u32(0),
       u32(0),
       u16(0),
       u16(0),
       u32(Alignment | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE)},
  };
  append(Buffer, SectionTable);

  // .idata$5: one zero entry. resize() zero-fills the new bytes.
  Buffer.resize(Buffer.size() + VASize);

  // .idata$4: one zero entry.
  Buffer.resize(Buffer.size() + VASize);

  // Symbol Table
  coff_symbol16 SymbolTable[NumberOfSymbols] = {
      {{{0, 0, 0, 0, 0, 0, 0, 0}},
       u32(0),
       u16(1),
       u16(0),
       IMAGE_SYM_CLASS_EXTERNAL,
       0},
  };
  // The symbol's name is stored in the string table, directly after the
  // table's 4-byte length field.
  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
      sizeof(uint32_t);
  append(Buffer, SymbolTable);

  // String Table
  writeStringTable(Buffer, {NullThunkSymbolName});

  StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
  return {MemoryBufferRef{F, DLLName}};
}
// Builds a short import object for the symbol Sym and returns it as an
// archive member named DLLName.
//
// The buffer is taken from Alloc and consists of a coff_import_header
// followed by Sym and DLLName, each NUL-terminated. Ordinal is stored in the
// header only when it is greater than zero. TypeInfo holds the import type
// (data or code, selected by isData) with NameType shifted left by two bits.
NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
                                                  uint16_t Ordinal,
                                                  ImportNameType NameType,
                                                  bool isData) {
  // Two names, each followed by a NUL terminator.
  const size_t PayloadSize = DLLName.size() + Sym.size() + 2;
  const size_t TotalSize = sizeof(coff_import_header) + PayloadSize;

  // Zero the whole buffer so unset header fields and both terminators are 0.
  char *Storage = Alloc.Allocate<char>(TotalSize);
  memset(Storage, 0, TotalSize);

  // Fill in the short import header at the start of the buffer.
  auto *Hdr = reinterpret_cast<coff_import_header *>(Storage);
  Hdr->Sig2 = 0xFFFF;
  Hdr->Machine = Config->Machine;
  Hdr->SizeOfData = PayloadSize;
  if (Ordinal > 0)
    Hdr->OrdinalHint = Ordinal;
  Hdr->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE);
  Hdr->TypeInfo |= NameType << 2;

  // The symbol name follows the header; the DLL name follows the symbol
  // name's terminator.
  char *SymPos = Storage + sizeof(*Hdr);
  char *DLLPos = SymPos + Sym.size() + 1;
  memcpy(SymPos, Sym.data(), Sym.size());
  memcpy(DLLPos, DLLName.data(), DLLName.size());

  return {MemoryBufferRef(StringRef(Storage, TotalSize), DLLName)};
}
// Creates an import library for a DLL. In this function, we first
// create an empty import library using lib.exe and then adds short
// import files to that file.
void lld::coff::writeImportLibrary() {
std::vector<NewArchiveMember> Members;
std::string Path = getImplibPath();
std::string DLLName = llvm::sys::path::filename(Config->OutputFile);
ObjectFactory OF(DLLName);
std::vector<uint8_t> ImportDescriptor;
Members.push_back(OF.createImportDescriptor(ImportDescriptor));
std::vector<uint8_t> NullImportDescriptor;
Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor));
std::vector<uint8_t> NullThunk;
Members.push_back(OF.createNullThunk(NullThunk));
for (Export &E : Config->Exports) {
if (E.Private)
continue;
ImportNameType Type = getNameType(E.SymbolName, E.Name);
std::string Name = E.ExtName.empty()
? std::string(E.SymbolName)
: replace(E.SymbolName, E.Name, E.ExtName);
Members.push_back(OF.createShortImport(Name, E.Ordinal, Type, E.Data));
}
std::pair<StringRef, std::error_code> Result =
writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU,
/*Deterministic*/ true, /*Thin*/ false);
if (auto EC = Result.second)
fatal(EC, "failed to write " + Path);
}

View File

@ -134,13 +134,13 @@ class Parser {
void readAsInt(uint64_t *I) {
read();
if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
error("integer expected");
fatal("integer expected");
}
void expect(Kind Expected, StringRef Msg) {
read();
if (Tok.K != Expected)
error(Msg);
fatal(Msg);
}
void unget() { Stack.push_back(Tok); }
@ -177,7 +177,7 @@ class Parser {
parseVersion(&Config->MajorImageVersion, &Config->MinorImageVersion);
return;
default:
error(Twine("unknown directive: ") + Tok.Value);
fatal("unknown directive: " + Tok.Value);
}
}
@ -188,7 +188,7 @@ class Parser {
if (Tok.K == Equal) {
read();
if (Tok.K != Identifier)
error(Twine("identifier expected, but got ") + Tok.Value);
fatal("identifier expected, but got " + Tok.Value);
E.ExtName = E.Name;
E.Name = Tok.Value;
} else {
@ -264,15 +264,15 @@ class Parser {
void parseVersion(uint32_t *Major, uint32_t *Minor) {
read();
if (Tok.K != Identifier)
error(Twine("identifier expected, but got ") + Tok.Value);
fatal("identifier expected, but got " + Tok.Value);
StringRef V1, V2;
std::tie(V1, V2) = Tok.Value.split('.');
if (V1.getAsInteger(10, *Major))
error(Twine("integer expected, but got ") + Tok.Value);
fatal("integer expected, but got " + Tok.Value);
if (V2.empty())
*Minor = 0;
else if (V2.getAsInteger(10, *Minor))
error(Twine("integer expected, but got ") + Tok.Value);
fatal("integer expected, but got " + Tok.Value);
}
Lexer Lex;

View File

@ -48,6 +48,7 @@ def manifestuac : P<"manifestuac", "User access control">;
def manifestfile : P<"manifestfile", "Manifest file path">;
def manifestdependency : P<"manifestdependency",
"Attributes for <dependency> in manifest file">;
def manifestinput : P<"manifestinput", "Specify manifest file">;
// We cannot use multiclass P because class name "incl" is different
// from its command line option name. We do this because "include" is
@ -110,6 +111,7 @@ def no_incremental : F<"incremental:no">;
def nologo : F<"nologo">;
def throwingnew : F<"throwingnew">;
def editandcontinue : F<"editandcontinue">;
def fastfail : F<"fastfail">;
def delay : QF<"delay">;
def errorreport : QF<"errorreport">;

View File

@ -38,7 +38,8 @@ void lld::coff::createPDB(StringRef Path) {
size_t FileSize = PageSize * 3;
ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
FileOutputBuffer::create(Path, FileSize);
error(BufferOrErr, Twine("failed to open ") + Path);
if (auto EC = BufferOrErr.getError())
fatal(EC, "failed to open " + Path);
std::unique_ptr<FileOutputBuffer> Buffer = std::move(*BufferOrErr);
// Write the file header.

View File

@ -1,265 +1 @@
The PE/COFF Linker
==================
This directory contains a linker for Windows operating system.
Because the fundamental design of this port is different from
the other ports of LLD, this port is separated to this directory.
The linker is command-line compatible with MSVC linker and is
generally 2x faster than that. It can be used to link real-world
programs such as LLD itself or Clang, or even web browsers which
are probably the largest open-source programs for Windows.
This document is also applicable to ELF linker because the linker
shares the same design as this COFF linker.
Overall Design
--------------
This is a list of important data types in this linker.
* SymbolBody
SymbolBody is a class for symbols. They may be created for symbols
in object files or in archive file headers. The linker may create
them out of nothing.
There are mainly three types of SymbolBodies: Defined, Undefined, or
Lazy. Defined symbols are for all symbols that are considered as
"resolved", including real defined symbols, COMDAT symbols, common
symbols, absolute symbols, linker-created symbols, etc. Undefined
symbols are for undefined symbols, which need to be replaced by
Defined symbols by the resolver. Lazy symbols represent symbols we
found in archive file headers -- which can turn into Defined symbols
if we read archive members, but we haven't done that yet.
* Symbol
Symbol is a pointer to a SymbolBody. There's only one Symbol for
each unique symbol name (this uniqueness is guaranteed by the symbol
table). Because SymbolBodies are created for each file
independently, there can be many SymbolBodies for the same
name. Thus, the relationship between Symbols and SymbolBodies is 1:N.
The resolver keeps the Symbol's pointer to always point to the "best"
SymbolBody. Pointer mutation is the resolve operation in this
linker.
SymbolBodies have pointers to their Symbols. That means you can
always find the best SymbolBody from any SymbolBody by following
pointers twice. This structure makes it very easy to find
replacements for symbols. For example, if you have an Undefined
SymbolBody, you can find a Defined SymbolBody for that symbol just
by going to its Symbol and then to SymbolBody, assuming the resolver
has successfully resolved all undefined symbols.
* Chunk
Chunk represents a chunk of data that will occupy space in an
output. Each regular section becomes a chunk.
Chunks created for common or BSS symbols are not backed by sections.
The linker may create chunks out of nothing to append additional
data to an output.
Chunks know about their size, how to copy their data to mmap'ed
outputs, and how to apply relocations to them. Specifically,
section-based chunks know how to read relocation tables and how to
apply them.
* SymbolTable
SymbolTable is basically a hash table from strings to Symbols, with
a logic to resolve symbol conflicts. It resolves conflicts by symbol
type. For example, if we add Undefined and Defined symbols, the
symbol table will keep the latter. If we add Defined and Lazy
symbols, it will keep the former. If we add Lazy and Undefined, it
will keep the former, but it will also trigger the Lazy symbol to
load the archive member to actually resolve the symbol.
* OutputSection
OutputSection is a container of Chunks. A Chunk belongs to at most
one OutputSection.
There are mainly three actors in this linker.
* InputFile
InputFile is a superclass of file readers. We have a different
subclass for each input file type, such as regular object file,
archive file, etc. They are responsible for creating and owning
SymbolBodies and Chunks.
* Writer
The writer is responsible for writing file headers and Chunks to a
file. It creates OutputSections, puts all Chunks into them, assigns
unique, non-overlapping addresses and file offsets to them, and then
writes them down to a file.
* Driver
The linking process is driven by the driver. The driver
- processes command line options,
- creates a symbol table,
- creates an InputFile for each input file and puts all symbols in it
into the symbol table,
- checks that there are no remaining undefined symbols,
- creates a writer,
- and passes the symbol table to the writer to write the result to a
file.
Performance
-----------
It's generally 2x faster than MSVC link.exe. It takes 3.5 seconds to
self-host on my Xeon 2580 machine. MSVC linker takes 7.0 seconds to
link the same executable. The resulting output is 65MB.
The old LLD is buggy in that it produces a 120MB executable for some reason,
and it takes 30 seconds to do that.
We believe the performance difference comes from simplification and
optimizations we made to the new port. Notable differences are listed
below.
* Reduced number of relocation table reads
In the old design, relocation tables are read from beginning to
construct graphs because they consist of graph edges. In the new
design, they are not read until we actually apply relocations.
This simplification has two benefits. One is that we don't create
additional objects for relocations but instead consume relocation
tables directly. The other is that it reduces number of relocation
entries we have to read, because we won't read relocations for
dead-stripped COMDAT sections. Large C++ programs tend to consist of
lots of COMDAT sections. In the old design, the time to process
relocation table is linear to size of input. In this new model, it's
linear to size of output.
* Reduced number of symbol table lookups
Symbol table lookup can be a heavy operation because number of
symbols can be very large and each symbol name can be very long
(think of C++ mangled symbols -- time to compute a hash value for a
string is linear to the length.)
We look up the symbol table exactly only once for each symbol in the
new design. This is I believe the minimum possible number. This is
achieved by the separation of Symbol and SymbolBody. Once you get a
pointer to a Symbol by looking up the symbol table, you can always
get the latest symbol resolution result by just dereferencing a
pointer. (I'm not sure if the idea is new to the linker. At least,
all other linkers I've investigated so far seem to look up hash
tables or sets more than once for each new symbol, but I may be
wrong.)
* Reduced number of file visits
The symbol table implements the Windows linker semantics. We treat
the symbol table as a bucket of all known symbols, including symbols
in archive file headers. We put all symbols into one bucket as we
visit new files. That means we visit each file only once.
This is different from the Unix linker semantics, in which we only
keep undefined symbols and visit each file one by one until we
resolve all undefined symbols. In the Unix model, we have to visit
archive files many times if there are circular dependencies between
archives.
* Avoiding creating additional objects or copying data
The data structures described in the previous section are all thin
wrappers for classes that LLVM libObject provides. We avoid copying
data from libObject's objects to our objects. We read much less data
than before. For example, we don't read symbol values until we apply
relocations because these values are not relevant to symbol
resolution. Again, COMDAT symbols may be discarded during symbol
resolution, so reading their attributes too early could result in a
waste. We use underlying objects directly where doing so makes
sense.
Parallelism
-----------
The abovementioned data structures are also chosen with
multi-threading in mind. It should be relatively easy to make the
symbol table a concurrent hash map, so that we let multiple workers
work on the symbol table concurrently. Symbol resolution in this design is
a single pointer mutation, which allows the resolver to work concurrently
in a lock-free manner using atomic pointer compare-and-swap.
It should also be easy to apply relocations and write chunks concurrently.
We created an experimental multi-threaded linker using the Microsoft
ConcRT concurrency library, and it was able to link itself in 0.5
seconds, so we think the design is promising.
Link-Time Optimization
----------------------
LTO is implemented by handling LLVM bitcode files as object files.
The linker resolves symbols in bitcode files normally. If all symbols
are successfully resolved, it then calls an LLVM libLTO function
with all bitcode files to convert them to one big regular COFF file.
Finally, the linker replaces bitcode symbols with COFF symbols,
so that we can link the input files as if they were in the native
format from the beginning.
The details are described in this document.
http://llvm.org/docs/LinkTimeOptimization.html
Glossary
--------
* RVA
Short for Relative Virtual Address.
Windows executables or DLLs are not position-independent; they are
linked against a fixed address called an image base. RVAs are
offsets from an image base.
Default image bases are 0x140000000 for executables and 0x180000000
for DLLs. For example, when we are creating an executable, we assume
that the executable will be loaded at address 0x140000000 by the
loader, so we apply relocations accordingly. The resulting text and data
will contain raw absolute addresses.
* VA
Short for Virtual Address. Equivalent to RVA + image base. It is
rarely used. We almost always use RVAs instead.
* Base relocations
Relocation information for the loader. If the loader decides to map
an executable or a DLL to an address other than its image
base, it fixes up the binary using information contained in the base
relocation table. A base relocation table consists of a list of
locations containing addresses. The loader adds the difference between
the image base and the actual load address to all locations listed there.
Note that this run-time relocation mechanism is much simpler than ELF.
There's no PLT or GOT. Images are relocated as a whole just
by shifting entire images in memory by some offsets. Although doing
this breaks text sharing, I think this mechanism is not actually bad
on today's computers.
* ICF
Short for Identical COMDAT Folding.
ICF is an optimization to reduce output size by merging COMDAT sections
not only by their names but also by their contents. If two COMDAT sections
happen to have the same metadata, actual contents and relocations,
they are merged by ICF. It is known as an effective technique,
and it usually reduces a C++ program's size by a few percent or more.
Note that this is not an entirely sound optimization. C/C++ require
that different functions have different addresses. If a program depends on
that property, it would fail at runtime. However, that's not really an
issue on Windows because MSVC link.exe enables the optimization by
default. As long as your program works with the linker's default
settings, your program should be safe with ICF.
See docs/NewLLD.rst

View File

@ -14,7 +14,7 @@
#include "Symbols.h"
#include "lld/Core/Parallel.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOCodeGenerator.h"
#include "llvm/LTO/legacy/LTOCodeGenerator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
@ -164,7 +164,7 @@ void SymbolTable::reportRemainingUndefines(bool Resolve) {
llvm::errs() << File->getShortName() << ": undefined symbol: "
<< Sym->getName() << "\n";
if (!Config->Force)
error("Link failed");
fatal("link failed");
}
void SymbolTable::addLazy(Lazy *New, std::vector<Symbol *> *Accum) {
@ -211,7 +211,7 @@ void SymbolTable::addSymbol(SymbolBody *New) {
// equivalent (conflicting), or more preferable, respectively.
int Comp = Existing->compare(New);
if (Comp == 0)
error(Twine("duplicate symbol: ") + Existing->getDebugName() + " and " +
fatal("duplicate symbol: " + Existing->getDebugName() + " and " +
New->getDebugName());
if (Comp < 0)
Sym->Body = New;
@ -338,21 +338,25 @@ void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) {
// diagnose them later in reportRemainingUndefines().
StringRef Name = Body->getName();
Symbol *Sym = insert(Body);
SymbolBody *Existing = Sym->Body;
if (isa<DefinedBitcode>(Sym->Body)) {
if (Existing == Body)
continue;
if (isa<DefinedBitcode>(Existing)) {
Sym->Body = Body;
continue;
}
if (auto *L = dyn_cast<Lazy>(Sym->Body)) {
if (auto *L = dyn_cast<Lazy>(Existing)) {
// We may see new references to runtime library symbols such as __chkstk
// here. These symbols must be wholly defined in non-bitcode files.
addMemberFile(L);
continue;
}
SymbolBody *Existing = Sym->Body;
int Comp = Existing->compare(Body);
if (Comp == 0)
error(Twine("LTO: unexpected duplicate symbol: ") + Name);
fatal("LTO: unexpected duplicate symbol: " + Name);
if (Comp < 0)
Sym->Body = Body;
}
@ -369,7 +373,7 @@ void SymbolTable::addCombinedLTOObjects() {
// Create an object file and add it to the symbol table by replacing any
// DefinedBitcode symbols with the definitions in the object file.
LTOCodeGenerator CG(getGlobalContext());
LTOCodeGenerator CG(BitcodeFile::Context);
CG.setOptLevel(Config->LTOOptLevel);
std::vector<ObjectFile *> Objs = createLTOObjects(&CG);
@ -379,7 +383,7 @@ void SymbolTable::addCombinedLTOObjects() {
size_t NumBitcodeFiles = BitcodeFiles.size();
run();
if (BitcodeFiles.size() != NumBitcodeFiles)
error("LTO: late loaded symbol created new bitcode reference");
fatal("LTO: late loaded symbol created new bitcode reference");
}
// Combine and compile bitcode files and then return the result
@ -414,24 +418,23 @@ std::vector<ObjectFile *> SymbolTable::createLTOObjects(LTOCodeGenerator *CG) {
DisableVerify = false;
#endif
if (!CG->optimize(DisableVerify, false, false, false))
error(""); // optimize() should have emitted any error message.
fatal(""); // optimize() should have emitted any error message.
Objs.resize(Config->LTOJobs);
// Use std::list to avoid invalidation of pointers in OSPtrs.
std::list<raw_svector_ostream> OSs;
std::vector<raw_pwrite_stream *> OSPtrs;
for (SmallVector<char, 0> &Obj : Objs) {
for (SmallString<0> &Obj : Objs) {
OSs.emplace_back(Obj);
OSPtrs.push_back(&OSs.back());
}
if (!CG->compileOptimized(OSPtrs))
error(""); // compileOptimized() should have emitted any error message.
fatal(""); // compileOptimized() should have emitted any error message.
std::vector<ObjectFile *> ObjFiles;
for (SmallVector<char, 0> &Obj : Objs) {
auto *ObjFile = new ObjectFile(
MemoryBufferRef(StringRef(Obj.data(), Obj.size()), "<LTO object>"));
for (SmallString<0> &Obj : Objs) {
auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "<LTO object>"));
Files.emplace_back(ObjFile);
ObjectFiles.push_back(ObjFile);
ObjFile->parse();

View File

@ -115,7 +115,7 @@ class SymbolTable {
std::vector<std::future<InputFile *>> ObjectQueue;
std::vector<BitcodeFile *> BitcodeFiles;
std::vector<SmallVector<char, 0>> Objs;
std::vector<SmallString<0>> Objs;
llvm::BumpPtrAllocator Alloc;
};

View File

@ -162,32 +162,6 @@ std::string SymbolBody::getDebugName() {
return N;
}
uint64_t Defined::getFileOff() {
switch (kind()) {
case DefinedImportDataKind:
return cast<DefinedImportData>(this)->getFileOff();
case DefinedImportThunkKind:
return cast<DefinedImportThunk>(this)->getFileOff();
case DefinedLocalImportKind:
return cast<DefinedLocalImport>(this)->getFileOff();
case DefinedCommonKind:
return cast<DefinedCommon>(this)->getFileOff();
case DefinedRegularKind:
return cast<DefinedRegular>(this)->getFileOff();
case DefinedBitcodeKind:
llvm_unreachable("There is no file offset for a bitcode symbol.");
case DefinedAbsoluteKind:
llvm_unreachable("Cannot get a file offset for an absolute symbol.");
case DefinedRelativeKind:
llvm_unreachable("Cannot get a file offset for a relative symbol.");
case LazyKind:
case UndefinedKind:
llvm_unreachable("Cannot get a file offset for an undefined symbol.");
}
llvm_unreachable("unknown symbol kind");
}
COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
size_t SymSize = File->getCOFFObj()->getSymbolTableEntrySize();
if (SymSize == sizeof(coff_symbol16))
@ -225,7 +199,7 @@ std::unique_ptr<InputFile> Lazy::getMember() {
else if (Magic == file_magic::bitcode)
Obj.reset(new BitcodeFile(MBRef));
else
error(Twine(File->getName()) + ": unknown file type");
fatal("unknown file type: " + File->getName());
Obj->setParentName(File->getName());
return Obj;

View File

@ -125,10 +125,6 @@ class Defined : public SymbolBody {
// writer sets and uses RVAs.
uint64_t getRVA();
// Returns the file offset of this symbol in the final executable.
// The writer uses this information to apply relocations.
uint64_t getFileOff();
// Returns the RVA relative to the beginning of the output section.
// Used to implement SECREL relocation type.
uint64_t getSecrel();

View File

@ -59,6 +59,7 @@ class Writer {
void openFile(StringRef OutputPath);
template <typename PEHeaderTy> void writeHeader();
void fixSafeSEHSymbols();
void setSectionPermissions();
void writeSections();
void sortExceptionTable();
void applyRelocations();
@ -114,6 +115,7 @@ class OutputSection {
StringRef getName() { return Name; }
std::vector<Chunk *> &getChunks() { return Chunks; }
void addPermissions(uint32_t C);
void setPermissions(uint32_t C);
uint32_t getPermissions() { return Header.Characteristics & PermMask; }
uint32_t getCharacteristics() { return Header.Characteristics; }
uint64_t getRVA() { return Header.VirtualAddress; }
@ -163,19 +165,23 @@ void OutputSection::addChunk(Chunk *C) {
Chunks.push_back(C);
C->setOutputSection(this);
uint64_t Off = Header.VirtualSize;
Off = align(Off, C->getAlign());
Off = alignTo(Off, C->getAlign());
C->setRVA(Off);
C->setOutputSectionOff(Off);
Off += C->getSize();
Header.VirtualSize = Off;
if (C->hasData())
Header.SizeOfRawData = align(Off, SectorSize);
Header.SizeOfRawData = alignTo(Off, SectorSize);
}
// Adds the bits of C that are selected by PermMask to this section's
// characteristics. Bits that are already set in the header are kept.
void OutputSection::addPermissions(uint32_t C) {
  uint32_t Extra = C & PermMask;
  Header.Characteristics |= Extra;
}
// Replaces this section's characteristics with the bits of C that are
// selected by PermMask. Unlike addPermissions(), the previous value of
// Header.Characteristics is discarded rather than merged.
void OutputSection::setPermissions(uint32_t C) {
  uint32_t Replacement = C & PermMask;
  Header.Characteristics = Replacement;
}
// Write the section header to a given buffer.
void OutputSection::writeHeaderTo(uint8_t *Buf) {
auto *Hdr = reinterpret_cast<coff_section *>(Buf);
@ -193,13 +199,13 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) {
uint64_t Defined::getSecrel() {
if (auto *D = dyn_cast<DefinedRegular>(this))
return getRVA() - D->getChunk()->getOutputSection()->getRVA();
error("SECREL relocation points to a non-regular symbol");
fatal("SECREL relocation points to a non-regular symbol");
}
uint64_t Defined::getSectionIndex() {
if (auto *D = dyn_cast<DefinedRegular>(this))
return D->getChunk()->getOutputSection()->SectionIndex;
error("SECTION relocation points to a non-regular symbol");
fatal("SECTION relocation points to a non-regular symbol");
}
bool Defined::isExecutable() {
@ -222,6 +228,7 @@ void Writer::run() {
createSection(".reloc");
assignAddresses();
removeEmptySections();
setSectionPermissions();
createSymbolAndStringTable();
openFile(Config->OutputFile);
if (Config->is64()) {
@ -232,7 +239,8 @@ void Writer::run() {
fixSafeSEHSymbols();
writeSections();
sortExceptionTable();
error(Buffer->commit(), "Failed to write the output file");
if (auto EC = Buffer->commit())
fatal(EC, "failed to write the output file");
}
static StringRef getOutputSection(StringRef Name) {
@ -447,15 +455,15 @@ void Writer::createSymbolAndStringTable() {
OutputSection *LastSection = OutputSections.back();
// We position the symbol table to be adjacent to the end of the last section.
uint64_t FileOff =
LastSection->getFileOff() + align(LastSection->getRawSize(), SectorSize);
uint64_t FileOff = LastSection->getFileOff() +
alignTo(LastSection->getRawSize(), SectorSize);
if (!OutputSymtab.empty()) {
PointerToSymbolTable = FileOff;
FileOff += OutputSymtab.size() * sizeof(coff_symbol16);
}
if (!Strtab.empty())
FileOff += Strtab.size() + 4;
FileSize = align(FileOff, SectorSize);
FileSize = alignTo(FileOff, SectorSize);
}
// Visits all sections to assign incremental, non-overlapping RVAs and
@ -466,7 +474,7 @@ void Writer::assignAddresses() {
sizeof(coff_section) * OutputSections.size();
SizeOfHeaders +=
Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
SizeOfHeaders = align(SizeOfHeaders, SectorSize);
SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize);
uint64_t RVA = 0x1000; // The first page is kept unmapped.
FileSize = SizeOfHeaders;
// Move DISCARDABLE (or non-memory-mapped) sections to the end of file because
@ -480,10 +488,10 @@ void Writer::assignAddresses() {
addBaserels(Sec);
Sec->setRVA(RVA);
Sec->setFileOffset(FileSize);
RVA += align(Sec->getVirtualSize(), PageSize);
FileSize += align(Sec->getRawSize(), SectorSize);
RVA += alignTo(Sec->getVirtualSize(), PageSize);
FileSize += alignTo(Sec->getRawSize(), SectorSize);
}
SizeOfImage = SizeOfHeaders + align(RVA - 0x1000, PageSize);
SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize);
}
template <typename PEHeaderTy> void Writer::writeHeader() {
@ -596,13 +604,26 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) {
if (Defined *B = dyn_cast<Defined>(Sym->Body)) {
Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[TLS_TABLE].Size = 40;
Dir[TLS_TABLE].Size = Config->is64()
? sizeof(object::coff_tls_directory64)
: sizeof(object::coff_tls_directory32);
}
}
if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) {
if (Defined *B = dyn_cast<Defined>(Sym->Body)) {
if (auto *B = dyn_cast<DefinedRegular>(Sym->Body)) {
SectionChunk *SC = B->getChunk();
assert(B->getRVA() >= SC->getRVA());
uint64_t OffsetInChunk = B->getRVA() - SC->getRVA();
if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize())
fatal("_load_config_used is malformed");
ArrayRef<uint8_t> SecContents = SC->getContents();
uint32_t LoadConfigSize =
*reinterpret_cast<const ulittle32_t *>(&SecContents[OffsetInChunk]);
if (OffsetInChunk + LoadConfigSize > SC->getSize())
fatal("_load_config_used is too large");
Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[LOAD_CONFIG_TABLE].Size = Config->is64() ? 112 : 64;
Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize;
}
}
@ -626,14 +647,14 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
// The first 4 bytes is length including itself.
Buf = reinterpret_cast<uint8_t *>(&SymbolTable[NumberOfSymbols]);
write32le(Buf, Strtab.size() + 4);
memcpy(Buf + 4, Strtab.data(), Strtab.size());
if (!Strtab.empty())
memcpy(Buf + 4, Strtab.data(), Strtab.size());
}
void Writer::openFile(StringRef Path) {
ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable);
error(BufferOrErr, Twine("failed to open ") + Path);
Buffer = std::move(*BufferOrErr);
Buffer = check(
FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable),
"failed to open " + Path);
}
void Writer::fixSafeSEHSymbols() {
@ -643,6 +664,17 @@ void Writer::fixSafeSEHSymbols() {
Config->SEHCount->setVA(SEHTable->getSize() / 4);
}
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
for (auto &P : Config->Section) {
StringRef Name = P.first;
uint32_t Perm = P.second;
if (auto *Sec = findSection(Name))
Sec->setPermissions(Perm);
}
}
// Write section contents to a mmap'ed file.
void Writer::writeSections() {
uint8_t *Buf = Buffer->getBufferStart();

View File

@ -2,25 +2,49 @@ set(LLVM_TARGET_DEFINITIONS Options.td)
tablegen(LLVM Options.inc -gen-opt-parser-defs)
add_public_tablegen_target(ELFOptionsTableGen)
add_lld_library(lldELF2
add_lld_library(lldELF
Driver.cpp
DriverUtils.cpp
EhFrame.cpp
Error.cpp
ICF.cpp
InputFiles.cpp
InputSection.cpp
LTO.cpp
LinkerScript.cpp
MarkLive.cpp
OutputSections.cpp
Relocations.cpp
ScriptParser.cpp
Strings.cpp
SymbolListFile.cpp
SymbolTable.cpp
Symbols.cpp
Target.cpp
Thunks.cpp
Writer.cpp
LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
Analysis
BitReader
BitWriter
Codegen
Core
IPO
Linker
LTO
Object
Option
Passes
MC
Support
Target
TransformUtils
LINK_LIBS
lldConfig
${PTHREAD_LIB}
)
add_dependencies(lldELF2 ELFOptionsTableGen)
add_dependencies(lldELF intrinsics_gen ELFOptionsTableGen)

View File

@ -17,10 +17,10 @@
#include <vector>
namespace lld {
namespace elf2 {
namespace elf {
class InputFile;
class SymbolBody;
struct Symbol;
enum ELFKind {
ELFNoneKind,
@ -30,60 +30,105 @@ enum ELFKind {
ELF64BEKind
};
enum class BuildIdKind { None, Fnv1, Md5, Sha1, Hexstring };
enum class UnresolvedPolicy { NoUndef, Error, Warn, Ignore };
struct SymbolVersion {
llvm::StringRef Name;
bool IsExternCpp;
};
// This struct contains symbols version definition that
// can be found in version script if it is used for link.
struct VersionDefinition {
VersionDefinition(llvm::StringRef Name, size_t Id) : Name(Name), Id(Id) {}
llvm::StringRef Name;
size_t Id;
std::vector<SymbolVersion> Globals;
size_t NameOff; // Offset in string table.
};
// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
// Most fields are initialized by the driver.
struct Configuration {
SymbolBody *EntrySym = nullptr;
SymbolBody *MipsGpDisp = nullptr;
Symbol *EntrySym = nullptr;
InputFile *FirstElf = nullptr;
llvm::StringRef DynamicLinker;
llvm::StringRef Entry;
llvm::StringRef Emulation;
llvm::StringRef Fini;
llvm::StringRef Init;
llvm::StringRef LtoAAPipeline;
llvm::StringRef LtoNewPmPasses;
llvm::StringRef OutputFile;
llvm::StringRef SoName;
llvm::StringRef Sysroot;
std::string RPath;
llvm::MapVector<llvm::StringRef, std::vector<llvm::StringRef>> OutputSections;
std::vector<VersionDefinition> VersionDefinitions;
std::vector<llvm::StringRef> DynamicList;
std::vector<llvm::StringRef> SearchPaths;
std::vector<llvm::StringRef> Undefined;
std::vector<SymbolVersion> VersionScriptGlobals;
std::vector<uint8_t> BuildIdVector;
bool AllowMultipleDefinition;
bool AsNeeded = false;
bool Bsymbolic;
bool BsymbolicFunctions;
bool Demangle = true;
bool DisableVerify;
bool DiscardAll;
bool DiscardLocals;
bool DiscardNone;
bool EhFrameHdr;
bool EnableNewDtags;
bool ExportDynamic;
bool FatalWarnings;
bool GcSections;
bool GnuHash = false;
bool ICF;
bool Mips64EL = false;
bool NoInhibitExec;
bool NoUndefined;
bool NoGnuUnique;
bool NoUndefinedVersion;
bool Pic;
bool Pie;
bool PrintGcSections;
bool Rela;
bool Relocatable;
bool SaveTemps;
bool Shared;
bool Static = false;
bool StripAll;
bool StripDebug;
bool SysvHash = true;
bool Threads;
bool Trace;
bool Verbose;
bool WarnCommon;
bool ZCombreloc;
bool ZExecStack;
bool ZNodelete;
bool ZNow;
bool ZOrigin;
bool ZRelro;
UnresolvedPolicy UnresolvedSymbols;
BuildIdKind BuildId = BuildIdKind::None;
ELFKind EKind = ELFNoneKind;
uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL;
uint16_t EMachine = llvm::ELF::EM_NONE;
uint64_t EntryAddr = -1;
unsigned Optimize = 0;
uint64_t ImageBase;
unsigned LtoJobs;
unsigned LtoO;
unsigned Optimize;
};
// The only instance of Configuration struct.
extern Configuration *Config;
} // namespace elf2
} // namespace elf
} // namespace lld
#endif

View File

@ -10,119 +10,218 @@
#include "Driver.h"
#include "Config.h"
#include "Error.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "Strings.h"
#include "SymbolListFile.h"
#include "SymbolTable.h"
#include "Target.h"
#include "Writer.h"
#include "llvm/ADT/STLExtras.h"
#include "lld/Driver/Driver.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
Configuration *elf2::Config;
LinkerDriver *elf2::Driver;
Configuration *elf::Config;
LinkerDriver *elf::Driver;
bool elf::link(ArrayRef<const char *> Args, raw_ostream &Error) {
HasError = false;
ErrorOS = &Error;
void elf2::link(ArrayRef<const char *> Args) {
Configuration C;
LinkerDriver D;
ScriptConfiguration SC;
Config = &C;
Driver = &D;
Driver->main(Args.slice(1));
ScriptConfig = &SC;
Driver->main(Args);
return !HasError;
}
// Parses a linker -m option.
static std::pair<ELFKind, uint16_t> parseEmulation(StringRef S) {
if (S == "elf32btsmip")
return {ELF32BEKind, EM_MIPS};
if (S == "elf32ltsmip")
return {ELF32LEKind, EM_MIPS};
if (S == "elf32ppc" || S == "elf32ppc_fbsd")
return {ELF32BEKind, EM_PPC};
if (S == "elf64ppc" || S == "elf64ppc_fbsd")
return {ELF64BEKind, EM_PPC64};
if (S == "elf_i386")
return {ELF32LEKind, EM_386};
if (S == "elf_x86_64")
return {ELF64LEKind, EM_X86_64};
if (S == "aarch64linux")
return {ELF64LEKind, EM_AARCH64};
if (S == "i386pe" || S == "i386pep" || S == "thumb2pe")
error("Windows targets are not supported on the ELF frontend: " + S);
error("Unknown emulation: " + S);
if (S.endswith("_fbsd"))
S = S.drop_back(5);
std::pair<ELFKind, uint16_t> Ret =
StringSwitch<std::pair<ELFKind, uint16_t>>(S)
.Case("aarch64linux", {ELF64LEKind, EM_AARCH64})
.Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM})
.Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
.Case("elf32btsmip", {ELF32BEKind, EM_MIPS})
.Case("elf32ltsmip", {ELF32LEKind, EM_MIPS})
.Case("elf32ppc", {ELF32BEKind, EM_PPC})
.Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
.Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
.Case("elf64ppc", {ELF64BEKind, EM_PPC64})
.Case("elf_i386", {ELF32LEKind, EM_386})
.Case("elf_x86_64", {ELF64LEKind, EM_X86_64})
.Default({ELFNoneKind, EM_NONE});
if (Ret.first == ELFNoneKind) {
if (S == "i386pe" || S == "i386pep" || S == "thumb2pe")
error("Windows targets are not supported on the ELF frontend: " + S);
else
error("unknown emulation: " + S);
}
return Ret;
}
// Returns slices of MB by parsing MB as an archive file.
// Each slice consists of a member file in the archive.
static std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB) {
ErrorOr<std::unique_ptr<Archive>> FileOrErr = Archive::create(MB);
error(FileOrErr, "Failed to parse archive");
std::unique_ptr<Archive> File = std::move(*FileOrErr);
std::vector<MemoryBufferRef>
LinkerDriver::getArchiveMembers(MemoryBufferRef MB) {
std::unique_ptr<Archive> File =
check(Archive::create(MB), "failed to parse archive");
std::vector<MemoryBufferRef> V;
for (const ErrorOr<Archive::Child> &C : File->children()) {
error(C, "Could not get the child of the archive " + File->getFileName());
ErrorOr<MemoryBufferRef> MbOrErr = C->getMemoryBufferRef();
error(MbOrErr, "Could not get the buffer for a child of the archive " +
File->getFileName());
V.push_back(*MbOrErr);
Error Err;
for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
Archive::Child C = check(COrErr, "could not get the child of the archive " +
File->getFileName());
MemoryBufferRef MBRef =
check(C.getMemoryBufferRef(),
"could not get the buffer for a child of the archive " +
File->getFileName());
V.push_back(MBRef);
}
if (Err)
Error(Err);
// Take ownership of memory buffers created for members of thin archives.
for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers())
OwningMBs.push_back(std::move(MB));
return V;
}
// Opens and parses a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
void LinkerDriver::addFile(StringRef Path) {
using namespace llvm::sys::fs;
using namespace sys::fs;
if (Config->Verbose)
llvm::outs() << Path << "\n";
auto MBOrErr = MemoryBuffer::getFile(Path);
error(MBOrErr, "cannot open " + Path);
std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
MemoryBufferRef MBRef = MB->getMemBufferRef();
OwningMBs.push_back(std::move(MB)); // take MB ownership
outs() << Path << "\n";
Optional<MemoryBufferRef> Buffer = readFile(Path);
if (!Buffer.hasValue())
return;
MemoryBufferRef MBRef = *Buffer;
switch (identify_magic(MBRef.getBuffer())) {
case file_magic::unknown:
readLinkerScript(&Alloc, MBRef);
readLinkerScript(MBRef);
return;
case file_magic::archive:
if (WholeArchive) {
for (MemoryBufferRef MB : getArchiveMembers(MBRef))
Files.push_back(createObjectFile(MB));
Files.push_back(createObjectFile(MB, Path));
return;
}
Files.push_back(make_unique<ArchiveFile>(MBRef));
return;
case file_magic::elf_shared_object:
if (Config->Relocatable) {
error("attempted static link of dynamic object " + Path);
return;
}
Files.push_back(createSharedFile(MBRef));
return;
default:
Files.push_back(createObjectFile(MBRef));
if (InLib)
Files.push_back(make_unique<LazyObjectFile>(MBRef));
else
Files.push_back(createObjectFile(MBRef));
}
}
// Opens the file at Path and returns a reference to its contents.
// If the file cannot be opened, an error is reported and None is returned.
// The newly created memory buffer is moved into OwningMBs, so the returned
// reference points into a buffer held by this driver. When Cpio is set, the
// file contents are also appended to it under the path produced by
// relativeToRoot(Path).
Optional<MemoryBufferRef> LinkerDriver::readFile(StringRef Path) {
  auto FileOrErr = MemoryBuffer::getFile(Path);
  if (auto EC = FileOrErr.getError()) {
    error(EC, "cannot open " + Path);
    return None;
  }

  // Grab the reference before the buffer is moved into OwningMBs.
  MemoryBufferRef Contents = (*FileOrErr)->getMemBufferRef();
  OwningMBs.push_back(std::move(*FileOrErr)); // take MB ownership

  if (Cpio)
    Cpio->append(relativeToRoot(Path), Contents.getBuffer());
  return Contents;
}
// Handles a -l<Name> request: looks the library up via searchLibrary() and
// adds the resulting file. If the search yields an empty path, an error is
// reported and nothing is added.
void LinkerDriver::addLibrary(StringRef Name) {
  std::string Found = searchLibrary(Name);
  if (!Found.empty()) {
    addFile(Found);
    return;
  }
  error("unable to find library -l" + Name);
}
// Startup initialization of LLVM, needed because LTO calls into LLVM to
// compile bitcode files to native code. This could be deferred until a
// bitcode file is actually read, but the initialization is fast, so it is
// done unconditionally.
static void initLLVM(opt::InputArgList &Args) {
  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();
  InitializeAllAsmParsers();

  // Discard all names except GlobalValue names. This saves memory, so it is
  // on by default and turned off only for the developer option -save-temps.
  bool DiscardNames = !Config->SaveTemps;
  Driver->Context.setDiscardValueNames(DiscardNames);
  Driver->Context.enableDebugTypeODRUniquing();

  // Forward every -mllvm value to LLVM's command line parser. The first
  // element stands in for the program name.
  std::vector<const char *> LLVMArgs;
  LLVMArgs.push_back("lld (LLVM option parsing)");
  for (auto *Arg : Args.filtered(OPT_mllvm))
    LLVMArgs.push_back(Arg->getValue());
  cl::ParseCommandLineOptions(LLVMArgs.size(), LLVMArgs.data());
}
// Some command line options or some combinations of them are not allowed.
// This function checks for such errors.
static void checkOptions(opt::InputArgList &Args) {
// Traditional linkers can generate re-linkable object files instead
// of executables or DSOs. We don't support that since the feature
// does not seem to provide more value than the static archiver.
if (Args.hasArg(OPT_relocatable))
error("-r option is not supported. Use 'ar' command instead.");
// The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
// table which is a relatively new feature.
if (Config->EMachine == EM_MIPS && Config->GnuHash)
error("The .gnu.hash section is not compatible with the MIPS target.");
error("the .gnu.hash section is not compatible with the MIPS target.");
if (Config->EMachine == EM_AMDGPU && !Config->Entry.empty())
error("-e option is not valid for AMDGPU.");
if (Config->Pie && Config->Shared)
error("-shared and -pie may not be used together");
if (Config->Relocatable) {
if (Config->Shared)
error("-r and -shared may not be used together");
if (Config->GcSections)
error("-r and --gc-sections may not be used together");
if (Config->ICF)
error("-r and --icf may not be used together");
if (Config->Pie)
error("-r and -pie may not be used together");
}
}
static StringRef
@ -132,6 +231,22 @@ getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") {
return Default;
}
// Returns the value of the last occurrence of option Key parsed as a base-10
// integer, or Default if the option was not given. If the value does not
// parse as a number, an error is reported.
static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) {
  auto *Arg = Args.getLastArg(Key);
  if (!Arg)
    return Default;

  int Result = Default;
  StringRef Text = Arg->getValue();
  // getAsInteger() returns true on failure.
  if (Text.getAsInteger(10, Result))
    error(Arg->getSpelling() + ": number expected, but got " + Text);
  return Result;
}
// Returns the value of the last --reproduce option if one was given.
// Otherwise falls back to the LLD_REPRODUCE environment variable, which
// yields a null pointer when the variable is not set.
static const char *getReproduceOption(opt::InputArgList &Args) {
  auto *Arg = Args.getLastArg(OPT_reproduce);
  if (!Arg)
    return getenv("LLD_REPRODUCE");
  return Arg->getValue();
}
static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
for (auto *Arg : Args.filtered(OPT_z))
if (Key == Arg->getValue())
@ -140,12 +255,33 @@ static bool hasZOption(opt::InputArgList &Args, StringRef Key) {
}
void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
initSymbols();
ELFOptTable Parser;
opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
if (Args.hasArg(OPT_help)) {
printHelp(ArgsArr[0]);
return;
}
if (Args.hasArg(OPT_version)) {
outs() << getVersionString();
return;
}
if (const char *Path = getReproduceOption(Args)) {
// Note that --reproduce is a debug option so you can ignore it
// if you are trying to understand the whole picture of the code.
Cpio.reset(CpioFile::create(Path));
if (Cpio) {
Cpio->append("response.txt", createResponseFile(Args));
Cpio->append("version.txt", getVersionString());
}
}
opt::InputArgList Args = parseArgs(&Alloc, ArgsArr);
readConfigs(Args);
initLLVM(Args);
createFiles(Args);
checkOptions(Args);
if (HasError)
return;
switch (Config->EKind) {
case ELF32LEKind:
@ -165,6 +301,25 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
}
}
// Chooses the policy for unresolved symbols from the command line.
// The checks are ordered by precedence:
//   1. -noinhibit-exec                      -> Warn
//   2. -no-undefined or -z defs             -> NoUndef
//   3. relocatable output (Config)          -> Ignore
//   4. the last --unresolved-symbols value  -> Ignore or Error
// An unrecognized --unresolved-symbols value is reported as an error and
// then treated like the default, which is Error.
static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) {
  if (Args.hasArg(OPT_noinhibit_exec))
    return UnresolvedPolicy::Warn;
  if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs"))
    return UnresolvedPolicy::NoUndef;
  if (Config->Relocatable)
    return UnresolvedPolicy::Ignore;

  auto *Arg = Args.getLastArg(OPT_unresolved_symbols);
  if (!Arg)
    return UnresolvedPolicy::Error;

  StringRef Value = Arg->getValue();
  if (Value == "ignore-all" || Value == "ignore-in-object-files")
    return UnresolvedPolicy::Ignore;
  if (Value != "ignore-in-shared-libs" && Value != "report-all")
    error("unknown --unresolved-symbols value: " + Value);
  return UnresolvedPolicy::Error;
}
// Initializes Config members by the command line options.
void LinkerDriver::readConfigs(opt::InputArgList &Args) {
for (auto *Arg : Args.filtered(OPT_L))
@ -185,38 +340,66 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition);
Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic);
Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions);
Config->Demangle = !Args.hasArg(OPT_no_demangle);
Config->DisableVerify = Args.hasArg(OPT_disable_verify);
Config->DiscardAll = Args.hasArg(OPT_discard_all);
Config->DiscardLocals = Args.hasArg(OPT_discard_locals);
Config->DiscardNone = Args.hasArg(OPT_discard_none);
Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr);
Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags);
Config->ExportDynamic = Args.hasArg(OPT_export_dynamic);
Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings);
Config->GcSections = Args.hasArg(OPT_gc_sections);
Config->NoInhibitExec = Args.hasArg(OPT_noinhibit_exec);
Config->NoUndefined = Args.hasArg(OPT_no_undefined);
Config->ICF = Args.hasArg(OPT_icf);
Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique);
Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version);
Config->Pie = Args.hasArg(OPT_pie);
Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections);
Config->Relocatable = Args.hasArg(OPT_relocatable);
Config->SaveTemps = Args.hasArg(OPT_save_temps);
Config->Shared = Args.hasArg(OPT_shared);
Config->StripAll = Args.hasArg(OPT_strip_all);
Config->StripDebug = Args.hasArg(OPT_strip_debug);
Config->Threads = Args.hasArg(OPT_threads);
Config->Trace = Args.hasArg(OPT_trace);
Config->Verbose = Args.hasArg(OPT_verbose);
Config->WarnCommon = Args.hasArg(OPT_warn_common);
Config->DynamicLinker = getString(Args, OPT_dynamic_linker);
Config->Entry = getString(Args, OPT_entry);
Config->Fini = getString(Args, OPT_fini, "_fini");
Config->Init = getString(Args, OPT_init, "_init");
Config->LtoAAPipeline = getString(Args, OPT_lto_aa_pipeline);
Config->LtoNewPmPasses = getString(Args, OPT_lto_newpm_passes);
Config->OutputFile = getString(Args, OPT_o);
Config->SoName = getString(Args, OPT_soname);
Config->Sysroot = getString(Args, OPT_sysroot);
Config->Optimize = getInteger(Args, OPT_O, 1);
Config->LtoO = getInteger(Args, OPT_lto_O, 2);
if (Config->LtoO > 3)
error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1);
if (Config->LtoJobs == 0)
error("number of threads must be > 0");
Config->ZCombreloc = !hasZOption(Args, "nocombreloc");
Config->ZExecStack = hasZOption(Args, "execstack");
Config->ZNodelete = hasZOption(Args, "nodelete");
Config->ZNow = hasZOption(Args, "now");
Config->ZOrigin = hasZOption(Args, "origin");
Config->ZRelro = !hasZOption(Args, "norelro");
if (auto *Arg = Args.getLastArg(OPT_O)) {
StringRef Val = Arg->getValue();
if (Val.getAsInteger(10, Config->Optimize))
error("Invalid optimization level");
}
if (Config->Relocatable)
Config->StripAll = false;
// --strip-all implies --strip-debug.
if (Config->StripAll)
Config->StripDebug = true;
// Config->Pic is true if we are generating position-independent code.
Config->Pic = Config->Pie || Config->Shared;
if (auto *Arg = Args.getLastArg(OPT_hash_style)) {
StringRef S = Arg->getValue();
@ -226,19 +409,52 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
} else if (S == "both") {
Config->GnuHash = true;
} else if (S != "sysv")
error("Unknown hash style: " + S);
error("unknown hash style: " + S);
}
// Parse --build-id or --build-id=<style>.
if (Args.hasArg(OPT_build_id))
Config->BuildId = BuildIdKind::Fnv1;
if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) {
StringRef S = Arg->getValue();
if (S == "md5") {
Config->BuildId = BuildIdKind::Md5;
} else if (S == "sha1") {
Config->BuildId = BuildIdKind::Sha1;
} else if (S == "none") {
Config->BuildId = BuildIdKind::None;
} else if (S.startswith("0x")) {
Config->BuildId = BuildIdKind::Hexstring;
Config->BuildIdVector = parseHex(S.substr(2));
} else {
error("unknown --build-id style: " + S);
}
}
for (auto *Arg : Args.filtered(OPT_undefined))
Config->Undefined.push_back(Arg->getValue());
Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args);
if (auto *Arg = Args.getLastArg(OPT_dynamic_list))
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
parseDynamicList(*Buffer);
for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
Config->DynamicList.push_back(Arg->getValue());
if (auto *Arg = Args.getLastArg(OPT_version_script))
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
parseVersionScript(*Buffer);
}
void LinkerDriver::createFiles(opt::InputArgList &Args) {
for (auto *Arg : Args) {
switch (Arg->getOption().getID()) {
case OPT_l:
addFile(searchLibrary(Arg->getValue()));
addLibrary(Arg->getValue());
break;
case OPT_alias_script_T:
case OPT_INPUT:
case OPT_script:
addFile(Arg->getValue());
@ -261,75 +477,112 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) {
case OPT_no_whole_archive:
WholeArchive = false;
break;
case OPT_start_lib:
InLib = true;
break;
case OPT_end_lib:
InLib = false;
break;
}
}
if (Files.empty())
if (Files.empty() && !HasError)
error("no input files.");
// If -m <machine_type> was not given, infer it from object files.
if (Config->EKind == ELFNoneKind) {
for (std::unique_ptr<InputFile> &F : Files) {
if (F->EKind == ELFNoneKind)
continue;
Config->EKind = F->EKind;
Config->EMachine = F->EMachine;
break;
}
}
}
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
SymbolTable<ELFT> Symtab;
Target.reset(createTarget());
elf::Symtab<ELFT>::X = &Symtab;
if (!Config->Shared) {
// Add entry symbol.
//
// There is no entry symbol for AMDGPU binaries, so skip adding one to avoid
// having and undefined symbol.
if (Config->Entry.empty() && Config->EMachine != EM_AMDGPU)
Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start";
std::unique_ptr<TargetInfo> TI(createTarget());
Target = TI.get();
LinkerScript<ELFT> LS;
Script<ELFT>::X = &LS;
// In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol
// is magical and is used to produce a R_386_GOTPC relocation.
// The R_386_GOTPC relocation value doesn't actually depend on the
// symbol value, so it could use an index of STN_UNDEF which, according
// to the spec, means the symbol value is 0.
// Unfortunately both gas and MC keep the _GLOBAL_OFFSET_TABLE_ symbol in
// the object file.
// The situation is even stranger on x86_64 where the assembly doesn't
// need the magical symbol, but gas still puts _GLOBAL_OFFSET_TABLE_ as
// an undefined symbol in the .o files.
// Given that the symbol is effectively unused, we just create a dummy
// hidden one to avoid the undefined symbol error.
Symtab.addIgnored("_GLOBAL_OFFSET_TABLE_");
}
Config->Rela = ELFT::Is64Bits || Config->EMachine == EM_X86_64;
Config->Mips64EL =
(Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind);
// Add entry symbol. Note that AMDGPU binaries have no entry points.
if (Config->Entry.empty() && !Config->Shared && !Config->Relocatable &&
Config->EMachine != EM_AMDGPU)
Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start";
// Default output filename is "a.out" by the Unix tradition.
if (Config->OutputFile.empty())
Config->OutputFile = "a.out";
// Handle --trace-symbol.
for (auto *Arg : Args.filtered(OPT_trace_symbol))
Symtab.trace(Arg->getValue());
// Set either EntryAddr (if S is a number) or EntrySym (otherwise).
if (!Config->Entry.empty()) {
// Set either EntryAddr (if S is a number) or EntrySym (otherwise).
StringRef S = Config->Entry;
if (S.getAsInteger(0, Config->EntryAddr))
Config->EntrySym = Symtab.addUndefined(S);
}
if (Config->EMachine == EM_MIPS) {
// On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between
// start of function and gp pointer into GOT. Use 'strong' variant of
// the addIgnored to prevent '_gp_disp' substitution.
Config->MipsGpDisp = Symtab.addIgnoredStrong("_gp_disp");
// Define _gp for MIPS. st_value of _gp symbol will be updated by Writer
// so that it points to an absolute address which is relative to GOT.
// See "Global Data Symbols" in Chapter 6 in the following document:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
Symtab.addAbsolute("_gp", ElfSym<ELFT>::MipsGp);
// Initialize Config->ImageBase.
if (auto *Arg = Args.getLastArg(OPT_image_base)) {
StringRef S = Arg->getValue();
if (S.getAsInteger(0, Config->ImageBase))
error(Arg->getSpelling() + ": number expected, but got " + S);
else if ((Config->ImageBase % Target->PageSize) != 0)
warning(Arg->getSpelling() + ": address isn't multiple of page size");
} else {
Config->ImageBase = Config->Pic ? 0 : Target->DefaultImageBase;
}
for (std::unique_ptr<InputFile> &F : Files)
Symtab.addFile(std::move(F));
if (HasError)
return; // There were duplicate symbols or incompatible files
for (StringRef S : Config->Undefined)
Symtab.addUndefinedOpt(S);
Symtab.scanUndefinedFlags();
Symtab.scanShlibUndefined();
Symtab.scanDynamicList();
Symtab.scanVersionScript();
Symtab.scanSymbolVersions();
Symtab.addCombinedLtoObject();
if (HasError)
return;
for (auto *Arg : Args.filtered(OPT_wrap))
Symtab.wrap(Arg->getValue());
if (Config->OutputFile.empty())
Config->OutputFile = "a.out";
// Write the result to the file.
Symtab.scanShlibUndefined();
if (Config->GcSections)
markLive<ELFT>(&Symtab);
markLive<ELFT>();
if (Config->ICF)
doIcf<ELFT>();
// MergeInputSection::splitIntoPieces needs to be called before
// any call of MergeInputSection::getOffset. Do that.
for (const std::unique_ptr<elf::ObjectFile<ELFT>> &F :
Symtab.getObjectFiles())
for (InputSectionBase<ELFT> *S : F->getSections()) {
if (!S || S == &InputSection<ELFT>::Discarded || !S->Live)
continue;
if (S->Compressed)
S->uncompress();
if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S))
MS->splitIntoPieces();
}
writeResult<ELFT>(&Symtab);
}

View File

@ -12,36 +12,54 @@
#include "SymbolTable.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/raw_ostream.h"
namespace lld {
namespace elf2 {
namespace elf {
extern class LinkerDriver *Driver;
// Entry point of the ELF linker.
void link(ArrayRef<const char *> Args);
class CpioFile;
class LinkerDriver {
public:
void main(ArrayRef<const char *> Args);
void addFile(StringRef Path);
void addLibrary(StringRef Name);
llvm::LLVMContext Context; // to parse bitcode files
std::unique_ptr<CpioFile> Cpio; // for reproduce
private:
std::vector<MemoryBufferRef> getArchiveMembers(MemoryBufferRef MB);
llvm::Optional<MemoryBufferRef> readFile(StringRef Path);
void readConfigs(llvm::opt::InputArgList &Args);
void createFiles(llvm::opt::InputArgList &Args);
template <class ELFT> void link(llvm::opt::InputArgList &Args);
llvm::BumpPtrAllocator Alloc;
// True if we are between --whole-archive and --no-whole-archive.
bool WholeArchive = false;
// True if we are between --start-lib and --end-lib.
bool InLib = false;
llvm::BumpPtrAllocator Alloc;
std::vector<std::unique_ptr<InputFile>> Files;
std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
};
// Parses command line options.
llvm::opt::InputArgList parseArgs(llvm::BumpPtrAllocator *A,
ArrayRef<const char *> Args);
class ELFOptTable : public llvm::opt::OptTable {
public:
ELFOptTable();
llvm::opt::InputArgList parse(ArrayRef<const char *> Argv);
private:
llvm::BumpPtrAllocator Alloc;
};
// Create enum with OPT_xxx values for each option in Options.td
enum {
@ -51,14 +69,43 @@ enum {
#undef OPTION
};
// Parses a linker script. Calling this function updates the Symtab and Config.
void readLinkerScript(llvm::BumpPtrAllocator *A, MemoryBufferRef MB);
// This is the class to create a .cpio file for --reproduce.
//
// If "--reproduce foo" is given, we create a file "foo.cpio" and
// copy all input files to the archive, along with a response file
// to re-run the same command with the same inputs.
// It is useful for reporting issues to LLD developers.
//
// Cpio as a file format is a deliberate choice. It's standardized in
// POSIX and very easy to create. cpio command is available virtually
// on all Unix systems. See
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_07
// for the format details.
class CpioFile {
public:
static CpioFile *create(StringRef OutputPath);
void append(StringRef Path, StringRef Data);
private:
CpioFile(std::unique_ptr<llvm::raw_fd_ostream> OS, StringRef Basename);
std::unique_ptr<llvm::raw_fd_ostream> OS;
llvm::StringSet<> Seen;
std::string Basename;
};
void printHelp(const char *Argv0);
std::string getVersionString();
std::vector<uint8_t> parseHexstring(StringRef S);
std::string createResponseFile(const llvm::opt::InputArgList &Args);
std::string relativeToRoot(StringRef Path);
std::string findFromSearchPaths(StringRef Path);
std::string searchLibrary(StringRef Path);
std::string buildSysrootedPath(llvm::StringRef Dir, llvm::StringRef File);
} // namespace elf2
} // namespace elf
} // namespace lld
#endif

View File

@ -15,16 +15,20 @@
#include "Driver.h"
#include "Error.h"
#include "lld/Config/Version.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
using namespace llvm::sys;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
// Create OptTable
@ -34,55 +38,208 @@ using namespace lld::elf2;
#undef PREFIX
// Create table mapping all options defined in Options.td
static const opt::OptTable::Info infoTable[] = {
static const opt::OptTable::Info OptInfo[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \
{ \
X1, X2, X9, X10, OPT_##ID, opt::Option::KIND##Class, X8, X7, OPT_##GROUP, \
OPT_##ALIAS, X6 \
} \
,
},
#include "Options.inc"
#undef OPTION
};
class ELFOptTable : public opt::OptTable {
public:
ELFOptTable() : OptTable(infoTable) {}
};
ELFOptTable::ELFOptTable() : OptTable(OptInfo) {}
// Chooses the tokenizer used to expand response files.
//
// An explicit --rsp-quoting value wins: "windows" selects the Windows
// tokenizer and "posix" the GNU one. Any other value is reported via
// error() and the GNU tokenizer is returned. When the option is absent,
// the choice follows the OS of the process triple.
static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) {
  auto *Arg = Args.getLastArg(OPT_rsp_quoting);
  if (!Arg) {
    // No explicit request: follow the host convention.
    if (Triple(sys::getProcessTriple()).getOS() == Triple::Win32)
      return cl::TokenizeWindowsCommandLine;
    return cl::TokenizeGNUCommandLine;
  }

  StringRef Style = Arg->getValue();
  if (Style == "windows")
    return cl::TokenizeWindowsCommandLine;
  if (Style != "posix")
    error("invalid response file quoting: " + Style);
  return cl::TokenizeGNUCommandLine;
}
// Parses a given list of options.
opt::InputArgList elf2::parseArgs(llvm::BumpPtrAllocator *A,
ArrayRef<const char *> Argv) {
opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) {
// Make InputArgList from string vectors.
ELFOptTable Table;
unsigned MissingIndex;
unsigned MissingCount;
SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size());
// We need to get the quoting style for response files before parsing all
// options so we parse here before and ignore all the options but
// --rsp-quoting.
opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount);
// Expand response files. '@<filename>' is replaced by the file's contents.
SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size());
StringSaver Saver(*A);
llvm::cl::ExpandResponseFiles(Saver, llvm::cl::TokenizeGNUCommandLine, Vec);
StringSaver Saver(Alloc);
cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec);
// Parse options and then do error checking.
opt::InputArgList Args = Table.ParseArgs(Vec, MissingIndex, MissingCount);
Args = this->ParseArgs(Vec, MissingIndex, MissingCount);
if (MissingCount)
error(Twine("missing arg value for \"") + Args.getArgString(MissingIndex) +
"\", expected " + Twine(MissingCount) +
(MissingCount == 1 ? " argument.\n" : " arguments"));
iterator_range<opt::arg_iterator> Unknowns = Args.filtered(OPT_UNKNOWN);
for (auto *Arg : Unknowns)
warning("warning: unknown argument: " + Arg->getSpelling());
if (Unknowns.begin() != Unknowns.end())
error("unknown argument(s) found");
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
error("unknown argument: " + Arg->getSpelling());
return Args;
}
std::string elf2::findFromSearchPaths(StringRef Path) {
// Prints the option summary for the ELF linker to standard output.
// Argv0 is passed through to the option table's help printer.
void elf::printHelp(const char *Argv0) {
  ELFOptTable().PrintHelp(outs(), Argv0, "lld", false);
}
// Builds the one-line version banner, terminated by a newline:
// "LLD <version>" followed by " <repository version>" when the latter
// is non-empty.
std::string elf::getVersionString() {
  const std::string Version = getLLDVersion();
  const std::string Repo = getLLDRepositoryVersion();

  std::string Banner = "LLD " + Version;
  if (!Repo.empty())
    Banner += " " + Repo;
  Banner += "\n";
  return Banner;
}
// Turns Path into an absolute path, normalizes away "." and ".." components,
// and then strips the leading root so the result is relative to the root
// directory. For example, "../foo.o" becomes "home/john/foo.o" when the
// current directory is "/home/john/bar".
//
// Calls fatal() if the path cannot be made absolute.
std::string elf::relativeToRoot(StringRef Path) {
  SmallString<128> Absolute = Path;
  std::error_code EC = fs::make_absolute(Absolute);
  if (EC)
    fatal("make_absolute failed: " + EC.message());
  path::remove_dots(Absolute, /*remove_dot_dot=*/true);

  // This is Windows specific. root_name() returns a drive letter
  // (e.g. "c:") or a UNC name (//net). We keep it as the first component
  // of the result, minus the trailing ':' or the leading "//".
  StringRef RootName = path::root_name(Absolute);
  StringRef Kept;
  if (RootName.endswith(":"))
    Kept = RootName.drop_back();
  else if (RootName.startswith("//"))
    Kept = RootName.substr(2);

  SmallString<128> Result = Kept;
  path::append(Result, path::relative_path(Absolute));
  return Result.str();
}
// Takes ownership of the output stream OS and remembers S as the basename
// that append() prepends to every in-archive member path.
CpioFile::CpioFile(std::unique_ptr<raw_fd_ostream> OS, StringRef S)
: OS(std::move(OS)), Basename(S) {}
// Opens "<OutputPath>.cpio" for writing and returns a new CpioFile that
// writes to it. The filename component of OutputPath becomes the archive's
// basename. The caller owns the returned object.
//
// If the file cannot be opened, the failure is reported through error()
// and nullptr is returned.
CpioFile *CpioFile::create(StringRef OutputPath) {
  std::string ArchivePath = (OutputPath + ".cpio").str();
  std::error_code EC;
  auto Stream = llvm::make_unique<raw_fd_ostream>(ArchivePath, EC, fs::F_None);
  if (!EC)
    return new CpioFile(std::move(Stream), path::filename(OutputPath));

  error(EC, "--reproduce: failed to open " + ArchivePath);
  return nullptr;
}
// Writes one archive member to OS: a header made of fixed-width octal
// ASCII fields, then the NUL-terminated member name, then the file data.
//
// Path is the in-archive name and Data is the member's contents. Most
// header fields are constants; only the name size (including the
// terminating NUL) and the data size vary per member.
static void writeMember(raw_fd_ostream &OS, StringRef Path, StringRef Data) {
  // The sizes are size_t, but "%o" consumes an unsigned int. Convert
  // explicitly to types that match the conversion specifiers so the
  // varargs call is well-defined on every platform.
  unsigned NameSize = static_cast<unsigned>(Path.size() + 1);
  unsigned long long FileSize = static_cast<unsigned long long>(Data.size());

  // The c_dev/c_ino pair should be unique according to the spec,
  // but no one seems to care.
  OS << "070707";                     // c_magic
  OS << "000000";                     // c_dev
  OS << "000000";                     // c_ino
  OS << "100664";                     // c_mode: C_ISREG | rw-rw-r--
  OS << "000000";                     // c_uid
  OS << "000000";                     // c_gid
  OS << "000001";                     // c_nlink
  OS << "000000";                     // c_rdev
  OS << "00000000000";                // c_mtime
  OS << format("%06o", NameSize);     // c_namesize
  OS << format("%011llo", FileSize);  // c_filesize
  OS << Path << '\0';                 // c_name
  OS << Data;                         // c_filedata
}
// Adds a member named Path with contents Data to the archive.
// A Path that has already been appended is silently ignored.
void CpioFile::append(StringRef Path, StringRef Data) {
  bool FirstTime = Seen.insert(Path).second;
  if (!FirstTime)
    return;

  // Build the in-archive filename so that /home/foo/bar is stored as
  // baz/home/foo/bar, where baz is the basename of the output file
  // (i.e. in that case we are creating baz.cpio).
  SmallString<128> Member;
  path::append(Member, Basename, Path);

  // Use unix path separators so the cpio can be extracted on both unix and
  // windows.
  for (char &C : Member)
    if (C == '\\')
      C = '/';

  raw_fd_ostream &Out = *OS;
  writeMember(Out, Member, Data);

  // Emit the trailer and then seek back to where it starts, so the next
  // member overwrites it. This way the archive is valid even if we crash.
  const uint64_t TrailerPos = Out.tell();
  writeMember(Out, "TRAILER!!!", "");
  Out.seek(TrailerPos);
}
// Wraps S in double quotes when it contains a space character;
// otherwise returns S unchanged.
static std::string quote(StringRef S) {
  bool HasSpace = S.find(' ') != StringRef::npos;
  if (HasSpace)
    return ("\"" + S + "\"").str();
  return S;
}
// If S names something that exists on the file system, returns it in
// root-relative form (see relativeToRoot); otherwise returns S as is.
static std::string rewritePath(StringRef S) {
  if (!fs::exists(S))
    return S;
  return relativeToRoot(S);
}
// Renders a parsed argument back into command-line text.
// An argument without values is just its spelling. Otherwise the (quoted)
// value follows the spelling, directly for joined-style options and
// separated by one space for all others.
static std::string stringize(opt::Arg *Arg) {
  std::string Text = Arg->getSpelling();
  if (Arg->getNumValues() == 0)
    return Text;

  bool Joined =
      Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle;
  if (!Joined)
    Text += " ";
  Text += quote(Arg->getValue());
  return Text;
}
// Reconstructs the command line arguments so that you can re-run
// the same command with the same inputs. This is for --reproduce.
//
// Each argument is written on its own line. --reproduce itself is dropped,
// and arguments that carry a file system path have that path rewritten
// with rewritePath().
std::string elf::createResponseFile(const opt::InputArgList &Args) {
  SmallString<0> Data;
  raw_svector_ostream OS(Data);

  for (auto *Arg : Args) {
    unsigned ID = Arg->getOption().getID();

    // The reproduce option is not part of the reproduced command line.
    if (ID == OPT_reproduce)
      continue;

    // Plain input files are emitted without any option spelling.
    if (ID == OPT_INPUT) {
      OS << quote(rewritePath(Arg->getValue())) << "\n";
      continue;
    }

    // Options whose value is a path: keep the spelling, rewrite the path.
    bool TakesPath = ID == OPT_L || ID == OPT_dynamic_list ||
                     ID == OPT_rpath || ID == OPT_alias_script_T ||
                     ID == OPT_script || ID == OPT_version_script;
    if (TakesPath)
      OS << Arg->getSpelling() << " " << quote(rewritePath(Arg->getValue()))
         << "\n";
    else
      OS << stringize(Arg) << "\n";
  }
  return Data.str();
}
std::string elf::findFromSearchPaths(StringRef Path) {
for (StringRef Dir : Config->SearchPaths) {
std::string FullPath = buildSysrootedPath(Dir, Path);
if (sys::fs::exists(FullPath))
if (fs::exists(FullPath))
return FullPath;
}
return "";
@ -90,31 +247,30 @@ std::string elf2::findFromSearchPaths(StringRef Path) {
// Searches a given library from input search paths, which are filled
// from -L command line switches. Returns a path to an existent library file.
std::string elf2::searchLibrary(StringRef Path) {
std::vector<std::string> Names;
if (Path[0] == ':') {
Names.push_back(Path.drop_front());
} else {
if (!Config->Static)
Names.push_back(("lib" + Path + ".so").str());
Names.push_back(("lib" + Path + ".a").str());
}
for (const std::string &Name : Names) {
std::string S = findFromSearchPaths(Name);
if (!S.empty())
std::string elf::searchLibrary(StringRef Path) {
if (Path.startswith(":"))
return findFromSearchPaths(Path.substr(1));
for (StringRef Dir : Config->SearchPaths) {
if (!Config->Static) {
std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".so").str());
if (fs::exists(S))
return S;
}
std::string S = buildSysrootedPath(Dir, ("lib" + Path + ".a").str());
if (fs::exists(S))
return S;
}
error("Unable to find library -l" + Path);
return "";
}
// Makes a path by concatenating Dir and File.
// If Dir starts with '=' the result will be preceded by Sysroot,
// which can be set with --sysroot command line switch.
std::string elf2::buildSysrootedPath(StringRef Dir, StringRef File) {
std::string elf::buildSysrootedPath(StringRef Dir, StringRef File) {
SmallString<128> Path;
if (Dir.startswith("="))
sys::path::append(Path, Config->Sysroot, Dir.substr(1), File);
path::append(Path, Config->Sysroot, Dir.substr(1), File);
else
sys::path::append(Path, Dir, File);
path::append(Path, Dir, File);
return Path.str();
}

167
ELF/EhFrame.cpp Normal file
View File

@ -0,0 +1,167 @@
//===- EhFrame.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The .eh_frame section contains information on how to unwind the stack when
// an exception is thrown. The section consists of a sequence of CIE and FDE
// records. The linker needs to merge CIEs and associate FDEs with CIEs.
// That means the linker has to understand the format of the section.
//
// This file contains a few utility functions to read .eh_frame contents.
//
//===----------------------------------------------------------------------===//
#include "EhFrame.h"
#include "Error.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::dwarf;
using namespace llvm::object;
using namespace llvm::support::endian;
namespace lld {
namespace elf {
// .eh_frame section is a sequence of records. Each record starts with
// a 4 byte length field. This function reads the length.
//
// D must begin at the start of a record. Returns the total size of that
// record in bytes, i.e. the value of the length field plus the 4 bytes of
// the length field itself. Calls fatal() if D is shorter than 4 bytes, if
// the record uses the extended-length form, or if the record would extend
// past the end of D.
template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> D) {
  const endianness E = ELFT::TargetEndianness;
  if (D.size() < 4)
    fatal("CIE/FDE too small");

  // First 4 bytes of CIE/FDE is the size of the record.
  // If it is 0xFFFFFFFF, the next 8 bytes contain the size instead,
  // but we do not support that format yet.
  uint64_t V = read32<E>(D.data());
  if (V == UINT32_MAX)
    fatal("CIE/FDE too large");

  // Widened to 64 bits above so that adding 4 cannot wrap around.
  uint64_t Size = V + 4;
  if (Size > D.size())
    fatal("CIE/FDE ends past the end of the section");
  return Size;
}
// Returns the first byte of D and advances D past it.
// Calls fatal() if D is empty.
static uint8_t readByte(ArrayRef<uint8_t> &D) {
  if (D.empty())
    fatal("corrupted or unsupported CIE information");
  const uint8_t First = D[0];
  D = D.slice(1);
  return First;
}
// Skips an integer encoded in the LEB128 format, advancing D past it.
// The value itself is not of interest because only the runtime needs it,
// but we must be able to step over it to reach the field that follows.
// Calls fatal() if D ends before a byte without the continuation bit
// (0x80) is seen.
static void skipLeb128(ArrayRef<uint8_t> &D) {
  for (;;) {
    if (D.empty())
      fatal("corrupted or unsupported CIE information");
    const uint8_t Byte = D.front();
    D = D.slice(1);
    // A clear high bit marks the last byte of the number.
    if (!(Byte & 0x80))
      return;
  }
}
// Returns the size in bytes of a value stored with pointer encoding Enc.
// Only the low four bits of Enc are examined. Pointer-sized formats take
// 8 bytes for 64-bit ELFT and 4 bytes otherwise. Calls fatal() for a
// format that is not recognized.
template <class ELFT> static size_t getAugPSize(unsigned Enc) {
  const unsigned Format = Enc & 0x0f;

  if (Format == DW_EH_PE_absptr || Format == DW_EH_PE_signed)
    return ELFT::Is64Bits ? 8 : 4;
  if (Format == DW_EH_PE_udata2 || Format == DW_EH_PE_sdata2)
    return 2;
  if (Format == DW_EH_PE_udata4 || Format == DW_EH_PE_sdata4)
    return 4;
  if (Format == DW_EH_PE_udata8 || Format == DW_EH_PE_sdata8)
    return 8;

  fatal("unknown FDE encoding");
}
// Skips the data of a 'P' augmentation: one encoding byte followed by a
// value whose size is determined by that encoding. D is advanced past
// both. Calls fatal() for the DW_EH_PE_aligned encoding and when D does
// not hold more bytes than the value needs.
template <class ELFT> static void skipAugP(ArrayRef<uint8_t> &D) {
  const uint8_t Encoding = readByte(D);
  if ((Encoding & 0xf0) == DW_EH_PE_aligned)
    fatal("DW_EH_PE_aligned encoding is not supported");

  const size_t ValueSize = getAugPSize<ELFT>(Encoding);
  if (D.size() <= ValueSize)
    fatal("corrupted CIE");
  D = D.slice(ValueSize);
}
// Parses the CIE record in D and returns the byte attached to its 'R'
// augmentation. If the augmentation string has no 'R', DW_EH_PE_absptr is
// returned. Calls fatal() on any malformed or unsupported input.
template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> D) {
  // Step over the first 8 bytes of the record: the 4-byte length field
  // and, presumably, the 4-byte CIE id that follows it.
  if (D.size() < 8)
    fatal("CIE too small");
  D = D.slice(8);

  const uint8_t Version = readByte(D);
  if (!(Version == 1 || Version == 3))
    fatal("FDE version 1 or 3 expected, but got " +
          Twine(static_cast<unsigned>(Version)));

  // The augmentation string is NUL-terminated; consume it and the NUL.
  const unsigned char *Terminator = std::find(D.begin(), D.end(), '\0');
  if (Terminator == D.end())
    fatal("corrupted CIE");
  StringRef Aug(reinterpret_cast<const char *>(D.begin()),
                Terminator - D.begin());
  D = D.slice(Aug.size() + 1);

  // Code alignment factor should always be 1 for .eh_frame.
  if (readByte(D) != 1)
    fatal("CIE code alignment must be 1");

  // Skip data alignment factor.
  skipLeb128(D);

  // Skip the return address register. In CIE version 1 this is a single
  // byte. In CIE version 3 this is an unsigned LEB128.
  if (Version == 1)
    readByte(D);
  else
    skipLeb128(D);

  // We only care about an 'R' value, but other records may precede an 'R'
  // record. Unfortunately records are not in TLV (type-length-value) format,
  // so each record type needs its own skipping logic.
  for (char C : Aug) {
    switch (C) {
    case 'R':
      return readByte(D);
    case 'z':
      skipLeb128(D);
      break;
    case 'P':
      skipAugP<ELFT>(D);
      break;
    case 'L':
      readByte(D);
      break;
    default:
      fatal("unknown .eh_frame augmentation string: " + Aug);
    }
  }
  return DW_EH_PE_absptr;
}
template size_t readEhRecordSize<ELF32LE>(ArrayRef<uint8_t>);
template size_t readEhRecordSize<ELF32BE>(ArrayRef<uint8_t>);
template size_t readEhRecordSize<ELF64LE>(ArrayRef<uint8_t>);
template size_t readEhRecordSize<ELF64BE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF32LE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF32BE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF64LE>(ArrayRef<uint8_t>);
template uint8_t getFdeEncoding<ELF64BE>(ArrayRef<uint8_t>);
}
}

22
ELF/EhFrame.h Normal file
View File

@ -0,0 +1,22 @@
//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_EHFRAME_H
#define LLD_ELF_EHFRAME_H
#include "lld/Core/LLVM.h"
namespace lld {
namespace elf {
// Returns the total size in bytes (including the 4-byte length field) of
// the .eh_frame record that starts at the beginning of Data.
template <class ELFT> size_t readEhRecordSize(ArrayRef<uint8_t> Data);
// Parses the CIE record in Data and returns the byte attached to its 'R'
// augmentation, or DW_EH_PE_absptr if the CIE has no 'R' augmentation.
template <class ELFT> uint8_t getFdeEncoding(ArrayRef<uint8_t> Data);
}
}
#endif

View File

@ -8,31 +8,58 @@
//===----------------------------------------------------------------------===//
#include "Error.h"
#include "Config.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
namespace lld {
namespace elf2 {
using namespace llvm;
void warning(const Twine &Msg) { llvm::errs() << Msg << "\n"; }
namespace lld {
namespace elf {
bool HasError;
raw_ostream *ErrorOS;
void log(const Twine &Msg) {
if (Config->Verbose)
outs() << Msg << "\n";
}
void warning(const Twine &Msg) {
if (Config->FatalWarnings)
error(Msg);
else
*ErrorOS << Msg << "\n";
}
void error(const Twine &Msg) {
llvm::errs() << Msg << "\n";
exit(1);
*ErrorOS << Msg << "\n";
HasError = true;
}
void error(std::error_code EC, const Twine &Prefix) {
if (!EC)
return;
error(Prefix + ": " + EC.message());
}
void error(std::error_code EC) {
if (!EC)
return;
error(EC.message());
void fatal(const Twine &Msg) {
*ErrorOS << Msg << "\n";
exit(1);
}
} // namespace elf2
void fatal(const Twine &Msg, const Twine &Prefix) {
fatal(Prefix + ": " + Msg);
}
void check(std::error_code EC) {
if (EC)
fatal(EC.message());
}
void check(Error Err) {
check(errorToErrorCode(std::move(Err)));
}
} // namespace elf
} // namespace lld

View File

@ -13,20 +13,49 @@
#include "lld/Core/LLVM.h"
namespace lld {
namespace elf2 {
namespace elf {
extern bool HasError;
extern llvm::raw_ostream *ErrorOS;
void log(const Twine &Msg);
void warning(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void error(const Twine &Msg);
void error(const Twine &Msg);
void error(std::error_code EC, const Twine &Prefix);
void error(std::error_code EC);
template <typename T> void error(const ErrorOr<T> &V, const Twine &Prefix) {
error(V.getError(), Prefix);
}
template <typename T> void error(const ErrorOr<T> &V) { error(V.getError()); }
} // namespace elf2
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg, const Twine &Prefix);
// check() unwraps a fallible result: it returns the contained value on
// success and calls fatal() with the error's message on failure. The
// overloads taking Prefix pass it on to the two-argument fatal().

// Unwraps an ErrorOr<T>.
template <class T> T check(ErrorOr<T> E) {
  std::error_code EC = E.getError();
  if (EC)
    fatal(EC.message());
  return std::move(*E);
}

// Unwraps an Expected<T>.
template <class T> T check(Expected<T> E) {
  if (E)
    return std::move(*E);
  fatal(errorToErrorCode(E.takeError()).message());
}

// Unwraps an ErrorOr<T>, passing Prefix along on failure.
template <class T> T check(ErrorOr<T> E, const Twine &Prefix) {
  std::error_code EC = E.getError();
  if (EC)
    fatal(EC.message(), Prefix);
  return std::move(*E);
}

// Unwraps an Expected<T>, passing Prefix along on failure.
template <class T> T check(Expected<T> E, const Twine &Prefix) {
  if (E)
    return std::move(*E);
  fatal(errorToErrorCode(E.takeError()).message(), Prefix);
}
} // namespace elf
} // namespace lld
#endif

345
ELF/ICF.cpp Normal file
View File

@ -0,0 +1,345 @@
//===- ICF.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Identical Code Folding is a feature to merge sections not by name (which
// is regular comdat handling) but by contents. If two non-writable sections
// have the same data, relocations, attributes, etc., then the two
// are considered identical and merged by the linker. This optimization
// makes outputs smaller.
//
// ICF is theoretically a problem of reducing graphs by merging as many
// identical subgraphs as possible if we consider sections as vertices and
// relocations as edges. It may sound simple, but it is a bit more
// complicated than you might think. The order of processing sections
// matters because merging two sections can make other sections, whose
// relocations now point to the same section, mergeable. Graphs may contain
// cycles. We need a sophisticated algorithm to do this properly and
// efficiently.
//
// What we do in this file is this. We split sections into groups. Sections
// in the same group are considered identical.
//
// We begin by optimistically putting all sections into a single equivalence
// class. Then we apply a series of checks that split this initial
// equivalence class into more and more refined equivalence classes based on
// the properties by which a section can be distinguished.
//
// We begin by checking that the section contents and flags are the
// same. This only needs to be done once since these properties don't depend
// on the current equivalence class assignment.
//
// Then we split the equivalence classes based on checking that their
// relocations are the same, where relocation targets are compared by their
// equivalence class, not the concrete section. This may need to be done
// multiple times because as the equivalence classes are refined, two
// sections that had a relocation target in the same equivalence class may
// now target different equivalence classes, and hence these two sections
// must be put in different equivalence classes (whereas in the previous
// iteration they were not since the relocation target was the same.)
//
// Our algorithm is smart enough to merge the following mutually-recursive
// functions.
//
// void foo() { bar(); }
// void bar() { foo(); }
//
// This is the so-called "optimistic" algorithm described in
// http://research.google.com/pubs/pub36912.html. (Note that what GNU
// gold implemented is different from the optimistic algorithm.)
//
//===----------------------------------------------------------------------===//
#include "ICF.h"
#include "Config.h"
#include "OutputSections.h"
#include "SymbolTable.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace lld;
using namespace lld::elf;
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
namespace lld {
namespace elf {
// Driver for Identical Code Folding over the input sections of one link.
// Sections are partitioned into groups identified by a numeric group ID;
// sections that still share an ID when run() finishes are folded together.
template <class ELFT> class ICF {
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;
  using uintX_t = typename ELFT::uint;
  using Elf_Rel = Elf_Rel_Impl<ELFT, false>;

  // Equivalence predicate used to split a group of sections.
  using Comparator = std::function<bool(const InputSection<ELFT> *,
                                        const InputSection<ELFT> *)>;

public:
  // Performs the whole ICF pass.
  void run();

private:
  // Next serial group ID handed out by segregate().
  uint64_t NextId = 1;

  static void setLive(SymbolTable<ELFT> *S);
  static uint64_t relSize(InputSection<ELFT> *S);

  // Initial grouping key; does not look at relocation targets.
  static uint64_t getHash(InputSection<ELFT> *S);

  // Whether Sec may take part in ICF at all.
  static bool isEligible(InputSectionBase<ELFT> *Sec);

  // Collects every eligible section from all object files.
  static std::vector<InputSection<ELFT> *> getSections();

  // Group splitting machinery.
  void segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End,
                 Comparator Eq);
  void forEachGroup(std::vector<InputSection<ELFT> *> &V, Comparator Eq);

  // Per-relocation-list comparisons.
  template <class RelTy>
  static bool relocationEq(ArrayRef<RelTy> RA, ArrayRef<RelTy> RB);
  template <class RelTy>
  static bool variableEq(const InputSection<ELFT> *A,
                         const InputSection<ELFT> *B, ArrayRef<RelTy> RA,
                         ArrayRef<RelTy> RB);

  // Section-level comparisons built on top of the two helpers above.
  static bool equalsConstant(const InputSection<ELFT> *A,
                             const InputSection<ELFT> *B);
  static bool equalsVariable(const InputSection<ELFT> *A,
                             const InputSection<ELFT> *B);
};
} // namespace elf
} // namespace lld
// Computes the initial grouping key of S from its section flags, its size and
// the byte size of each of its relocation sections. Relocation targets are
// intentionally not part of the hash.
template <class ELFT> uint64_t ICF<ELFT>::getHash(InputSection<ELFT> *S) {
  uint64_t SecFlags = S->getSectionHdr()->sh_flags;
  uint64_t Hash = hash_combine(SecFlags, S->getSize());
  for (const Elf_Shdr *RelSec : S->RelocSections) {
    uint64_t RelBytes = RelSec->sh_size;
    Hash = hash_combine(Hash, RelBytes);
  }
  return Hash;
}
// Decides whether Sec may be considered for folding. A candidate must be a
// live, non-discarded regular InputSection, must not be named .init or .fini,
// and must be allocated (SHF_ALLOC) but not writable (SHF_WRITE).
template <class ELFT> bool ICF<ELFT>::isEligible(InputSectionBase<ELFT> *Sec) {
  if (!Sec)
    return false;
  if (Sec == &InputSection<ELFT>::Discarded || !Sec->Live)
    return false;

  auto *Regular = dyn_cast<InputSection<ELFT>>(Sec);
  if (!Regular)
    return false;

  // .init and .fini hold instructions that must all run to initialize and
  // finalize the process, so they can never be merged with anything.
  StringRef SecName = Regular->getSectionName();
  if (SecName == ".init")
    return false;
  if (SecName == ".fini")
    return false;

  const Elf_Shdr &Hdr = *Regular->getSectionHdr();
  if (!(Hdr.sh_flags & SHF_ALLOC))
    return false;
  return !(Hdr.sh_flags & SHF_WRITE);
}
// Walks every object file known to the symbol table and gathers, in file and
// section order, all sections that isEligible() accepts.
template <class ELFT>
std::vector<InputSection<ELFT> *> ICF<ELFT>::getSections() {
  std::vector<InputSection<ELFT> *> Candidates;
  for (const auto &File : Symtab<ELFT>::X->getObjectFiles()) {
    for (InputSectionBase<ELFT> *Sec : File->getSections()) {
      if (!isEligible(Sec))
        continue;
      Candidates.push_back(cast<InputSection<ELFT>>(Sec));
    }
  }
  return Candidates;
}
// Splits one existing group into finer groups. Every section in [Begin, End)
// must carry the same group ID on entry, and the range must not be empty.
// Sections are compared with Eq; each newly separated subgroup receives a
// fresh ID from NextId, while the final subgroup keeps the ID it came in with.
template <class ELFT>
void ICF<ELFT>::segregate(InputSection<ELFT> **Begin, InputSection<ELFT> **End,
                          Comparator Eq) {
  // Repeatedly pull everything equal to the current leader to the front of
  // the remaining range. This is quadratic in the worst case, which is fine
  // in practice because a range rarely holds many distinct sections.
  InputSection<ELFT> **Cur = Begin;
  while (true) {
    InputSection<ELFT> *Leader = *Cur;
    auto MatchesLeader = [&](InputSection<ELFT> *S) { return Eq(Leader, S); };
    InputSection<ELFT> **Split =
        std::stable_partition(Cur + 1, End, MatchesLeader);

    // Nothing left over: the remainder is a single group that keeps its ID.
    if (Split == End)
      return;

    const uint64_t FreshId = NextId++;
    while (Cur != Split) {
      (*Cur)->GroupId = FreshId;
      ++Cur;
    }
  }
}
// Visits each maximal run of consecutive sections in V that share a group ID
// and asks segregate() to split that run using Eq. V must already be ordered
// so that members of one group are adjacent.
template <class ELFT>
void ICF<ELFT>::forEachGroup(std::vector<InputSection<ELFT> *> &V,
                             Comparator Eq) {
  InputSection<ELFT> **Cur = V.data();
  InputSection<ELFT> **Last = Cur + V.size();
  while (Cur != Last) {
    InputSection<ELFT> *Leader = *Cur;
    auto LeavesGroup = [&](InputSection<ELFT> *S) {
      return S->GroupId != Leader->GroupId;
    };
    InputSection<ELFT> **RunEnd = std::find_if(Cur + 1, Last, LeavesGroup);
    segregate(Cur, RunEnd, Eq);
    Cur = RunEnd;
  }
}
// Tells whether two relocation lists agree in everything except the symbols
// they point at: same length and, entry by entry, the same offset, relocation
// type and addend.
template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::relocationEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) {
  if (RelsA.size() != RelsB.size())
    return false;

  for (size_t Idx = 0, N = RelsA.size(); Idx != N; ++Idx) {
    const RelTy &Lhs = RelsA[Idx];
    const RelTy &Rhs = RelsB[Idx];
    if (Lhs.r_offset != Rhs.r_offset)
      return false;
    if (Lhs.getType(Config->Mips64EL) != Rhs.getType(Config->Mips64EL))
      return false;
    if (getAddend<ELFT>(Lhs) != getAddend<ELFT>(Rhs))
      return false;
  }
  return true;
}
// Compares the parts of two sections that cannot change while groups are
// being refined: the shape of their relocations (everything but the targets),
// the section flags, the size and the raw contents.
template <class ELFT>
bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A,
                               const InputSection<ELFT> *B) {
  const size_t NumRelSecs = A->RelocSections.size();
  if (NumRelSecs != B->RelocSections.size())
    return false;

  for (size_t Idx = 0; Idx != NumRelSecs; ++Idx) {
    const Elf_Shdr *RelA = A->RelocSections[Idx];
    const Elf_Shdr *RelB = B->RelocSections[Idx];
    ELFFile<ELFT> &ObjA = A->File->getObj();
    ELFFile<ELFT> &ObjB = B->File->getObj();

    // The record format of A's relocation section selects the comparison.
    const bool Same = RelA->sh_type == SHT_RELA
                          ? relocationEq(ObjA.relas(RelA), ObjB.relas(RelB))
                          : relocationEq(ObjA.rels(RelA), ObjB.rels(RelB));
    if (!Same)
      return false;
  }

  if (!(A->getSectionHdr()->sh_flags == B->getSectionHdr()->sh_flags))
    return false;
  if (!(A->getSize() == B->getSize()))
    return false;
  return A->getSectionData() == B->getSectionData();
}
// Compares the targets of two relocation lists entry by entry. Two targets
// match when they are the very same symbol, or when both are DefinedRegular
// symbols with equal values whose sections are regular InputSections that
// currently share a non-zero group ID.
//
// RelsB is walked in lockstep with RelsA without a separate bound check; the
// pass runs equalsConstant() (and with it relocationEq()'s length check) on
// every group before this function is reached.
template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A,
                           const InputSection<ELFT> *B, ArrayRef<RelTy> RelsA,
                           ArrayRef<RelTy> RelsB) {
  const RelTy *IA = RelsA.begin();
  const RelTy *EA = RelsA.end();
  const RelTy *IB = RelsB.begin();
  for (; IA != EA; ++IA, ++IB) {
    SymbolBody &SA = A->File->getRelocTargetSym(*IA);
    SymbolBody &SB = B->File->getRelocTargetSym(*IB);
    if (&SA == &SB)
      continue;

    // Or, the symbols should be pointing to the same section
    // in terms of the group ID.
    auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA);
    auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB);
    if (!DA || !DB)
      return false;
    if (DA->Value != DB->Value)
      return false;

    // A defined symbol may have no section attached (presumably absolute
    // symbols -- confirm against Symbols.h). Plain dyn_cast must not be given
    // a null pointer, so use the null-tolerant form; such symbols are then
    // conservatively treated as different by the check below.
    InputSection<ELFT> *X = dyn_cast_or_null<InputSection<ELFT>>(DA->Section);
    InputSection<ELFT> *Y = dyn_cast_or_null<InputSection<ELFT>>(DB->Section);
    if (X && Y && X->GroupId && X->GroupId == Y->GroupId)
      continue;
    return false;
  }
  return true;
}
// Compares the parts of two sections that depend on the current grouping,
// i.e. the relocation targets, for every pair of corresponding relocation
// sections.
template <class ELFT>
bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A,
                               const InputSection<ELFT> *B) {
  const size_t NumRelSecs = A->RelocSections.size();
  for (size_t Idx = 0; Idx != NumRelSecs; ++Idx) {
    const Elf_Shdr *RelA = A->RelocSections[Idx];
    const Elf_Shdr *RelB = B->RelocSections[Idx];
    ELFFile<ELFT> &ObjA = A->File->getObj();
    ELFFile<ELFT> &ObjB = B->File->getObj();

    // The record format of A's relocation section selects the comparison.
    const bool Same = RelA->sh_type == SHT_RELA
                          ? variableEq(A, B, ObjA.relas(RelA), ObjB.relas(RelB))
                          : variableEq(A, B, ObjA.rels(RelA), ObjB.rels(RelB));
    if (!Same)
      return false;
  }
  return true;
}
// Entry point of the pass: seeds groups from content hashes, refines them
// first by constant properties and then repeatedly by relocation targets
// until nothing changes, and finally folds each remaining group into its
// first member.
template <class ELFT> void ICF<ELFT>::run() {
  std::vector<InputSection<ELFT> *> Sections = getSections();

  // Seed every section's group ID with its hash. Equal IDs therefore mean
  // "probably, but not certainly, the same static contents". The top bit is
  // forced on so that hash-derived IDs never collide with the serial IDs
  // handed out later.
  const uint64_t HashTag = uint64_t(1) << 63;
  for (InputSection<ELFT> *Sec : Sections)
    Sec->GroupId = getHash(Sec) | HashTag;

  // Make members of one group adjacent; all later steps rely on that.
  auto ByGroupId = [](InputSection<ELFT> *A, InputSection<ELFT> *B) {
    return A->GroupId < B->GroupId;
  };
  std::stable_sort(Sections.begin(), Sections.end(), ByGroupId);

  // First refinement: static contents. One pass is enough.
  forEachGroup(Sections, equalsConstant);

  // Second refinement: relocation targets. Repeat until a pass hands out no
  // new group ID.
  int Cnt = 1;
  uint64_t IdBeforePass;
  do {
    ++Cnt;
    IdBeforePass = NextId;
    forEachGroup(Sections, equalsVariable);
  } while (IdBeforePass != NextId);
  log("ICF needed " + Twine(Cnt) + " iterations.");

  // Fold every group with more than one member into its first section.
  auto It = Sections.begin();
  auto Last = Sections.end();
  while (It != Last) {
    InputSection<ELFT> *Keeper = *It;
    ++It;
    auto GroupEnd = std::find_if(It, Last, [&](InputSection<ELFT> *S) {
      return Keeper->GroupId != S->GroupId;
    });
    if (It == GroupEnd)
      continue;

    log("selected " + Keeper->getSectionName());
    for (; It != GroupEnd; ++It) {
      InputSection<ELFT> *Dup = *It;
      log(" removed " + Dup->getSectionName());
      Keeper->replace(Dup);
    }
  }
}
// ICF entry point function.
template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); }
template void elf::doIcf<ELF32LE>();
template void elf::doIcf<ELF32BE>();
template void elf::doIcf<ELF64LE>();
template void elf::doIcf<ELF64BE>();

19
ELF/ICF.h Normal file
View File

@ -0,0 +1,19 @@
//===- ICF.h --------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_ICF_H
#define LLD_ELF_ICF_H
namespace lld {
namespace elf {
// Runs the Identical Code Folding pass for the ELF flavour ELFT.
// The definition and the explicit instantiations (ELF32LE, ELF32BE, ELF64LE,
// ELF64BE) live in ICF.cpp.
template <class ELFT> void doIcf();
}
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -18,11 +18,16 @@
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Comdat.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/StringSaver.h"
#include <map>
namespace lld {
namespace elf2 {
namespace elf {
using llvm::object::Archive;
@ -33,25 +38,45 @@ class SymbolBody;
// The root class of input files.
class InputFile {
public:
enum Kind { ObjectKind, SharedKind, ArchiveKind };
enum Kind {
ObjectKind,
SharedKind,
LazyObjectKind,
ArchiveKind,
BitcodeKind,
};
Kind kind() const { return FileKind; }
StringRef getName() const { return MB.getBufferIdentifier(); }
MemoryBufferRef MB;
// Filename of .a which contained this file. If this file was
// not in an archive file, it is the empty string. We use this
// string for creating error messages.
StringRef ArchiveName;
// If this is an architecture-specific file, the following members
// have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
ELFKind EKind = ELFNoneKind;
uint16_t EMachine = llvm::ELF::EM_NONE;
protected:
InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
MemoryBufferRef MB;
private:
const Kind FileKind;
};
// Returns "(internal)", "foo.a(bar.o)" or "baz.o".
std::string getFilename(const InputFile *F);
template <typename ELFT> class ELFFileBase : public InputFile {
public:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Word Elf_Word;
typedef typename ELFT::SymRange Elf_Sym_Range;
ELFFileBase(Kind K, MemoryBufferRef M);
static bool classof(const InputFile *F) {
@ -59,11 +84,9 @@ template <typename ELFT> class ELFFileBase : public InputFile {
return K == ObjectKind || K == SharedKind;
}
static ELFKind getELFKind();
const llvm::object::ELFFile<ELFT> &getObj() const { return ELFObj; }
llvm::object::ELFFile<ELFT> &getObj() { return ELFObj; }
uint16_t getEMachine() const { return getObj().getHeader()->e_machine; }
uint8_t getOSABI() const {
return getObj().getHeader()->e_ident[llvm::ELF::EI_OSABI];
}
@ -72,39 +95,36 @@ template <typename ELFT> class ELFFileBase : public InputFile {
uint32_t getSectionIndex(const Elf_Sym &Sym) const;
Elf_Sym_Range getElfSymbols(bool OnlyGlobals);
protected:
llvm::object::ELFFile<ELFT> ELFObj;
const Elf_Shdr *Symtab = nullptr;
ArrayRef<Elf_Word> SymtabSHNDX;
StringRef StringTable;
void initStringTable();
Elf_Sym_Range getNonLocalSymbols();
Elf_Sym_Range getSymbolsHelper(bool);
};
// .o file.
template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> {
typedef ELFFileBase<ELFT> Base;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
// uint32 in ELFT's byte order
typedef llvm::support::detail::packed_endian_specific_integral<
uint32_t, ELFT::TargetEndianness, 2>
uint32_X;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::SymRange Elf_Sym_Range;
typedef typename ELFT::Word Elf_Word;
typedef typename ELFT::uint uintX_t;
StringRef getShtGroupSignature(const Elf_Shdr &Sec);
ArrayRef<uint32_X> getShtGroupEntries(const Elf_Shdr &Sec);
ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec);
public:
static bool classof(const InputFile *F) {
return F->kind() == Base::ObjectKind;
}
ArrayRef<SymbolBody *> getSymbols() { return SymbolBodies; }
ArrayRef<SymbolBody *> getSymbols();
ArrayRef<SymbolBody *> getLocalSymbols();
ArrayRef<SymbolBody *> getNonLocalSymbols();
explicit ObjectFile(MemoryBufferRef M);
void parse(llvm::DenseSet<StringRef> &ComdatGroups);
@ -112,15 +132,14 @@ template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> {
ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; }
InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const;
SymbolBody *getSymbolBody(uint32_t SymbolIndex) const {
uint32_t FirstNonLocal = this->Symtab->sh_info;
if (SymbolIndex < FirstNonLocal)
return nullptr;
return SymbolBodies[SymbolIndex - FirstNonLocal];
SymbolBody &getSymbolBody(uint32_t SymbolIndex) const {
return *SymbolBodies[SymbolIndex];
}
Elf_Sym_Range getLocalSymbols();
const Elf_Sym *getLocalSymbol(uintX_t SymIndex);
template <typename RelT> SymbolBody &getRelocTargetSym(const RelT &Rel) const {
uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL);
return getSymbolBody(SymIndex);
}
const Elf_Shdr *getSymbolTable() const { return this->Symtab; };
@ -129,12 +148,22 @@ template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> {
// R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
uint32_t getMipsGp0() const;
// The number is the offset in the string table. It will be used as the
// st_name of the symbol.
std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms;
// SymbolBodies and Thunks for sections in this file are allocated
// using this buffer.
llvm::BumpPtrAllocator Alloc;
private:
void initializeSections(llvm::DenseSet<StringRef> &ComdatGroups);
void initializeSymbols();
InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec);
InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec);
SymbolBody *createSymbolBody(StringRef StringTable, const Elf_Sym *Sym);
bool shouldMerge(const Elf_Shdr &Sec);
SymbolBody *createSymbolBody(const Elf_Sym *Sym);
// List of all sections defined by this file.
std::vector<InputSectionBase<ELFT> *> Sections;
@ -143,49 +172,97 @@ template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> {
std::vector<SymbolBody *> SymbolBodies;
// MIPS .reginfo section defined by this file.
MipsReginfoInputSection<ELFT> *MipsReginfo = nullptr;
std::unique_ptr<MipsReginfoInputSection<ELFT>> MipsReginfo;
// MIPS .MIPS.options section defined by this file.
std::unique_ptr<MipsOptionsInputSection<ELFT>> MipsOptions;
llvm::BumpPtrAllocator Alloc;
llvm::SpecificBumpPtrAllocator<InputSection<ELFT>> IAlloc;
llvm::SpecificBumpPtrAllocator<MergeInputSection<ELFT>> MAlloc;
llvm::SpecificBumpPtrAllocator<EHInputSection<ELFT>> EHAlloc;
llvm::SpecificBumpPtrAllocator<EhInputSection<ELFT>> EHAlloc;
};
// LazyObjectFile is analogous to ArchiveFile in the sense that
// the file contains lazy symbols. The difference is that
// LazyObjectFile wraps a single file instead of multiple files.
//
// This class is used for --start-lib and --end-lib options which
// instruct the linker to link object files between them with the
// archive file semantics.
class LazyObjectFile : public InputFile {
public:
// Wraps buffer M; the file is tagged with LazyObjectKind.
explicit LazyObjectFile(MemoryBufferRef M) : InputFile(LazyObjectKind, M) {}
// LLVM-style RTTI hook: true for files whose kind is LazyObjectKind.
static bool classof(const InputFile *F) {
return F->kind() == LazyObjectKind;
}
// Defined out of line; templated on the ELF flavour.
template <class ELFT> void parse();
// Defined out of line.
// NOTE(review): presumably hands out the wrapped buffer once, tracked by
// Seen below -- confirm in InputFiles.cpp.
MemoryBufferRef getBuffer();
private:
// Symbol-name helpers, defined out of line.
std::vector<StringRef> getSymbols();
template <class ELFT> std::vector<StringRef> getElfSymbols();
std::vector<StringRef> getBitcodeSymbols();
// Alloc must stay declared before Saver: Saver is initialized from Alloc and
// members are constructed in declaration order.
llvm::BumpPtrAllocator Alloc;
llvm::StringSaver Saver{Alloc};
// Starts out false.
bool Seen = false;
};
// An ArchiveFile object represents a .a file.
class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
void parse();
template <class ELFT> void parse();
// Returns a memory buffer for a given symbol. An empty memory buffer
// is returned if we have already returned the same memory buffer.
// (So that we don't instantiate same members more than once.)
MemoryBufferRef getMember(const Archive::Symbol *Sym);
llvm::MutableArrayRef<Lazy> getLazySymbols() { return LazySymbols; }
private:
std::unique_ptr<Archive> File;
std::vector<Lazy> LazySymbols;
llvm::DenseSet<uint64_t> Seen;
};
// Input file tagged with BitcodeKind (see classof below).
class BitcodeFile : public InputFile {
public:
// Defined out of line.
explicit BitcodeFile(MemoryBufferRef M);
// LLVM-style RTTI hook: true for files whose kind is BitcodeKind.
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
// Defined out of line; templated on the ELF flavour. ComdatGroups is passed
// by non-const reference.
template <class ELFT>
void parse(llvm::DenseSet<StringRef> &ComdatGroups);
// Read-only view of the Symbols vector below.
ArrayRef<Symbol *> getSymbols() { return Symbols; }
// Defined out of line.
static bool shouldSkip(uint32_t Flags);
// Publicly accessible, owning handle to the IR object file.
std::unique_ptr<llvm::object::IRObjectFile> Obj;
private:
std::vector<Symbol *> Symbols;
// Alloc must stay declared before Saver: Saver is initialized from Alloc and
// members are constructed in declaration order.
llvm::BumpPtrAllocator Alloc;
llvm::StringSaver Saver{Alloc};
// Defined out of line.
template <class ELFT>
Symbol *createSymbol(const llvm::DenseSet<const llvm::Comdat *> &KeptComdats,
const llvm::object::IRObjectFile &Obj,
const llvm::object::BasicSymbolRef &Sym);
};
// .so file.
template <class ELFT> class SharedFile : public ELFFileBase<ELFT> {
typedef ELFFileBase<ELFT> Base;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Word Elf_Word;
typedef typename ELFT::SymRange Elf_Sym_Range;
typedef typename ELFT::Versym Elf_Versym;
typedef typename ELFT::Verdef Elf_Verdef;
std::vector<SharedSymbol<ELFT>> SymbolBodies;
std::vector<StringRef> Undefs;
StringRef SoName;
const Elf_Shdr *VersymSec = nullptr;
const Elf_Shdr *VerdefSec = nullptr;
public:
StringRef getSoName() const { return SoName; }
llvm::MutableArrayRef<SharedSymbol<ELFT>> getSharedSymbols() {
return SymbolBodies;
}
const Elf_Shdr *getSection(const Elf_Sym &Sym) const;
llvm::ArrayRef<StringRef> getUndefinedSymbols() { return Undefs; }
@ -197,6 +274,19 @@ template <class ELFT> class SharedFile : public ELFFileBase<ELFT> {
void parseSoName();
void parseRest();
std::vector<const Elf_Verdef *> parseVerdefs(const Elf_Versym *&Versym);
struct NeededVer {
// The string table offset of the version name in the output file.
size_t StrTab;
// The version identifier for this version name.
uint16_t Index;
};
// Mapping from Elf_Verdef data structures to information about Elf_Vernaux
// data structures in the output file.
std::map<const Elf_Verdef *, NeededVer> VerdefMap;
// Used for --as-needed
bool AsNeeded = false;
@ -204,10 +294,11 @@ template <class ELFT> class SharedFile : public ELFFileBase<ELFT> {
bool isNeeded() const { return !AsNeeded || IsUsed; }
};
std::unique_ptr<InputFile> createObjectFile(MemoryBufferRef MB);
std::unique_ptr<InputFile> createObjectFile(MemoryBufferRef MB,
StringRef ArchiveName = "");
std::unique_ptr<InputFile> createSharedFile(MemoryBufferRef MB);
} // namespace elf2
} // namespace elf
} // namespace lld
#endif

View File

@ -9,86 +9,118 @@
#include "InputSection.h"
#include "Config.h"
#include "EhFrame.h"
#include "Error.h"
#include "InputFiles.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "Target.h"
#include "Thunks.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
// Reports whether S must be left out of the output: it is null, it is the
// shared Discarded placeholder, it is not live, or the linker script says it
// is discarded. The script is only consulted when the cheaper checks pass.
template <class ELFT> bool elf::isDiscarded(InputSectionBase<ELFT> *S) {
  if (!S)
    return true;
  if (S == &InputSection<ELFT>::Discarded)
    return true;
  if (!S->Live)
    return true;
  return Script<ELFT>::X->isDiscarded(S);
}
template <class ELFT>
InputSectionBase<ELFT>::InputSectionBase(ObjectFile<ELFT> *File,
InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File,
const Elf_Shdr *Header,
Kind SectionKind)
: Header(Header), File(File), SectionKind(SectionKind) {}
: Header(Header), File(File), SectionKind(SectionKind), Repl(this),
Compressed(Header->sh_flags & SHF_COMPRESSED) {
// The garbage collector sets sections' Live bits.
// If GC is disabled, all sections are considered live by default.
Live = !Config->GcSections;
// The ELF spec states that a value of 0 means the section has
// no alignment constraints.
Alignment = std::max<uintX_t>(Header->sh_addralign, 1);
}
// Size of this section. For a regular InputSection that carries thunks, the
// size is the thunk offset plus the total thunk size; in every other case it
// is the sh_size recorded in the section header.
template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const {
  const auto *Regular = dyn_cast<InputSection<ELFT>>(this);
  if (Regular && Regular->getThunksSize() > 0)
    return Regular->getThunkOff() + Regular->getThunksSize();
  return Header->sh_size;
}
template <class ELFT> StringRef InputSectionBase<ELFT>::getSectionName() const {
ErrorOr<StringRef> Name = File->getObj().getSectionName(this->Header);
error(Name);
return *Name;
return check(File->getObj().getSectionName(this->Header));
}
template <class ELFT>
ArrayRef<uint8_t> InputSectionBase<ELFT>::getSectionData() const {
ErrorOr<ArrayRef<uint8_t>> Ret =
this->File->getObj().getSectionContents(this->Header);
error(Ret);
return *Ret;
if (Compressed)
return ArrayRef<uint8_t>((const uint8_t *)Uncompressed.data(),
Uncompressed.size());
return check(this->File->getObj().getSectionContents(this->Header));
}
template <class ELFT>
typename ELFFile<ELFT>::uintX_t
InputSectionBase<ELFT>::getOffset(uintX_t Offset) {
typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const {
switch (SectionKind) {
case Regular:
return cast<InputSection<ELFT>>(this)->OutSecOff + Offset;
case EHFrame:
return cast<EHInputSection<ELFT>>(this)->getOffset(Offset);
return cast<EhInputSection<ELFT>>(this)->getOffset(Offset);
case Merge:
return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset);
case MipsReginfo:
// MIPS .reginfo sections are consumed by the linker,
// so it should never be copied to output.
llvm_unreachable("MIPS .reginfo reached writeTo().");
case MipsOptions:
// MIPS .reginfo and .MIPS.options sections are consumed by the linker,
// and the linker produces a single output section. It is possible that
// input files contain section symbol points to the corresponding input
// section. Redirect it to the produced output section.
if (Offset != 0)
fatal("Unsupported reference to the middle of '" + getSectionName() +
"' section");
return this->OutSec->getVA();
}
llvm_unreachable("Invalid section kind");
llvm_unreachable("invalid section kind");
}
// Inflates the section contents into the Uncompressed member. Each failure
// condition is reported through fatal(): zlib not available, section shorter
// than a compression header, a compression type other than ELFCOMPRESS_ZLIB,
// or zlib itself failing.
template <class ELFT> void InputSectionBase<ELFT>::uncompress() {
  if (!zlib::isAvailable())
    fatal("build lld with zlib to enable compressed sections support");

  // On disk, a compressed section is an Elf_Chdr header immediately followed
  // by the compressed payload.
  ArrayRef<uint8_t> Raw =
      check(this->File->getObj().getSectionContents(this->Header));
  if (Raw.size() < sizeof(Elf_Chdr))
    fatal("corrupt compressed section");

  auto *Chdr = reinterpret_cast<const Elf_Chdr *>(Raw.data());
  ArrayRef<uint8_t> Payload = Raw.slice(sizeof(Elf_Chdr));
  if (Chdr->ch_type != ELFCOMPRESS_ZLIB)
    fatal("unsupported compression type");

  StringRef Input(reinterpret_cast<const char *>(Payload.data()),
                  Payload.size());
  if (zlib::uncompress(Input, Uncompressed, Chdr->ch_size) != zlib::StatusOK)
    fatal("error uncompressing section");
}
template <class ELFT>
typename ELFFile<ELFT>::uintX_t
InputSectionBase<ELFT>::getOffset(const Elf_Sym &Sym) {
return getOffset(Sym.st_value);
}
// Returns a section that Rel relocation is pointing to.
template <class ELFT>
InputSectionBase<ELFT> *
InputSectionBase<ELFT>::getRelocTarget(const Elf_Rel &Rel) {
// Global symbol
uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL);
if (SymbolBody *B = File->getSymbolBody(SymIndex))
if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B->repl()))
return D->Section;
// Local symbol
if (const Elf_Sym *Sym = File->getLocalSymbol(SymIndex))
if (InputSectionBase<ELFT> *Sec = File->getSection(*Sym))
return Sec;
return nullptr;
typename ELFT::uint
InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const {
return getOffset(Sym.Value);
}
template <class ELFT>
InputSectionBase<ELFT> *
InputSectionBase<ELFT>::getRelocTarget(const Elf_Rela &Rel) {
return getRelocTarget(reinterpret_cast<const Elf_Rel &>(Rel));
}
template <class ELFT>
InputSection<ELFT>::InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header)
InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F,
const Elf_Shdr *Header)
: InputSectionBase<ELFT>(F, Header, Base::Regular) {}
template <class ELFT>
@ -97,267 +129,494 @@ bool InputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
}
template <class ELFT>
template <bool isRela>
uint8_t *
InputSectionBase<ELFT>::findMipsPairedReloc(uint8_t *Buf, uint32_t SymIndex,
uint32_t Type,
RelIteratorRange<isRela> Rels) {
// Some MIPS relocations use addend calculated from addend of the relocation
// itself and addend of paired relocation. ABI requires to compute such
// combined addend in case of REL relocation record format only.
// See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
if (isRela || Config->EMachine != EM_MIPS)
return nullptr;
if (Type == R_MIPS_HI16)
Type = R_MIPS_LO16;
else if (Type == R_MIPS_PCHI16)
Type = R_MIPS_PCLO16;
else if (Type == R_MICROMIPS_HI16)
Type = R_MICROMIPS_LO16;
else
return nullptr;
for (const auto &RI : Rels) {
if (RI.getType(Config->Mips64EL) != Type)
continue;
if (RI.getSymbol(Config->Mips64EL) != SymIndex)
continue;
uintX_t Offset = getOffset(RI.r_offset);
if (Offset == (uintX_t)-1)
return nullptr;
return Buf + Offset;
InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() {
assert(this->Header->sh_type == SHT_RELA || this->Header->sh_type == SHT_REL);
ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections();
return Sections[this->Header->sh_info];
}
// Appends T to the list of thunks attached to this section.
template <class ELFT>
void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) {
  this->Thunks.push_back(T);
}
// Offset at which thunks start: the sh_size recorded in the section header.
template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const {
  const auto *Hdr = this->Header;
  return Hdr->sh_size;
}
// Sum of size() over every thunk attached to this section; zero when the
// section has no thunks.
template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const {
  uint64_t Bytes = 0;
  for (auto It = Thunks.begin(), Last = Thunks.end(); It != Last; ++It)
    Bytes += (*It)->size();
  return Bytes;
}
// This is used for -r. We can't use memcpy to copy relocations because we need
// to update symbol table offset and section index for each relocation. So we
// copy relocations one by one.
template <class ELFT>
template <class RelTy>
void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
InputSectionBase<ELFT> *RelocatedSection = getRelocatedSection();
for (const RelTy &Rel : Rels) {
uint32_t Type = Rel.getType(Config->Mips64EL);
SymbolBody &Body = this->File->getRelocTargetSym(Rel);
RelTy *P = reinterpret_cast<RelTy *>(Buf);
Buf += sizeof(RelTy);
P->r_offset = RelocatedSection->getOffset(Rel.r_offset);
P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL);
}
return nullptr;
}
// Returns Page(Expr), the page address of Expr: Expr with its low 12 bits
// cleared, i.e. Expr & ~0xFFF. The 0xFFF mask is fixed and applies even when
// the platform's machine page size has a different value.
static uint64_t getAArch64Page(uint64_t Expr) {
  const uint64_t InPageBits = 0xFFF;
  return Expr & ~InPageBits;
}
template <class ELFT>
static typename llvm::object::ELFFile<ELFT>::uintX_t
getSymSize(SymbolBody &Body) {
if (auto *SS = dyn_cast<DefinedElf<ELFT>>(&Body))
return SS->Sym.st_size;
return 0;
static typename ELFT::uint getSymVA(uint32_t Type, typename ELFT::uint A,
typename ELFT::uint P,
const SymbolBody &Body, RelExpr Expr) {
typedef typename ELFT::uint uintX_t;
switch (Expr) {
case R_HINT:
llvm_unreachable("cannot relocate hint relocs");
case R_TLSLD:
return Out<ELFT>::Got->getTlsIndexOff() + A -
Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t);
case R_TLSLD_PC:
return Out<ELFT>::Got->getTlsIndexVA() + A - P;
case R_THUNK_ABS:
return Body.getThunkVA<ELFT>() + A;
case R_THUNK_PC:
case R_THUNK_PLT_PC:
return Body.getThunkVA<ELFT>() + A - P;
case R_PPC_TOC:
return getPPC64TocBase() + A;
case R_TLSGD:
return Out<ELFT>::Got->getGlobalDynOffset(Body) + A -
Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t);
case R_TLSGD_PC:
return Out<ELFT>::Got->getGlobalDynAddr(Body) + A - P;
case R_TLSDESC:
return Out<ELFT>::Got->getGlobalDynAddr(Body) + A;
case R_TLSDESC_PAGE:
return getAArch64Page(Out<ELFT>::Got->getGlobalDynAddr(Body) + A) -
getAArch64Page(P);
case R_PLT:
return Body.getPltVA<ELFT>() + A;
case R_PLT_PC:
case R_PPC_PLT_OPD:
return Body.getPltVA<ELFT>() + A - P;
case R_SIZE:
return Body.getSize<ELFT>() + A;
case R_GOTREL:
return Body.getVA<ELFT>(A) - Out<ELFT>::Got->getVA();
case R_RELAX_TLS_GD_TO_IE_END:
case R_GOT_FROM_END:
return Body.getGotOffset<ELFT>() + A -
Out<ELFT>::Got->getNumEntries() * sizeof(uintX_t);
case R_RELAX_TLS_GD_TO_IE_ABS:
case R_GOT:
return Body.getGotVA<ELFT>() + A;
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
case R_GOT_PAGE_PC:
return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P);
case R_RELAX_TLS_GD_TO_IE:
case R_GOT_PC:
return Body.getGotVA<ELFT>() + A - P;
case R_GOTONLY_PC:
return Out<ELFT>::Got->getVA() + A - P;
case R_RELAX_TLS_LD_TO_LE:
case R_RELAX_TLS_IE_TO_LE:
case R_RELAX_TLS_GD_TO_LE:
case R_TLS:
if (Target->TcbSize)
return Body.getVA<ELFT>(A) +
alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align);
return Body.getVA<ELFT>(A) - Out<ELFT>::TlsPhdr->p_memsz;
case R_RELAX_TLS_GD_TO_LE_NEG:
case R_NEG_TLS:
return Out<ELF32LE>::TlsPhdr->p_memsz - Body.getVA<ELFT>(A);
case R_ABS:
case R_RELAX_GOT_PC_NOPIC:
return Body.getVA<ELFT>(A);
case R_GOT_OFF:
return Body.getGotOffset<ELFT>() + A;
case R_MIPS_GOT_LOCAL_PAGE:
// If relocation against MIPS local symbol requires GOT entry, this entry
// should be initialized by 'page address'. This address is high 16-bits
// of sum the symbol's value and the addend.
return Out<ELFT>::Got->getMipsLocalPageOffset(Body.getVA<ELFT>(A));
case R_MIPS_GOT_OFF:
// In case of MIPS if a GOT relocation has non-zero addend this addend
// should be applied to the GOT entry content not to the GOT entry offset.
// That is why we use separate expression type.
return Out<ELFT>::Got->getMipsGotOffset(Body, A);
case R_MIPS_TLSGD:
return Out<ELFT>::Got->getGlobalDynOffset(Body) +
Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset;
case R_MIPS_TLSLD:
return Out<ELFT>::Got->getTlsIndexOff() +
Out<ELFT>::Got->getMipsTlsOffset() - MipsGPOffset;
case R_PPC_OPD: {
uint64_t SymVA = Body.getVA<ELFT>(A);
// If we have an undefined weak symbol, we might get here with a symbol
// address of zero. That could overflow, but the code must be unreachable,
// so don't bother doing anything at all.
if (!SymVA)
return 0;
if (Out<ELF64BE>::Opd) {
// If this is a local call, and we currently have the address of a
// function-descriptor, get the underlying code address instead.
uint64_t OpdStart = Out<ELF64BE>::Opd->getVA();
uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->getSize();
bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd;
if (InOpd)
SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]);
}
return SymVA - P;
}
case R_PC:
case R_RELAX_GOT_PC:
return Body.getVA<ELFT>(A) - P;
case R_PLT_PAGE_PC:
case R_PAGE_PC:
return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P);
}
llvm_unreachable("Invalid expression");
}
// This function applies relocations to sections without SHF_ALLOC bit.
// Such sections are never mapped to memory at runtime. Debug sections are
// an example. Relocations in non-alloc sections are much easier to
// handle than in allocated sections because they never need complex
// treatment such as GOT or PLT (because at runtime no one refers to them).
// So, we handle relocations for non-alloc sections directly in this
// function as a performance optimization.
template <class ELFT>
template <bool isRela>
void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd,
RelIteratorRange<isRela> Rels) {
typedef Elf_Rel_Impl<ELFT, isRela> RelType;
size_t Num = Rels.end() - Rels.begin();
for (size_t I = 0; I < Num; ++I) {
const RelType &RI = *(Rels.begin() + I);
uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
uint32_t Type = RI.getType(Config->Mips64EL);
uintX_t Offset = getOffset(RI.r_offset);
if (Offset == (uintX_t)-1)
continue;
template <class RelTy>
void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) {
const unsigned Bits = sizeof(uintX_t) * 8;
for (const RelTy &Rel : Rels) {
uint32_t Type = Rel.getType(Config->Mips64EL);
uintX_t Offset = this->getOffset(Rel.r_offset);
uint8_t *BufLoc = Buf + Offset;
uintX_t AddrLoc = OutSec->getVA() + Offset;
auto NextRelocs = llvm::make_range(&RI, Rels.end());
uintX_t Addend = getAddend<ELFT>(Rel);
if (!RelTy::IsRela)
Addend += Target->getImplicitAddend(BufLoc, Type);
if (Target->isTlsLocalDynamicReloc(Type) &&
!Target->isTlsOptimized(Type, nullptr)) {
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
Out<ELFT>::Got->getLocalTlsIndexVA() +
getAddend<ELFT>(RI));
continue;
SymbolBody &Sym = this->File->getRelocTargetSym(Rel);
if (Target->getRelExpr(Type, Sym) != R_ABS) {
error(this->getSectionName() + " has non-ABS reloc");
return;
}
const Elf_Shdr *SymTab = File->getSymbolTable();
SymbolBody *Body = nullptr;
if (SymIndex >= SymTab->sh_info)
Body = File->getSymbolBody(SymIndex)->repl();
uintX_t AddrLoc = this->OutSec->getVA() + Offset;
uint64_t SymVA =
SignExtend64<Bits>(getSymVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS));
Target->relocateOne(BufLoc, Type, SymVA);
}
}
if (Target->isTlsOptimized(Type, Body)) {
uintX_t SymVA;
if (!Body)
SymVA = getLocalRelTarget(*File, RI, 0);
else if (Target->relocNeedsGot(Type, *Body))
SymVA = Out<ELFT>::Got->getEntryAddr(*Body);
template <class ELFT>
void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) {
// scanReloc function in Writer.cpp constructs Relocations
// vector only for SHF_ALLOC'ed sections. For other sections,
// we handle relocations directly here.
auto *IS = dyn_cast<InputSection<ELFT>>(this);
if (IS && !(IS->Header->sh_flags & SHF_ALLOC)) {
for (const Elf_Shdr *RelSec : IS->RelocSections) {
if (RelSec->sh_type == SHT_RELA)
IS->relocateNonAlloc(Buf, IS->File->getObj().relas(RelSec));
else
SymVA = getSymVA<ELFT>(*Body);
// By optimizing TLS relocations, it is sometimes needed to skip
// relocations that immediately follow TLS relocations. This function
// knows how many slots we need to skip.
I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, SymVA,
*Body);
continue;
IS->relocateNonAlloc(Buf, IS->File->getObj().rels(RelSec));
}
return;
}
// Handle relocations for local symbols -- they never get
// resolved so we don't allocate a SymbolBody.
uintX_t A = getAddend<ELFT>(RI);
if (!Body) {
uintX_t SymVA = getLocalRelTarget(*File, RI, A);
// We need to adjust SymVA value in case of R_MIPS_GPREL16/32 relocations
// because they use the following expression to calculate the relocation's
// result for local symbol: S + A + GP0 - G.
if (Config->EMachine == EM_MIPS &&
(Type == R_MIPS_GPREL16 || Type == R_MIPS_GPREL32))
SymVA += File->getMipsGp0();
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, SymVA, 0,
findMipsPairedReloc(Buf, SymIndex, Type, NextRelocs));
continue;
}
const unsigned Bits = sizeof(uintX_t) * 8;
for (const Relocation<ELFT> &Rel : Relocations) {
uintX_t Offset = Rel.InputSec->getOffset(Rel.Offset);
uint8_t *BufLoc = Buf + Offset;
uint32_t Type = Rel.Type;
uintX_t A = Rel.Addend;
if (Target->isTlsGlobalDynamicReloc(Type) &&
!Target->isTlsOptimized(Type, Body)) {
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
Out<ELFT>::Got->getGlobalDynAddr(*Body) +
getAddend<ELFT>(RI));
continue;
}
uintX_t AddrLoc = OutSec->getVA() + Offset;
RelExpr Expr = Rel.Expr;
uint64_t SymVA =
SignExtend64<Bits>(getSymVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr));
uintX_t SymVA = getSymVA<ELFT>(*Body);
if (Target->relocNeedsPlt(Type, *Body)) {
SymVA = Out<ELFT>::Plt->getEntryAddr(*Body);
} else if (Target->relocNeedsGot(Type, *Body)) {
SymVA = Out<ELFT>::Got->getEntryAddr(*Body);
if (Body->isTls())
Type = Target->getTlsGotReloc(Type);
} else if (!Target->needsCopyRel(Type, *Body) &&
isa<SharedSymbol<ELFT>>(*Body)) {
continue;
} else if (Target->isTlsDynReloc(Type, *Body)) {
continue;
} else if (Target->isSizeReloc(Type) && canBePreempted(Body, false)) {
// A SIZE relocation is supposed to set a symbol size, but if a symbol
// can be preempted, the size at runtime may be different than link time.
// If that's the case, we leave the field alone rather than filling it
// with a possibly incorrect value.
continue;
} else if (Config->EMachine == EM_MIPS) {
if (Type == R_MIPS_HI16 && Body == Config->MipsGpDisp)
SymVA = getMipsGpAddr<ELFT>() - AddrLoc;
else if (Type == R_MIPS_LO16 && Body == Config->MipsGpDisp)
SymVA = getMipsGpAddr<ELFT>() - AddrLoc + 4;
switch (Expr) {
case R_RELAX_GOT_PC:
case R_RELAX_GOT_PC_NOPIC:
Target->relaxGot(BufLoc, SymVA);
break;
case R_RELAX_TLS_IE_TO_LE:
Target->relaxTlsIeToLe(BufLoc, Type, SymVA);
break;
case R_RELAX_TLS_LD_TO_LE:
Target->relaxTlsLdToLe(BufLoc, Type, SymVA);
break;
case R_RELAX_TLS_GD_TO_LE:
case R_RELAX_TLS_GD_TO_LE_NEG:
Target->relaxTlsGdToLe(BufLoc, Type, SymVA);
break;
case R_RELAX_TLS_GD_TO_IE:
case R_RELAX_TLS_GD_TO_IE_ABS:
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
case R_RELAX_TLS_GD_TO_IE_END:
Target->relaxTlsGdToIe(BufLoc, Type, SymVA);
break;
case R_PPC_PLT_OPD:
// Patch a nop (0x60000000) to a ld.
if (BufLoc + 8 <= BufEnd && read32be(BufLoc + 4) == 0x60000000)
write32be(BufLoc + 4, 0xe8410028); // ld %r2, 40(%r1)
// fallthrough
default:
Target->relocateOne(BufLoc, Type, SymVA);
break;
}
uintX_t Size = getSymSize<ELFT>(*Body);
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc, SymVA + A, Size + A,
findMipsPairedReloc(Buf, SymIndex, Type, NextRelocs));
}
}
template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) {
if (this->Header->sh_type == SHT_NOBITS)
return;
ELFFile<ELFT> &EObj = this->File->getObj();
// If -r is given, then an InputSection may be a relocation section.
if (this->Header->sh_type == SHT_RELA) {
copyRelocations(Buf + OutSecOff, EObj.relas(this->Header));
return;
}
if (this->Header->sh_type == SHT_REL) {
copyRelocations(Buf + OutSecOff, EObj.rels(this->Header));
return;
}
// Copy section contents from source object file to output file.
ArrayRef<uint8_t> Data = this->getSectionData();
memcpy(Buf + OutSecOff, Data.data(), Data.size());
ELFFile<ELFT> &EObj = this->File->getObj();
uint8_t *BufEnd = Buf + OutSecOff + Data.size();
// Iterate over all relocation sections that apply to this section.
for (const Elf_Shdr *RelSec : this->RelocSections) {
if (RelSec->sh_type == SHT_RELA)
this->relocate(Buf, BufEnd, EObj.relas(RelSec));
else
this->relocate(Buf, BufEnd, EObj.rels(RelSec));
uint8_t *BufEnd = Buf + OutSecOff + Data.size();
this->relocate(Buf, BufEnd);
// The section might have data/code generated by the linker that needs
// to be written after the section. Usually these are thunks - small pieces
// of code used to jump between "incompatible" functions like PIC and non-PIC
// or when the jump target is too far and its address does not fit into the
// short jump instruction.
if (!Thunks.empty()) {
Buf += OutSecOff + getThunkOff();
for (const Thunk<ELFT> *T : Thunks) {
T->writeTo(Buf);
Buf += T->size();
}
}
}
// Folds Other into this section. This section takes the larger of the two
// alignments; Other is pointed at this section's representative (Repl) and
// is marked as not live.
template <class ELFT>
void InputSection<ELFT>::replace(InputSection<ELFT> *Other) {
  // The surviving section has to honor the alignment of both sections.
  if (this->Alignment < Other->Alignment)
    this->Alignment = Other->Alignment;

  Other->Live = false;
  Other->Repl = this->Repl;
}
template <class ELFT>
SplitInputSection<ELFT>::SplitInputSection(
ObjectFile<ELFT> *File, const Elf_Shdr *Header,
elf::ObjectFile<ELFT> *File, const Elf_Shdr *Header,
typename InputSectionBase<ELFT>::Kind SectionKind)
: InputSectionBase<ELFT>(File, Header, SectionKind) {}
template <class ELFT>
EHInputSection<ELFT>::EHInputSection(ObjectFile<ELFT> *F,
EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F,
const Elf_Shdr *Header)
: SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::EHFrame) {
// Mark .eh_frame sections as live by default because there are
// usually no relocations that point to .eh_frames. Otherwise,
// the garbage collector would drop all .eh_frame sections.
// the garbage collector would drop all .eh_frame sections.
this->Live = true;
}
template <class ELFT>
bool EHInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
bool EhInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
return S->SectionKind == InputSectionBase<ELFT>::EHFrame;
}
// .eh_frame is a sequence of CIE or FDE records.
// This function splits an input section into records and returns them.
template <class ELFT>
typename EHInputSection<ELFT>::uintX_t
EHInputSection<ELFT>::getOffset(uintX_t Offset) {
void EhInputSection<ELFT>::split() {
  ArrayRef<uint8_t> Contents = this->getSectionData();
  const size_t Limit = Contents.size();

  // Walk the section record by record; each record becomes one piece whose
  // input offset is the record's start.
  size_t Pos = 0;
  while (Pos != Limit) {
    size_t RecordSize = readEhRecordSize<ELFT>(Contents.slice(Pos));
    this->Pieces.emplace_back(Pos, Contents.slice(Pos, RecordSize));

    // A 4-byte record is the empty record that marks the end. It is still
    // recorded as a piece, but nothing after it is examined.
    if (RecordSize == 4)
      return;
    Pos += RecordSize;
  }
}
template <class ELFT>
typename ELFT::uint EhInputSection<ELFT>::getOffset(uintX_t Offset) const {
// The file crtbeginT.o has relocations pointing to the start of an empty
// .eh_frame that is known to be the first in the link. It does that to
// identify the start of the output .eh_frame. Handle this special case.
if (this->getSectionHdr()->sh_size == 0)
return Offset;
std::pair<uintX_t, uintX_t> *I = this->getRangeAndSize(Offset).first;
uintX_t Base = I->second;
if (Base == uintX_t(-1))
const SectionPiece *Piece = this->getSectionPiece(Offset);
if (Piece->OutputOff == size_t(-1))
return -1; // Not in the output
uintX_t Addend = Offset - I->first;
return Base + Addend;
uintX_t Addend = Offset - Piece->InputOff;
return Piece->OutputOff + Addend;
}
// Returns the byte offset of the first null terminator in A, or
// StringRef::npos if there is none. For EntSize == 1 a terminator is a single
// zero byte; otherwise it is EntSize consecutive zero bytes starting at an
// offset that is a multiple of EntSize.
static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) {
  StringRef S((const char *)A.data(), A.size());

  // Optimize the common case.
  if (EntSize == 1)
    return S.find(0);

  // A zero entry size can never advance the scan, so there is nothing
  // meaningful to find.
  if (EntSize == 0)
    return StringRef::npos;

  // Only whole entries are inspected. Bounding the loop by the number of
  // bytes that remain (instead of testing I != N) keeps the scan inside the
  // buffer even when the size is not an exact multiple of EntSize, and using
  // size_t avoids truncating large sizes.
  const char *Begin = S.begin();
  for (size_t I = 0, N = S.size(); N - I >= EntSize; I += EntSize) {
    const char *B = Begin + I;
    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
      return I;
  }
  return StringRef::npos;
}
// Splits the contents of a SHF_STRINGS section, which is a sequence of
// null-terminated strings, into one SectionPiece per string. Each piece
// includes its terminator and records the input offset where it starts.
// Reports a fatal error if the remaining data has no terminator.
static std::vector<SectionPiece> splitStrings(ArrayRef<uint8_t> Data,
                                              size_t EntSize) {
  std::vector<SectionPiece> Result;
  for (size_t InputOff = 0; !Data.empty();) {
    size_t NullPos = findNull(Data, EntSize);
    if (NullPos == StringRef::npos)
      fatal("string is not null terminated");

    // The terminator itself (EntSize bytes) belongs to the piece.
    size_t PieceLen = NullPos + EntSize;
    ArrayRef<uint8_t> Piece = Data.slice(0, PieceLen);
    Result.emplace_back(InputOff, Piece);

    InputOff += PieceLen;
    Data = Data.slice(PieceLen);
  }
  return Result;
}
// Split non-SHF_STRINGS section. Such section is a sequence of
// fixed size records; every record becomes one SectionPiece whose input
// offset is the record's start. The data size must be a multiple of EntSize.
static std::vector<SectionPiece> splitNonStrings(ArrayRef<uint8_t> Data,
                                                 size_t EntSize) {
  size_t Size = Data.size();
  assert((Size % EntSize) == 0);

  // The number of records is known up front, so allocate once.
  std::vector<SectionPiece> V;
  V.reserve(Size / EntSize);

  // Index with size_t: the size is a size_t and must not be truncated to
  // unsigned for the loop bound.
  for (size_t I = 0; I != Size; I += EntSize)
    V.emplace_back(I, Data.slice(I, EntSize));
  return V;
}
template <class ELFT>
MergeInputSection<ELFT>::MergeInputSection(ObjectFile<ELFT> *F,
MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F,
const Elf_Shdr *Header)
: SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {}
// Populates Pieces from the section contents: string sections are split at
// null terminators, all other sections into sh_entsize-sized records. When
// garbage collection is enabled, the pieces at the offsets collected in
// LiveOffsets are then marked live.
template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() {
  ArrayRef<uint8_t> Contents = this->getSectionData();
  uintX_t EntSize = this->Header->sh_entsize;
  bool IsStrings = this->Header->sh_flags & SHF_STRINGS;

  this->Pieces = IsStrings ? splitStrings(Contents, EntSize)
                           : splitNonStrings(Contents, EntSize);

  // Without GC there is nothing to mark here.
  if (!Config->GcSections)
    return;
  for (uintX_t Off : LiveOffsets)
    this->getSectionPiece(Off)->Live = true;
}
// Type test used for casting: true when S was constructed with the Merge
// section kind.
template <class ELFT>
bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto SecKind = S->SectionKind;
  return SecKind == InputSectionBase<ELFT>::Merge;
}
// Do binary search to get a section piece at a given input offset.
template <class ELFT>
std::pair<std::pair<typename ELFFile<ELFT>::uintX_t,
typename ELFFile<ELFT>::uintX_t> *,
typename ELFFile<ELFT>::uintX_t>
SplitInputSection<ELFT>::getRangeAndSize(uintX_t Offset) {
SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) {
auto *This = static_cast<const SplitInputSection<ELFT> *>(this);
return const_cast<SectionPiece *>(This->getSectionPiece(Offset));
}
template <class ELFT>
const SectionPiece *
SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) const {
ArrayRef<uint8_t> D = this->getSectionData();
StringRef Data((const char *)D.data(), D.size());
uintX_t Size = Data.size();
if (Offset >= Size)
error("Entry is past the end of the section");
fatal("entry is past the end of the section");
// Find the element this offset points to.
auto I = std::upper_bound(
Offsets.begin(), Offsets.end(), Offset,
[](const uintX_t &A, const std::pair<uintX_t, uintX_t> &B) {
return A < B.first;
});
uintX_t End = I == Offsets.end() ? Data.size() : I->first;
Pieces.begin(), Pieces.end(), Offset,
[](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; });
--I;
return std::make_pair(&*I, End);
return &*I;
}
// Maps an offset in this input section to the corresponding offset in the
// output section. The contents of a mergeable section are not contiguous in
// the output, so the result depends on which piece contains the offset.
template <class ELFT>
typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const {
  auto Cached = OffsetMap.find(Offset);
  if (Cached == OffsetMap.end()) {
    // The map only has entries for offsets that begin a section piece. For
    // any other offset, look up the containing piece and add the distance
    // from the start of that piece.
    const SectionPiece *Piece = this->getSectionPiece(Offset);
    assert(Piece->Live);
    uintX_t Delta = Offset - Piece->InputOff;
    return Piece->OutputOff + Delta;
  }
  return Cached->second;
}
// Builds OffsetMap, which maps the input offset of every live section piece
// to its output offset. It is called after finalize().
template <class ELFT> void MergeInputSection<ELFT>::finalizePieces() {
  OffsetMap.grow(this->Pieces.size());

  for (SectionPiece &P : this->Pieces) {
    if (P.Live) {
      if (P.OutputOff == size_t(-1)) {
        // Offsets of tail-merged strings are computed lazily: ask the output
        // section where this piece's bytes ended up.
        auto *MergeSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
        ArrayRef<uint8_t> Bytes = P.data();
        StringRef Str((const char *)Bytes.data(), Bytes.size());
        P.OutputOff = MergeSec->getOffset(Str);
      }
      OffsetMap[P.InputOff] = P.OutputOff;
    }
  }
}
// Translates an offset in this input section into an offset in the output
// section. The output position of the entry containing Offset is looked up
// in the output section on first use and stored for later calls.
template <class ELFT>
typename MergeInputSection<ELFT>::uintX_t
MergeInputSection<ELFT>::getOffset(uintX_t Offset) {
// T.first points at the stored pair for the entry that contains Offset;
// T.second is used below as the input offset at which that entry ends.
std::pair<std::pair<uintX_t, uintX_t> *, uintX_t> T =
this->getRangeAndSize(Offset);
std::pair<uintX_t, uintX_t> *I = T.first;
uintX_t End = T.second;
// The pair's first member is the entry's starting offset in the input.
uintX_t Start = I->first;
// Compute the Addend and if the Base is cached, return.
uintX_t Addend = Offset - Start;
// Base is a reference into the stored pair, so assigning it below caches the
// result. A value of -1 means no output offset has been stored yet.
uintX_t &Base = I->second;
if (Base != uintX_t(-1))
return Base + Addend;
// Map the base to the offset in the output section and cache it.
ArrayRef<uint8_t> D = this->getSectionData();
StringRef Data((const char *)D.data(), D.size());
// The entry's bytes are the input range [Start, End).
StringRef Entry = Data.substr(Start, End - Start);
Base =
static_cast<MergeOutputSection<ELFT> *>(this->OutSec)->getOffset(Entry);
return Base + Addend;
}
template <class ELFT>
MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(ObjectFile<ELFT> *F,
MipsReginfoInputSection<ELFT>::MipsReginfoInputSection(elf::ObjectFile<ELFT> *F,
const Elf_Shdr *Hdr)
: InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsReginfo) {
// Initialize this->Reginfo.
ArrayRef<uint8_t> D = this->getSectionData();
if (D.size() != sizeof(Elf_Mips_RegInfo<ELFT>))
error("Invalid size of .reginfo section");
if (D.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) {
error("invalid size of .reginfo section");
return;
}
Reginfo = reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(D.data());
}
@ -366,31 +625,67 @@ bool MipsReginfoInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
return S->SectionKind == InputSectionBase<ELFT>::MipsReginfo;
}
namespace lld {
namespace elf2 {
template class InputSectionBase<object::ELF32LE>;
template class InputSectionBase<object::ELF32BE>;
template class InputSectionBase<object::ELF64LE>;
template class InputSectionBase<object::ELF64BE>;
template class InputSection<object::ELF32LE>;
template class InputSection<object::ELF32BE>;
template class InputSection<object::ELF64LE>;
template class InputSection<object::ELF64BE>;
template class EHInputSection<object::ELF32LE>;
template class EHInputSection<object::ELF32BE>;
template class EHInputSection<object::ELF64LE>;
template class EHInputSection<object::ELF64BE>;
template class MergeInputSection<object::ELF32LE>;
template class MergeInputSection<object::ELF32BE>;
template class MergeInputSection<object::ELF64LE>;
template class MergeInputSection<object::ELF64BE>;
template class MipsReginfoInputSection<object::ELF32LE>;
template class MipsReginfoInputSection<object::ELF32BE>;
template class MipsReginfoInputSection<object::ELF64LE>;
template class MipsReginfoInputSection<object::ELF64BE>;
// Constructs the section and scans its contents for an ODK_REGINFO option.
// When one is found, Reginfo is pointed at its register info; otherwise
// Reginfo is not assigned. Malformed contents are reported with error() and
// end the scan.
template <class ELFT>
MipsOptionsInputSection<ELFT>::MipsOptionsInputSection(elf::ObjectFile<ELFT> *F,
                                                       const Elf_Shdr *Hdr)
    : InputSectionBase<ELFT>(F, Hdr, InputSectionBase<ELFT>::MipsOptions) {
  // Find ODK_REGINFO option in the section's content.
  ArrayRef<uint8_t> D = this->getSectionData();
  while (!D.empty()) {
    if (D.size() < sizeof(Elf_Mips_Options<ELFT>)) {
      error("invalid size of .MIPS.options section");
      break;
    }

    auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(D.data());
    if (O->kind == ODK_REGINFO) {
      Reginfo = &O->getRegInfo();
      break;
    }

    // The scan advances by the option's own size field. A size of zero would
    // never make progress (endless loop), and a size larger than the data
    // that is left would slice past the end of the section, so reject both.
    size_t OptSize = O->size;
    if (OptSize == 0 || OptSize > D.size()) {
      error("invalid size of .MIPS.options section");
      break;
    }
    D = D.slice(OptSize);
  }
}
// Type test used for casting: true when S was constructed with the
// MipsOptions section kind.
template <class ELFT>
bool MipsOptionsInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
  const auto SecKind = S->SectionKind;
  return SecKind == InputSectionBase<ELFT>::MipsOptions;
}
template bool elf::isDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *);
template bool elf::isDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *);
template bool elf::isDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *);
template bool elf::isDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *);
template class elf::InputSectionBase<ELF32LE>;
template class elf::InputSectionBase<ELF32BE>;
template class elf::InputSectionBase<ELF64LE>;
template class elf::InputSectionBase<ELF64BE>;
template class elf::InputSection<ELF32LE>;
template class elf::InputSection<ELF32BE>;
template class elf::InputSection<ELF64LE>;
template class elf::InputSection<ELF64BE>;
template class elf::SplitInputSection<ELF32LE>;
template class elf::SplitInputSection<ELF32BE>;
template class elf::SplitInputSection<ELF64LE>;
template class elf::SplitInputSection<ELF64BE>;
template class elf::EhInputSection<ELF32LE>;
template class elf::EhInputSection<ELF32BE>;
template class elf::EhInputSection<ELF64LE>;
template class elf::EhInputSection<ELF64BE>;
template class elf::MergeInputSection<ELF32LE>;
template class elf::MergeInputSection<ELF32BE>;
template class elf::MergeInputSection<ELF64LE>;
template class elf::MergeInputSection<ELF64BE>;
template class elf::MipsReginfoInputSection<ELF32LE>;
template class elf::MipsReginfoInputSection<ELF32BE>;
template class elf::MipsReginfoInputSection<ELF64LE>;
template class elf::MipsReginfoInputSection<ELF64BE>;
template class elf::MipsOptionsInputSection<ELF32LE>;
template class elf::MipsOptionsInputSection<ELF32BE>;
template class elf::MipsOptionsInputSection<ELF64LE>;
template class elf::MipsOptionsInputSection<ELF64BE>;

View File

@ -11,12 +11,22 @@
#define LLD_ELF_INPUT_SECTION_H
#include "Config.h"
#include "Relocations.h"
#include "Thunks.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Object/ELF.h"
namespace lld {
namespace elf2 {
namespace elf {
template <class ELFT> bool isDiscarded(InputSectionBase<ELFT> *S);
class SymbolBody;
template <class ELFT> class ICF;
template <class ELFT> class DefinedRegular;
template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection;
template <class ELFT> class OutputSectionBase;
@ -24,120 +34,148 @@ template <class ELFT> class OutputSectionBase;
// This corresponds to a section of an input file.
template <class ELFT> class InputSectionBase {
protected:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::Chdr Elf_Chdr;
typedef typename ELFT::Rel Elf_Rel;
typedef typename ELFT::Rela Elf_Rela;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
const Elf_Shdr *Header;
// The file this section is from.
ObjectFile<ELFT> *File;
// If a section is compressed, this vector has uncompressed section data.
SmallVector<char, 0> Uncompressed;
public:
enum Kind { Regular, EHFrame, Merge, MipsReginfo };
enum Kind { Regular, EHFrame, Merge, MipsReginfo, MipsOptions };
Kind SectionKind;
InputSectionBase() : Repl(this) {}
InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header,
Kind SectionKind);
OutputSectionBase<ELFT> *OutSec = nullptr;
uint32_t Alignment;
// Used for garbage collection.
// Live bit makes sense only when Config->GcSections is true.
bool isLive() const { return !Config->GcSections || Live; }
bool Live = false;
bool Live;
// This pointer points to the "real" instance of this instance.
// Usually Repl == this. However, if ICF merges two sections,
// Repl pointer of one section points to another section. So,
// if you need to get a pointer to this instance, do not use
// this but instead this->Repl.
InputSectionBase<ELFT> *Repl;
// Returns the size of this section (even if this is a common or BSS.)
size_t getSize() const { return Header->sh_size; }
size_t getSize() const;
static InputSectionBase<ELFT> Discarded;
StringRef getSectionName() const;
const Elf_Shdr *getSectionHdr() const { return Header; }
ObjectFile<ELFT> *getFile() const { return File; }
// The writer sets and uses the addresses.
uintX_t getAlign() {
// The ELF spec states that a value of 0 means the section has no alignment
// constraints.
return std::max<uintX_t>(Header->sh_addralign, 1);
}
uintX_t getOffset(const Elf_Sym &Sym);
uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const;
// Translate an offset in the input section to an offset in the output
// section.
uintX_t getOffset(uintX_t Offset);
uintX_t getOffset(uintX_t Offset) const;
ArrayRef<uint8_t> getSectionData() const;
// Returns a section that Rel is pointing to. Used by the garbage collector.
InputSectionBase<ELFT> *getRelocTarget(const Elf_Rel &Rel);
InputSectionBase<ELFT> *getRelocTarget(const Elf_Rela &Rel);
void uncompress();
template <bool isRela>
using RelIteratorRange =
llvm::iterator_range<const llvm::object::Elf_Rel_Impl<ELFT, isRela> *>;
void relocate(uint8_t *Buf, uint8_t *BufEnd);
std::vector<Relocation<ELFT>> Relocations;
template <bool isRela>
void relocate(uint8_t *Buf, uint8_t *BufEnd, RelIteratorRange<isRela> Rels);
private:
template <bool isRela>
uint8_t *findMipsPairedReloc(uint8_t *Buf, uint32_t SymIndex, uint32_t Type,
RelIteratorRange<isRela> Rels);
bool Compressed;
};
template <class ELFT>
InputSectionBase<ELFT>
InputSectionBase<ELFT>::Discarded(nullptr, nullptr,
InputSectionBase<ELFT>::Regular);
template <class ELFT> InputSectionBase<ELFT> InputSectionBase<ELFT>::Discarded;
// SectionPiece represents a piece of splittable section contents.
// It records where the piece starts in the input section, where it was
// placed in the output section, a view of its bytes, and a liveness bit.
struct SectionPiece {
// Off is the piece's offset in the input section and Data is its bytes; only
// the pointer and length are stored. The piece starts out live only when
// Config->GcSections is off.
SectionPiece(size_t Off, ArrayRef<uint8_t> Data)
: InputOff(Off), Data((const uint8_t *)Data.data()), Size(Data.size()),
Live(!Config->GcSections) {}
// Returns a view over the piece's bytes.
ArrayRef<uint8_t> data() { return {Data, Size}; }
// Returns the piece's length in bytes.
size_t size() const { return Size; }
// Offset of the piece from the start of the input section.
size_t InputOff;
// Offset of the piece in the output section; -1 (all bits set) until an
// output offset has been assigned.
size_t OutputOff = -1;
private:
// We use bitfields because SplitInputSection is accessed by
// std::upper_bound very often.
// We want to save bits to make it cache friendly.
const uint8_t *Data;
// The length shares a 32-bit unit with Live, so it is stored in 31 bits.
uint32_t Size : 31;
public:
// Nonzero when the piece is considered live.
uint32_t Live : 1;
};
// Usually sections are copied to the output as atomic chunks of data,
// but some special types of sections are split into small pieces of data
// and each piece is copied to a different place in the output.
// This class represents such special sections.
template <class ELFT> class SplitInputSection : public InputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::uint uintX_t;
public:
SplitInputSection(ObjectFile<ELFT> *File, const Elf_Shdr *Header,
typename InputSectionBase<ELFT>::Kind SectionKind);
// For each piece of data, we maintain the offsets in the input section and
// in the output section. The latter may be -1 if it is not assigned yet.
std::vector<std::pair<uintX_t, uintX_t>> Offsets;
// Splittable sections are handled as a sequence of data
// rather than a single large blob of data.
std::vector<SectionPiece> Pieces;
std::pair<std::pair<uintX_t, uintX_t> *, uintX_t>
getRangeAndSize(uintX_t Offset);
// Returns the SectionPiece at a given input section offset.
SectionPiece *getSectionPiece(uintX_t Offset);
const SectionPiece *getSectionPiece(uintX_t Offset) const;
};
// This corresponds to a SHF_MERGE section of an input file.
template <class ELFT> class MergeInputSection : public SplitInputSection<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename ELFT::uint uintX_t;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Shdr Elf_Shdr;
public:
MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
static bool classof(const InputSectionBase<ELFT> *S);
// Translate an offset in the input section to an offset in the output
// section.
uintX_t getOffset(uintX_t Offset);
void splitIntoPieces();
// Mark the piece at a given offset live. Used by GC.
void markLiveAt(uintX_t Offset) { LiveOffsets.insert(Offset); }
// Translate an offset in the input section to an offset
// in the output section.
uintX_t getOffset(uintX_t Offset) const;
void finalizePieces();
private:
llvm::DenseMap<uintX_t, uintX_t> OffsetMap;
llvm::DenseSet<uintX_t> LiveOffsets;
};
// This corresponds to a .eh_frame section of an input file.
template <class ELFT> class EHInputSection : public SplitInputSection<ELFT> {
template <class ELFT> class EhInputSection : public SplitInputSection<ELFT> {
public:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
EHInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::uint uintX_t;
EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
static bool classof(const InputSectionBase<ELFT> *S);
void split();
// Translate an offset in the input section to an offset in the output
// section.
uintX_t getOffset(uintX_t Offset);
uintX_t getOffset(uintX_t Offset) const;
// Relocation section that refer to this one.
const Elf_Shdr *RelocSection = nullptr;
@ -145,12 +183,13 @@ template <class ELFT> class EHInputSection : public SplitInputSection<ELFT> {
// This corresponds to a non SHF_MERGE section of an input file.
template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
friend ICF<ELFT>;
typedef InputSectionBase<ELFT> Base;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Rela Elf_Rela;
typedef typename ELFT::Rel Elf_Rel;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
public:
InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
@ -160,13 +199,41 @@ template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
void writeTo(uint8_t *Buf);
// Relocation sections that refer to this one.
SmallVector<const Elf_Shdr *, 1> RelocSections;
llvm::TinyPtrVector<const Elf_Shdr *> RelocSections;
// The offset from beginning of the output sections this section was assigned
// to. The writer sets a value.
uint64_t OutSecOff = 0;
static bool classof(const InputSectionBase<ELFT> *S);
InputSectionBase<ELFT> *getRelocatedSection();
// Register a thunk related to the symbol. When the section is written
// to a mmap'ed file, the target is requested to write the actual thunk code.
// Currently thunks are supported for MIPS and ARM targets only.
void addThunk(const Thunk<ELFT> *T);
// The offset of synthetic thunk code from beginning of this section.
uint64_t getThunkOff() const;
// Size of chunk with thunks code.
uint64_t getThunksSize() const;
template <class RelTy>
void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);
private:
template <class RelTy>
void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels);
// Called by ICF to merge two input sections.
void replace(InputSection<ELFT> *Other);
// Used by ICF.
uint64_t GroupId = 0;
llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks;
};
// MIPS .reginfo section provides information on the registers used by the code
@ -177,16 +244,27 @@ template <class ELFT> class InputSection : public InputSectionBase<ELFT> {
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
template <class ELFT>
class MipsReginfoInputSection : public InputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename ELFT::Shdr Elf_Shdr;
public:
MipsReginfoInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr);
static bool classof(const InputSectionBase<ELFT> *S);
const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo;
const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr;
};
} // namespace elf2
// Input section of kind MipsOptions. Exposes the register info located in
// the section's contents, if any.
template <class ELFT>
class MipsOptionsInputSection : public InputSectionBase<ELFT> {
  using Elf_Shdr = typename ELFT::Shdr;

public:
  MipsOptionsInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Hdr);

  // Support for casting from the base class.
  static bool classof(const InputSectionBase<ELFT> *S);

  // Register info found in the section; stays null when none was found.
  const llvm::object::Elf_Mips_RegInfo<ELFT> *Reginfo = nullptr;
};
} // namespace elf
} // namespace lld
#endif

325
ELF/LTO.cpp Normal file
View File

@ -0,0 +1,325 @@
//===- LTO.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "LTO.h"
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/legacy/UpdateCompilerUsed.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
// This is for use when debugging LTO.
// Writes Buffer verbatim to the file named Path. If the file cannot be
// created, an error is reported and nothing is written.
static void saveBuffer(StringRef Buffer, const Twine &Path) {
  std::error_code EC;
  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None);
  if (EC) {
    error(EC, "cannot create " + Path);
    // error() returns to the caller, and the stream has no valid file
    // behind it, so it must not be written to.
    return;
  }
  OS << Buffer;
}
// This is for use when debugging LTO.
// Serializes module M as bitcode into the file named Path, preserving
// use-list order. If the file cannot be created, an error is reported and
// nothing is written.
static void saveBCFile(Module &M, const Twine &Path) {
  std::error_code EC;
  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None);
  if (EC) {
    error(EC, "cannot create " + Path);
    // error() returns to the caller, and the stream has no valid file
    // behind it, so it must not be written to.
    return;
  }
  WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true);
}
// Runs the pass pipeline named by Config->LtoNewPmPasses over M using the
// new pass manager. If either the AA pipeline description or the pass
// pipeline description fails to parse, an error is reported and the function
// returns without running anything.
static void runNewCustomLtoPasses(Module &M, TargetMachine &TM) {
  PassBuilder Builder(&TM);
  AAManager AliasAnalyses;

  // A custom alias-analysis pipeline may have been requested; parse it first.
  if (!Builder.parseAAPipeline(AliasAnalyses, Config->LtoAAPipeline)) {
    error("Unable to parse AA pipeline description: " + Config->LtoAAPipeline);
    return;
  }

  LoopAnalysisManager LoopAM;
  FunctionAnalysisManager FunctionAM;
  CGSCCAnalysisManager CGSCCAM;
  ModuleAnalysisManager ModuleAM;

  // Our AA manager goes in before the stock analyses so that it is the one
  // that gets used.
  FunctionAM.registerPass([&] { return std::move(AliasAnalyses); });

  // Populate every manager with the basic analyses and wire them together.
  Builder.registerModuleAnalyses(ModuleAM);
  Builder.registerCGSCCAnalyses(CGSCCAM);
  Builder.registerFunctionAnalyses(FunctionAM);
  Builder.registerLoopAnalyses(LoopAM);
  Builder.crossRegisterProxies(LoopAM, FunctionAM, CGSCCAM, ModuleAM);

  // The requested passes are bracketed by verifier runs unless verification
  // has been disabled.
  const bool Verify = !Config->DisableVerify;
  ModulePassManager Pipeline;
  if (Verify)
    Pipeline.addPass(VerifierPass());

  if (!Builder.parsePassPipeline(Pipeline, Config->LtoNewPmPasses)) {
    error("unable to parse pass pipeline description: " +
          Config->LtoNewPmPasses);
    return;
  }

  if (Verify)
    Pipeline.addPass(VerifierPass());

  Pipeline.run(M, ModuleAM);
}
// Runs the default LTO pass set over M with the legacy pass manager, at the
// optimization level given by Config->LtoO.
static void runOldLtoPasses(Module &M, TargetMachine &TM) {
  // Note that the gold plugin has a similar piece of code, so
  // it is probably better to move this code to a common place.
  legacy::PassManager Passes;
  Passes.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));

  const bool Verify = !Config->DisableVerify;

  PassManagerBuilder Builder;
  Builder.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM.getTargetTriple()));
  Builder.Inliner = createFunctionInliningPass();
  Builder.VerifyInput = Verify;
  Builder.VerifyOutput = Verify;
  Builder.LoopVectorize = true;
  Builder.SLPVectorize = true;
  Builder.OptLevel = Config->LtoO;
  Builder.populateLTOPassManager(Passes);

  Passes.run(M);
}
// Optimizes the combined module M. A user-supplied pass list is run through
// the new pass manager; otherwise the default LTO pipeline is run through the
// legacy one. With -save-temps the optimized module is written to
// "<output>.lto.opt.bc".
static void runLTOPasses(Module &M, TargetMachine &TM) {
  // An explicit pass list needs the new PM, as there's no clean way to hand
  // a set of passes to the legacy PM. The default set still uses the legacy
  // PM as the new one is not the default.
  const bool UseCustomPipeline = !Config->LtoNewPmPasses.empty();
  if (UseCustomPipeline)
    runNewCustomLtoPasses(M, TM);
  else
    runOldLtoPasses(M, TM);

  // Only the custom pipeline path bails out on a reported error.
  if (UseCustomPipeline && HasError)
    return;

  if (Config->SaveTemps)
    saveBCFile(M, Config->OutputFile + ".lto.opt.bc");
}
// Decides whether the global GV behind symbol S may be given internal
// linkage. It may not if a regular object uses the symbol, if GV is in the
// Used set, or if the symbol has to appear in the dynamic symbol table.
static bool shouldInternalize(const SmallPtrSet<GlobalValue *, 8> &Used,
                              Symbol *S, GlobalValue *GV) {
  if (S->IsUsedInRegularObj)
    return false;
  if (Used.count(GV))
    return false;
  if (S->includeInDynsym())
    return false;
  return true;
}
// Creates the (initially empty) module named "ld-temp.o" in the driver's
// LLVM context. add() moves each input bitcode module into it.
BitcodeCompiler::BitcodeCompiler()
: Combined(new Module("ld-temp.o", Driver->Context)) {}
// Replaces the body of S with an Undefined body that keeps the old name and
// type, uses default visibility, and has no associated file.
static void undefine(Symbol *S) {
  // Read everything we need from the old body before it is overwritten.
  SymbolBody *OldBody = S->body();
  StringRef Name = OldBody->getName();
  auto Type = OldBody->Type;
  replaceBody<Undefined>(S, Name, STV_DEFAULT, Type, nullptr);
}
static void handleUndefinedAsmRefs(const BasicSymbolRef &Sym, GlobalValue *GV,
StringSet<> &AsmUndefinedRefs) {
// GV associated => not an assembly symbol, bail out.
if (GV)
return;
// This is an undefined reference to a symbol in asm. We put that in
// compiler.used, so that we can preserve it from being dropped from
// the output, without necessarily preventing its internalization.
SmallString<64> Name;
raw_svector_ostream OS(Name);
Sym.printName(OS);
AsmUndefinedRefs.insert(Name.str());
}
// Merges the module of bitcode file F into the combined LTO module.
//
// Takes ownership of F's IR object, decides for each symbol defined by F
// whether it must be kept and/or internalized later, replaces those symbols
// with undefined bodies in the symbol table, and finally moves the kept
// globals into Combined with an IRMover. Fails fatally if the module has no
// datalayout or if the move fails.
void BitcodeCompiler::add(BitcodeFile &F) {
std::unique_ptr<IRObjectFile> Obj = std::move(F.Obj);
std::vector<GlobalValue *> Keep;
unsigned BodyIndex = 0;
ArrayRef<Symbol *> Syms = F.getSymbols();
Module &M = Obj->getModule();
if (M.getDataLayoutStr().empty())
fatal("invalid bitcode file: " + F.getName() + " has no datalayout");
// Discard non-compatible debug infos if necessary.
M.materializeMetadata();
UpgradeDebugInfo(M);
// If a symbol appears in @llvm.used, the linker is required
// to treat the symbol as there is a reference to the symbol
// that it cannot see. Therefore, we can't internalize.
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, /* CompilerUsed */ false);
for (const BasicSymbolRef &Sym : Obj->symbols()) {
uint32_t Flags = Sym.getFlags();
GlobalValue *GV = Obj->getSymbolGV(Sym.getRawDataRefImpl());
// Globals with appending linkage are kept even when the symbol itself is
// skipped below, so this check must come before the shouldSkip() test.
if (GV && GV->hasAppendingLinkage())
Keep.push_back(GV);
if (BitcodeFile::shouldSkip(Flags))
continue;
// NOTE(review): this indexing relies on F.getSymbols() holding exactly one
// entry per non-skipped symbol, in the order Obj->symbols() yields them --
// presumably established when the file was parsed; confirm there.
Symbol *S = Syms[BodyIndex++];
if (Flags & BasicSymbolRef::SF_Undefined) {
handleUndefinedAsmRefs(Sym, GV, AsmUndefinedRefs);
continue;
}
// Ignore symbols whose current body is not a bitcode definition coming
// from this very file.
auto *B = dyn_cast<DefinedBitcode>(S->body());
if (!B || B->file() != &F)
continue;
// We collect the set of symbols we want to internalize here
// and change the linkage after the IRMover executed, i.e. after
// we imported the symbols and satisfied undefined references
// to it. We can't just change linkage here because otherwise
// the IRMover will just rename the symbol.
if (GV && shouldInternalize(Used, S, GV))
InternalizedSyms.insert(GV->getName());
// At this point we know that either the combined LTO object will provide a
// definition of a symbol, or we will internalize it. In either case, we
// need to undefine the symbol. In the former case, the real definition
// needs to be able to replace the original definition without conflicting.
// In the latter case, we need to allow the combined LTO object to provide a
// definition with the same name, for example when doing parallel codegen.
undefine(S);
if (!GV)
// Module asm symbol.
continue;
// Turn linkonce linkage into the corresponding weak linkage before the
// global is handed to the IRMover.
switch (GV->getLinkage()) {
default:
break;
case GlobalValue::LinkOnceAnyLinkage:
GV->setLinkage(GlobalValue::WeakAnyLinkage);
break;
case GlobalValue::LinkOnceODRLinkage:
GV->setLinkage(GlobalValue::WeakODRLinkage);
break;
}
Keep.push_back(GV);
}
// Move the kept globals into the combined module. The callback that could
// add further values is intentionally a no-op.
IRMover Mover(*Combined);
if (Error E = Mover.move(Obj->takeModule(), Keep,
[](GlobalValue &, IRMover::ValueAdder) {})) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EIB) {
fatal("failed to link module " + F.getName() + ": " + EIB.message());
});
}
}
// Gives GV internal linkage. GV must not already have local linkage; that is
// checked by the assertion.
static void internalize(GlobalValue &GV) {
assert(!GV.hasLocalLinkage() &&
"Trying to internalize a symbol with local linkage!");
GV.setLinkage(GlobalValue::InternalLinkage);
}
// Generates native code for the combined module, split across
// Config->LtoJobs output buffers, and wraps each buffer in an input file
// object. The buffers live in OwningData, so they outlive this call. With
// -save-temps each buffer is also written to disk.
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen(
    const std::function<std::unique_ptr<TargetMachine>()> &TMFactory) {
  const unsigned Jobs = Config->LtoJobs;
  OwningData.resize(Jobs);

  // One output stream per buffer. The streams are kept in a list and handed
  // to the code generator by pointer.
  std::list<raw_svector_ostream> Streams;
  std::vector<raw_pwrite_stream *> StreamPtrs;
  for (SmallString<0> &Buf : OwningData) {
    Streams.emplace_back(Buf);
    StreamPtrs.push_back(&Streams.back());
  }

  splitCodeGen(std::move(Combined), StreamPtrs, {}, TMFactory);

  std::vector<std::unique_ptr<InputFile>> Result;
  for (SmallString<0> &Buf : OwningData) {
    MemoryBufferRef MBRef(Buf, "LLD-INTERNAL-combined-lto-object");
    Result.push_back(createObjectFile(MBRef));
  }

  // If -save-temps is given, we need to save temporary objects to files.
  // This is for debugging.
  if (!Config->SaveTemps)
    return Result;

  if (Jobs == 1) {
    saveBuffer(OwningData[0], Config->OutputFile + ".lto.o");
    return Result;
  }

  // With several partitions the partition index becomes part of the name.
  for (unsigned Idx = 0; Idx < Jobs; ++Idx)
    saveBuffer(OwningData[Idx], Config->OutputFile + Twine(Idx) + ".lto.o");
  return Result;
}
// Finishes LTO: internalizes the symbols collected by add(), optimizes the
// combined module and generates native code for it. Returns the resulting
// object files, or an empty vector if the optimization step reported an
// error. Fails fatally if no target is registered for the module's triple.
std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() {
  // Apply the linkage changes that had to wait until all modules were moved.
  for (const auto &Entry : InternalizedSyms) {
    GlobalValue *GV = Combined->getNamedValue(Entry.first());
    assert(GV);
    internalize(*GV);
  }

  std::string TheTriple = Combined->getTargetTriple();
  std::string LookupError;
  const Target *T = TargetRegistry::lookupTarget(TheTriple, LookupError);
  if (!T)
    fatal("target not found: " + LookupError);

  // LLD supports the new relocations.
  TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
  Options.RelaxELFRelocations = true;

  // Factory for target machines; one is needed here and more are created on
  // demand by the split code generator.
  auto MakeTargetMachine = [&]() {
    auto Model = Config->Pic ? Reloc::PIC_ : Reloc::Static;
    return std::unique_ptr<TargetMachine>(
        T->createTargetMachine(TheTriple, "", "", Options, Model));
  };
  std::unique_ptr<TargetMachine> TM = MakeTargetMachine();

  // Update llvm.compiler.used so that optimizations won't strip
  // off AsmUndefinedReferences.
  updateCompilerUsed(*Combined, *TM, AsmUndefinedRefs);

  if (Config->SaveTemps)
    saveBCFile(*Combined, Config->OutputFile + ".lto.bc");

  runLTOPasses(*Combined, *TM);
  if (HasError)
    return {};

  return runSplitCodegen(MakeTargetMachine);
}

54
ELF/LTO.h Normal file
View File

@ -0,0 +1,54 @@
//===- LTO.h ----------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides a way to combine bitcode files into one ELF
// file by compiling them using LLVM.
//
// If LTO is in use, your input files are not in regular ELF files
// but instead LLVM bitcode files. In that case, the linker has to
// convert bitcode files into the native format so that we can create
// an ELF file that contains native code. This file provides that
// functionality.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_LTO_H
#define LLD_ELF_LTO_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/IRMover.h"
namespace lld {
namespace elf {
class BitcodeFile;
class InputFile;
// Combines bitcode files into one module and compiles it to native code.
// Usage: call add() once per bitcode file, then compile() once.
class BitcodeCompiler {
public:
BitcodeCompiler();
// Merges the module of F into the combined module.
void add(BitcodeFile &F);
// Optimizes and code-generates the combined module and returns the
// resulting object files.
std::vector<std::unique_ptr<InputFile>> compile();
private:
// Runs code generation for the combined module; TMFactory produces the
// target machines needed for it.
std::vector<std::unique_ptr<InputFile>> runSplitCodegen(
const std::function<std::unique_ptr<llvm::TargetMachine>()> &TMFactory);
// The module every input module is merged into.
std::unique_ptr<llvm::Module> Combined;
// Backing storage for the generated object files.
std::vector<SmallString<0>> OwningData;
// Names of globals to be given internal linkage before optimization.
llvm::StringSet<> InternalizedSyms;
// Names of undefined symbols referenced only from module-level assembly.
llvm::StringSet<> AsmUndefinedRefs;
};
}
}
#endif

View File

@ -13,33 +13,317 @@
//
//===----------------------------------------------------------------------===//
#include "LinkerScript.h"
#include "Config.h"
#include "Driver.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "Strings.h"
#include "Symbols.h"
#include "SymbolTable.h"
#include "Target.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
ScriptConfiguration *elf::ScriptConfig;
// This is an operator-precedence parser to parse and evaluate
// a linker script expression. For each linker script arithmetic
// expression (e.g. ". = . + 0x1000"), a new instance of ExprParser
// is created and ran.
namespace {
class LinkerScript {
class ExprParser : public ScriptParserBase {
public:
LinkerScript(BumpPtrAllocator *A, StringRef S, bool B)
: Saver(*A), Tokens(tokenize(S)), IsUnderSysroot(B) {}
ExprParser(std::vector<StringRef> &Tokens, uint64_t Dot)
: ScriptParserBase(Tokens), Dot(Dot) {}
uint64_t run();
private:
uint64_t parsePrimary();
uint64_t parseTernary(uint64_t Cond);
uint64_t apply(StringRef Op, uint64_t L, uint64_t R);
uint64_t parseExpr1(uint64_t Lhs, int MinPrec);
uint64_t parseExpr();
uint64_t Dot;
};
}
// Returns the binding strength of binary operator Op (higher binds tighter),
// or -1 if Op is not a binary operator known to the expression parser.
static int precedence(StringRef Op) {
  // Multiplicative.
  if (Op == "*" || Op == "/")
    return 4;
  // Additive.
  if (Op == "+" || Op == "-")
    return 3;
  // Relational and equality.
  if (Op == "<" || Op == ">" || Op == ">=" || Op == "<=" || Op == "==" ||
      Op == "!=")
    return 2;
  // Bitwise and.
  if (Op == "&")
    return 1;
  return -1;
}
// Evaluates the expression made of Tokens, with Dot as the current value of
// the location counter ".".
static uint64_t evalExpr(std::vector<StringRef> &Tokens, uint64_t Dot) {
  ExprParser Parser(Tokens, Dot);
  return Parser.run();
}
// Parses and evaluates the whole token stream as one expression. Tokens left
// over after the expression are reported, unless an error was already raised.
uint64_t ExprParser::run() {
  const uint64_t Result = parseExpr();
  const bool HasStrayToken = !atEOF() && !Error;
  if (HasStrayToken)
    setError("stray token: " + peek());
  return Result;
}
// This is a part of the operator-precedence parser to evaluate
// arithmetic expressions in SECTIONS command. This function evaluates an
// integer literal, a parenthesized expression, the ALIGN function,
// or the special variable ".".
//
// Returns the value of the primary expression. A token that is none of the
// above and does not parse as an integer raises "malformed number" and
// yields 0.
uint64_t ExprParser::parsePrimary() {
  StringRef Tok = next();
  if (Tok == ".")
    return Dot;
  if (Tok == "(") {
    uint64_t V = parseExpr();
    expect(")");
    return V;
  }
  if (Tok == "ALIGN") {
    expect("(");
    uint64_t V = parseExpr();
    expect(")");
    // alignTo() divides by the alignment, so zero must never reach it. Zero
    // shows up both for a literal ALIGN(0) and when the argument failed to
    // parse (in which case an error has already been raised). Treat it as a
    // no-op, i.e. the same result as ALIGN(1).
    if (V == 0)
      return Dot;
    return alignTo(Dot, V);
  }
  uint64_t V = 0;
  if (Tok.getAsInteger(0, V))
    setError("malformed number: " + Tok);
  return V;
}
// Parses the "? a : b" tail of a conditional expression whose condition has
// already been evaluated to Cond. Both arms are always parsed; only the
// selected one is returned.
uint64_t ExprParser::parseTernary(uint64_t Cond) {
  next(); // Consume the "?" the caller peeked at.
  const uint64_t IfTrue = parseExpr();
  expect(":");
  const uint64_t IfFalse = parseExpr();
  if (Cond)
    return IfTrue;
  return IfFalse;
}
// Applies binary operator Op to L and R and returns the result. Comparison
// operators yield 1 or 0. Division by zero reports an error and yields 0.
// Op must be one of the operators precedence() knows about.
uint64_t ExprParser::apply(StringRef Op, uint64_t L, uint64_t R) {
  // Division is the only operator that can fail, so deal with it up front.
  if (Op == "/") {
    if (R != 0)
      return L / R;
    error("division by zero");
    return 0;
  }

  // Remaining arithmetic operators.
  if (Op == "*")
    return L * R;
  if (Op == "+")
    return L + R;
  if (Op == "-")
    return L - R;

  // Comparisons.
  if (Op == "<")
    return L < R;
  if (Op == ">")
    return L > R;
  if (Op == ">=")
    return L >= R;
  if (Op == "<=")
    return L <= R;
  if (Op == "==")
    return L == R;
  if (Op == "!=")
    return L != R;

  // Bitwise.
  if (Op == "&")
    return L & R;

  llvm_unreachable("invalid operator");
}
// This is a part of the operator-precedence parser.
// This function assumes that the remaining token stream starts
// with an operator.
//
// Lhs is the value of the operand already parsed. MinPrec is the lowest
// operator precedence this invocation may consume; on meeting an operator
// that binds less tightly, the value computed so far is returned so that an
// outer invocation can handle that operator.
uint64_t ExprParser::parseExpr1(uint64_t Lhs, int MinPrec) {
while (!atEOF()) {
// Read an operator and an expression.
StringRef Op1 = peek();
// A "?" turns everything parsed so far into the condition of a ternary.
if (Op1 == "?")
return parseTernary(Lhs);
// Tokens that are not binary operators have precedence -1, so they end the
// expression here as well.
if (precedence(Op1) < MinPrec)
return Lhs;
next();
uint64_t Rhs = parsePrimary();
// Evaluate the remaining part of the expression first if the
// next operator has greater precedence than the previous one.
// For example, if we have read "+" and "3", and if the next
// operator is "*", then we'll evaluate 3 * ... part first.
while (!atEOF()) {
StringRef Op2 = peek();
if (precedence(Op2) <= precedence(Op1))
break;
Rhs = parseExpr1(Rhs, precedence(Op2));
}
Lhs = apply(Op1, Lhs, Rhs);
}
return Lhs;
}
// Reads and evaluates an arithmetic expression: one primary expression
// followed by any number of operator/operand pairs.
uint64_t ExprParser::parseExpr() {
  const uint64_t First = parsePrimary();
  return parseExpr1(First, 0);
}
// Returns the output section name that the first matching SECTIONS rule
// assigns to input section S, or an empty string if no rule matches.
template <class ELFT>
StringRef LinkerScript<ELFT>::getOutputSection(InputSectionBase<ELFT> *S) {
  auto End = Opt.Sections.end();
  auto Match = std::find_if(Opt.Sections.begin(), End, [&](SectionRule &Rule) {
    return globMatch(Rule.SectionPattern, S->getSectionName());
  });
  if (Match == End)
    return "";
  return Match->Dest;
}
// Returns true if the linker script places S into the special "/DISCARD/"
// output section.
template <class ELFT>
bool LinkerScript<ELFT>::isDiscarded(InputSectionBase<ELFT> *S) {
  StringRef OutSec = getOutputSection(S);
  return OutSec == "/DISCARD/";
}
// Returns true if the name of S matches any pattern given in a KEEP command.
template <class ELFT>
bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
  return std::any_of(Opt.KeptSections.begin(), Opt.KeptSections.end(),
                     [&](StringRef Pattern) {
                       return globMatch(Pattern, S->getSectionName());
                     });
}
// Assigns virtual addresses to the given output sections by executing the
// SECTIONS commands in order, then places the ELF header and the program
// headers in front of the lowest address that was assigned.
template <class ELFT>
void LinkerScript<ELFT>::assignAddresses(
ArrayRef<OutputSectionBase<ELFT> *> Sections) {
// Orphan sections are sections present in the input files which
// are not explicitly placed into the output file by the linker script.
// We place orphan sections at end of file.
// Other linkers place them using some heuristics as described in
// https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.
for (OutputSectionBase<ELFT> *Sec : Sections) {
StringRef Name = Sec->getName();
if (getSectionIndex(Name) == INT_MAX)
Opt.Commands.push_back({SectionKind, {}, Name});
}
// Assign addresses as instructed by linker script SECTIONS sub-commands.
Dot = Out<ELFT>::ElfHeader->getSize() + Out<ELFT>::ProgramHeaders->getSize();
uintX_t MinVA = std::numeric_limits<uintX_t>::max();
uintX_t ThreadBssOffset = 0;
for (SectionsCommand &Cmd : Opt.Commands) {
if (Cmd.Kind == AssignmentKind) {
// Either move the location counter or set the value of a symbol.
uint64_t Val = evalExpr(Cmd.Expr, Dot);
if (Cmd.Name == ".") {
Dot = Val;
} else {
// NOTE(review): cast<> requires the symbol to exist and to be a
// DefinedRegular -- presumably guaranteed by addScriptedSymbols();
// confirm for symbols that were already defined elsewhere.
auto *D = cast<DefinedRegular<ELFT>>(Symtab<ELFT>::X->find(Cmd.Name));
D->Value = Val;
}
continue;
}
// Find all the sections with required name. There can be more than
// one section with such name, if the alignment, flags or type
// attribute differs.
assert(Cmd.Kind == SectionKind);
for (OutputSectionBase<ELFT> *Sec : Sections) {
if (Sec->getName() != Cmd.Name)
continue;
// TLS NOBITS sections get addresses starting at Dot but do not advance
// Dot; their running size is tracked in ThreadBssOffset instead.
if ((Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS) {
uintX_t TVA = Dot + ThreadBssOffset;
TVA = alignTo(TVA, Sec->getAlignment());
Sec->setVA(TVA);
ThreadBssOffset = TVA - Dot + Sec->getSize();
continue;
}
// Only allocated sections take up address space; others are left alone.
if (Sec->getFlags() & SHF_ALLOC) {
Dot = alignTo(Dot, Sec->getAlignment());
Sec->setVA(Dot);
MinVA = std::min(MinVA, Dot);
Dot += Sec->getSize();
continue;
}
}
}
// ELF and Program headers need to be right before the first section in
// memory.
// Set their addresses accordingly.
MinVA = alignDown(MinVA - Out<ELFT>::ElfHeader->getSize() -
Out<ELFT>::ProgramHeaders->getSize(),
Target->PageSize);
Out<ELFT>::ElfHeader->setVA(MinVA);
Out<ELFT>::ProgramHeaders->setVA(Out<ELFT>::ElfHeader->getSize() + MinVA);
}
// Returns the fill bytes the linker script specifies for output section
// Name, or an empty array if it specifies none.
template <class ELFT>
ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
  auto Entry = Opt.Filler.find(Name);
  if (Entry != Opt.Filler.end())
    return Entry->second;
  return {};
}
// Returns the position of the first SECTIONS command that describes output
// section Name. Sections are laid out in the order they appear in the
// script. A name that does not appear in the script yields INT_MAX, so that
// the section is laid out at end of file.
template <class ELFT>
int LinkerScript<ELFT>::getSectionIndex(StringRef Name) {
  int Index = 0;
  for (SectionsCommand &Cmd : Opt.Commands) {
    if (Cmd.Kind == SectionKind && Cmd.Name == Name)
      return Index;
    ++Index;
  }
  return INT_MAX;
}
// A comparator to sort output sections. Returns 0 if neither A nor B is
// mentioned in the linker script. Otherwise returns -1 if A comes before B
// in script order and 1 if it does not.
template <class ELFT>
int LinkerScript<ELFT>::compareSections(StringRef A, StringRef B) {
  const int IndexA = getSectionIndex(A);
  const int IndexB = getSectionIndex(B);
  const bool NeitherListed = IndexA == INT_MAX && IndexB == INT_MAX;
  if (NeitherListed)
    return 0;
  if (IndexA < IndexB)
    return -1;
  return 1;
}
// Adds an absolute symbol to the symbol table for every symbol assigned in
// the SECTIONS command that the table does not contain yet. Assignments to
// the location counter "." are not symbols and are skipped.
template <class ELFT>
void LinkerScript<ELFT>::addScriptedSymbols() {
  for (SectionsCommand &Cmd : Opt.Commands) {
    if (Cmd.Kind != AssignmentKind)
      continue;
    if (Cmd.Name == ".")
      continue;
    if (Symtab<ELFT>::X->find(Cmd.Name) != nullptr)
      continue;
    Symtab<ELFT>::X->addAbsolute(Cmd.Name, STV_DEFAULT);
  }
}
class elf::ScriptParser : public ScriptParserBase {
typedef void (ScriptParser::*Handler)();
public:
ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {}
void run();
private:
static std::vector<StringRef> tokenize(StringRef S);
static StringRef skipSpace(StringRef S);
StringRef next();
bool skip(StringRef Tok);
bool atEOF() { return Tokens.size() == Pos; }
void expect(StringRef Expect);
void addFile(StringRef Path);
void readAsNeeded();
@ -47,120 +331,48 @@ class LinkerScript {
void readExtern();
void readGroup();
void readInclude();
void readNothing() {}
void readOutput();
void readOutputArch();
void readOutputFormat();
void readSearchDir();
void readSections();
void readOutputSectionDescription();
void readLocationCounterValue();
void readOutputSectionDescription(StringRef OutSec);
void readSymbolAssignment(StringRef Name);
std::vector<StringRef> readSectionsCommandExpr();
StringSaver Saver;
std::vector<StringRef> Tokens;
size_t Pos = 0;
const static StringMap<Handler> Cmd;
ScriptConfiguration &Opt = *ScriptConfig;
StringSaver Saver = {ScriptConfig->Alloc};
bool IsUnderSysroot;
};
}
void LinkerScript::run() {
// Dispatch table from top-level linker script command name to the member
// function that parses it. INPUT shares the GROUP handler, and a stray ";"
// maps to a handler that does nothing.
const StringMap<elf::ScriptParser::Handler> elf::ScriptParser::Cmd = {
{"ENTRY", &ScriptParser::readEntry},
{"EXTERN", &ScriptParser::readExtern},
{"GROUP", &ScriptParser::readGroup},
{"INCLUDE", &ScriptParser::readInclude},
{"INPUT", &ScriptParser::readGroup},
{"OUTPUT", &ScriptParser::readOutput},
{"OUTPUT_ARCH", &ScriptParser::readOutputArch},
{"OUTPUT_FORMAT", &ScriptParser::readOutputFormat},
{"SEARCH_DIR", &ScriptParser::readSearchDir},
{"SECTIONS", &ScriptParser::readSections},
{";", &ScriptParser::readNothing}};
void ScriptParser::run() {
while (!atEOF()) {
StringRef Tok = next();
if (Tok == ";")
continue;
if (Tok == "ENTRY") {
readEntry();
} else if (Tok == "EXTERN") {
readExtern();
} else if (Tok == "GROUP" || Tok == "INPUT") {
readGroup();
} else if (Tok == "INCLUDE") {
readInclude();
} else if (Tok == "OUTPUT") {
readOutput();
} else if (Tok == "OUTPUT_ARCH") {
readOutputArch();
} else if (Tok == "OUTPUT_FORMAT") {
readOutputFormat();
} else if (Tok == "SEARCH_DIR") {
readSearchDir();
} else if (Tok == "SECTIONS") {
readSections();
} else {
error("unknown directive: " + Tok);
}
if (Handler Fn = Cmd.lookup(Tok))
(this->*Fn)();
else
setError("unknown directive: " + Tok);
}
}
// Split S into linker script tokens.
//
// Whitespace and /**/ comments separate tokens. A double-quoted string
// becomes one token holding the text between the quotes. A run of word
// characters becomes one token, and any other character becomes a
// single-character token. An unterminated quote is reported and ends
// tokenization; the tokens read so far are returned.
std::vector<StringRef> LinkerScript::tokenize(StringRef S) {
  std::vector<StringRef> Ret;
  for (;;) {
    S = skipSpace(S);
    if (S.empty())
      return Ret;

    // Quoted token
    if (S.startswith("\"")) {
      size_t E = S.find("\"", 1);
      if (E == StringRef::npos) {
        error("unclosed quote");
        // With E == npos the code below would leave S unchanged and this
        // loop would never terminate, so stop here.
        return Ret;
      }
      // substr() takes a length, not an end index. The quoted text starts
      // at index 1 and is E - 1 characters long; the closing quote at index
      // E is not part of the token.
      Ret.push_back(S.substr(1, E - 1));
      S = S.substr(E + 1);
      continue;
    }

    // Unquoted token
    size_t Pos = S.find_first_not_of(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "0123456789_.$/\\~=+[]*?-:");
    // A character that cannot start a word (which is usually a
    // punctuation) forms a single character token.
    if (Pos == 0)
      Pos = 1;
    Ret.push_back(S.substr(0, Pos));
    S = S.substr(Pos);
  }
}
// Skip leading whitespace characters or /**/-style comments.
// Returns the rest of S. An unterminated comment is reported and swallows
// the remainder of the input, so an empty string is returned in that case.
StringRef LinkerScript::skipSpace(StringRef S) {
  for (;;) {
    if (S.startswith("/*")) {
      size_t E = S.find("*/", 2);
      if (E == StringRef::npos) {
        error("unclosed comment in a linker script");
        // E + 2 would wrap around and resume scanning inside the comment.
        return "";
      }
      S = S.substr(E + 2);
      continue;
    }
    size_t Size = S.size();
    S = S.ltrim();
    // Nothing was trimmed and no comment starts here: S begins with a token.
    if (S.size() == Size)
      return S;
  }
}
// Returns the current token and advances past it. At end of input an error
// is reported and an empty token is returned.
StringRef LinkerScript::next() {
  if (atEOF()) {
    error("unexpected EOF");
    // Never index past the end of Tokens, even if error() returns.
    return "";
  }
  return Tokens[Pos++];
}
// Consumes the current token if it equals Tok. Returns true if a token was
// consumed. At end of input an error is reported and false is returned.
bool LinkerScript::skip(StringRef Tok) {
  if (atEOF()) {
    error("unexpected EOF");
    // Never index past the end of Tokens, even if error() returns.
    return false;
  }
  if (Tok != Tokens[Pos])
    return false;
  ++Pos;
  return true;
}
// Consumes the next token and reports an error unless it equals Expect.
void LinkerScript::expect(StringRef Expect) {
  StringRef Actual = next();
  if (Actual == Expect)
    return;
  error(Expect + " expected, but got " + Actual);
}
void LinkerScript::addFile(StringRef S) {
void ScriptParser::addFile(StringRef S) {
if (IsUnderSysroot && S.startswith("/")) {
SmallString<128> Path;
(Config->Sysroot + S).toStringRef(Path);
@ -178,22 +390,23 @@ void LinkerScript::addFile(StringRef S) {
else
Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
} else if (S.startswith("-l")) {
Driver->addFile(searchLibrary(S.substr(2)));
Driver->addLibrary(S.substr(2));
} else if (sys::fs::exists(S)) {
Driver->addFile(S);
} else {
std::string Path = findFromSearchPaths(S);
if (Path.empty())
error("Unable to find " + S);
Driver->addFile(Saver.save(Path));
setError("unable to find " + S);
else
Driver->addFile(Saver.save(Path));
}
}
void LinkerScript::readAsNeeded() {
void ScriptParser::readAsNeeded() {
expect("(");
bool Orig = Config->AsNeeded;
Config->AsNeeded = true;
for (;;) {
while (!Error) {
StringRef Tok = next();
if (Tok == ")")
break;
@ -202,7 +415,7 @@ void LinkerScript::readAsNeeded() {
Config->AsNeeded = Orig;
}
void LinkerScript::readEntry() {
void ScriptParser::readEntry() {
// -e <symbol> takes precedence over ENTRY(<symbol>).
expect("(");
StringRef Tok = next();
@ -211,9 +424,9 @@ void LinkerScript::readEntry() {
expect(")");
}
void LinkerScript::readExtern() {
void ScriptParser::readExtern() {
expect("(");
for (;;) {
while (!Error) {
StringRef Tok = next();
if (Tok == ")")
return;
@ -221,9 +434,9 @@ void LinkerScript::readExtern() {
}
}
void LinkerScript::readGroup() {
void ScriptParser::readGroup() {
expect("(");
for (;;) {
while (!Error) {
StringRef Tok = next();
if (Tok == ")")
return;
@ -235,17 +448,20 @@ void LinkerScript::readGroup() {
}
}
void LinkerScript::readInclude() {
void ScriptParser::readInclude() {
StringRef Tok = next();
auto MBOrErr = MemoryBuffer::getFile(Tok);
error(MBOrErr, "cannot open " + Tok);
if (!MBOrErr) {
setError("cannot open " + Tok);
return;
}
std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
std::vector<StringRef> V = tokenize(S);
Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
}
void LinkerScript::readOutput() {
void ScriptParser::readOutput() {
// -o <file> takes precedence over OUTPUT(<file>).
expect("(");
StringRef Tok = next();
@ -254,52 +470,119 @@ void LinkerScript::readOutput() {
expect(")");
}
void LinkerScript::readOutputArch() {
void ScriptParser::readOutputArch() {
// Error checking only for now.
expect("(");
next();
expect(")");
}
void LinkerScript::readOutputFormat() {
void ScriptParser::readOutputFormat() {
// Error checking only for now.
expect("(");
next();
StringRef Tok = next();
if (Tok == ")")
return;
if (Tok != ",")
error("unexpected token: " + Tok);
if (Tok != ",") {
setError("unexpected token: " + Tok);
return;
}
next();
expect(",");
next();
expect(")");
}
void LinkerScript::readSearchDir() {
void ScriptParser::readSearchDir() {
expect("(");
Config->SearchPaths.push_back(next());
expect(")");
}
void LinkerScript::readSections() {
void ScriptParser::readSections() {
Opt.DoLayout = true;
expect("{");
while (!skip("}"))
readOutputSectionDescription();
while (!Error && !skip("}")) {
StringRef Tok = peek();
if (Tok == ".") {
readLocationCounterValue();
continue;
}
next();
if (peek() == "=")
readSymbolAssignment(Tok);
else
readOutputSectionDescription(Tok);
}
}
void LinkerScript::readOutputSectionDescription() {
StringRef Name = next();
std::vector<StringRef> &InputSections = Config->OutputSections[Name];
void ScriptParser::readLocationCounterValue() {
expect(".");
expect("=");
std::vector<StringRef> Expr = readSectionsCommandExpr();
if (Expr.empty())
error("error in location counter expression");
else
Opt.Commands.push_back({AssignmentKind, std::move(Expr), "."});
}
void ScriptParser::readOutputSectionDescription(StringRef OutSec) {
Opt.Commands.push_back({SectionKind, {}, OutSec});
expect(":");
expect("{");
while (!skip("}")) {
next(); // Skip input file name.
expect("(");
while (!skip(")"))
InputSections.push_back(next());
while (!Error && !skip("}")) {
StringRef Tok = next();
if (Tok == "*") {
expect("(");
while (!Error && !skip(")"))
Opt.Sections.emplace_back(OutSec, next());
} else if (Tok == "KEEP") {
expect("(");
expect("*");
expect("(");
while (!Error && !skip(")")) {
StringRef Sec = next();
Opt.Sections.emplace_back(OutSec, Sec);
Opt.KeptSections.push_back(Sec);
}
expect(")");
} else {
setError("unknown command " + Tok);
}
}
StringRef Tok = peek();
if (Tok.startswith("=")) {
if (!Tok.startswith("=0x")) {
setError("filler should be a hexadecimal value");
return;
}
Tok = Tok.substr(3);
Opt.Filler[OutSec] = parseHex(Tok);
next();
}
}
// Parses "= <expr> ;" following symbol Name inside SECTIONS and records it
// as an assignment to that symbol. An empty expression is an error.
void ScriptParser::readSymbolAssignment(StringRef Name) {
  expect("=");
  std::vector<StringRef> Expr = readSectionsCommandExpr();
  if (Expr.empty()) {
    // Report through setError() like every other diagnostic of this parser,
    // so that the parser's Error flag, which the parsing loops test, is set.
    setError("error in symbol assignment expression");
    return;
  }
  Opt.Commands.push_back({AssignmentKind, std::move(Expr), Name});
}
// Collects the tokens of an expression, i.e. everything up to the
// terminating ";", which is consumed but not included. Collection also stops
// as soon as the parser is in the error state.
std::vector<StringRef> ScriptParser::readSectionsCommandExpr() {
  std::vector<StringRef> Collected;
  for (StringRef Tok; !Error && (Tok = next()) != ";";)
    Collected.push_back(Tok);
  return Collected;
}
static bool isUnderSysroot(StringRef Path) {
@ -311,8 +594,13 @@ static bool isUnderSysroot(StringRef Path) {
return false;
}
// Entry point. The other functions or classes are private to this file.
void elf2::readLinkerScript(BumpPtrAllocator *A, MemoryBufferRef MB) {
// Entry point.
void elf::readLinkerScript(MemoryBufferRef MB) {
StringRef Path = MB.getBufferIdentifier();
LinkerScript(A, MB.getBuffer(), isUnderSysroot(Path)).run();
ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).run();
}
template class elf::LinkerScript<ELF32LE>;
template class elf::LinkerScript<ELF32BE>;
template class elf::LinkerScript<ELF64LE>;
template class elf::LinkerScript<ELF64BE>;

103
ELF/LinkerScript.h Normal file
View File

@ -0,0 +1,103 @@
//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_LINKER_SCRIPT_H
#define LLD_ELF_LINKER_SCRIPT_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/MemoryBuffer.h"
namespace lld {
namespace elf {
// Parses a linker script. Calling this function updates
// Config and ScriptConfig.
void readLinkerScript(MemoryBufferRef MB);
class ScriptParser;
template <class ELFT> class InputSectionBase;
template <class ELFT> class OutputSectionBase;
// This class represents each rule in SECTIONS command: input sections whose
// name matches SectionPattern are placed into output section Dest.
struct SectionRule {
SectionRule(StringRef D, StringRef S)
: Dest(D), SectionPattern(S) {}
// Name of the output section.
StringRef Dest;
// Glob pattern matched against input section names.
StringRef SectionPattern;
};
// This enum represents what we can observe in SECTIONS tag of script:
// AssignmentKind is an assignment to the location counter or to a symbol,
// like ". = . + 0x1000"
// SectionKind is a description of output section, like ".data :..."
enum SectionsCommandKind { SectionKind, AssignmentKind };
// One command of the SECTIONS block, in script order.
struct SectionsCommand {
SectionsCommandKind Kind;
// Tokens of the right-hand side expression; empty for SectionKind.
std::vector<StringRef> Expr;
// Output section name for SectionKind; assigned symbol name, or "." for
// the location counter, for AssignmentKind.
StringRef Name;
};
// ScriptConfiguration holds linker script parse results.
struct ScriptConfiguration {
// SECTIONS commands.
std::vector<SectionRule> Sections;
// Section fill attribute for each section.
llvm::StringMap<std::vector<uint8_t>> Filler;
// Used to assign addresses to sections.
std::vector<SectionsCommand> Commands;
// Set once a SECTIONS command has been read.
bool DoLayout = false;
// Allocator backing the strings saved while parsing the script.
llvm::BumpPtrAllocator Alloc;
// List of section patterns specified with KEEP commands. They will
// be kept even if they are unused and --gc-sections is specified.
std::vector<StringRef> KeptSections;
};
extern ScriptConfiguration *ScriptConfig;
// This is a runner of the linker script: it answers layout questions and
// assigns addresses based on the parse results held in ScriptConfig.
template <class ELFT> class LinkerScript {
  typedef typename ELFT::uint uintX_t;

public:
  // Returns the output section name the script assigns to S, or "" if none.
  StringRef getOutputSection(InputSectionBase<ELFT> *S);
  // Returns the fill bytes for output section Name, or an empty array.
  ArrayRef<uint8_t> getFiller(StringRef Name);
  // Returns true if the script places S into "/DISCARD/".
  bool isDiscarded(InputSectionBase<ELFT> *S);
  // Returns true if S matches a KEEP pattern.
  bool shouldKeep(InputSectionBase<ELFT> *S);
  // Assigns virtual addresses to the output sections in S.
  void assignAddresses(ArrayRef<OutputSectionBase<ELFT> *> S);
  // Orders two output section names by their position in the script.
  int compareSections(StringRef A, StringRef B);
  // Adds the symbols assigned in the script to the symbol table.
  void addScriptedSymbols();

private:
  // "ScriptConfig" is a bit too long, so define a short name for it.
  ScriptConfiguration &Opt = *ScriptConfig;

  // Returns the index of Name among the SECTIONS commands, or INT_MAX.
  int getSectionIndex(StringRef Name);

  // Current value of the location counter ".". Initialized so that it never
  // holds an indeterminate value before assignAddresses() sets it.
  uintX_t Dot = 0;
};
// Variable templates are a C++14 feature, so we can't make a global
// variable a template. Use a struct with a static member as a workaround.
template <class ELFT> struct Script { static LinkerScript<ELFT> *X; };
template <class ELFT> LinkerScript<ELFT> *Script<ELFT>::X;
} // namespace elf
} // namespace lld
#endif

View File

@ -21,9 +21,12 @@
//===----------------------------------------------------------------------===//
#include "InputSection.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "Strings.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Target.h"
#include "Writer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Object/ELF.h"
@ -35,28 +38,76 @@ using namespace llvm::ELF;
using namespace llvm::object;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
// A resolved relocation. The Sec and Offset fields are set if the relocation
// was resolved to an offset within a section.
template <class ELFT>
struct ResolvedReloc {
// Section the relocation points into. resolveReloc() yields nullptr here when
// the target symbol is not a DefinedRegular or has no section.
InputSectionBase<ELFT> *Sec;
// Offset inside Sec (0 when Sec is null).
typename ELFT::uint Offset;
};
// Returns the implicit addend of a REL-style relocation.
//
// REL records have no addend field; the addend is stored in the section
// contents at the location the relocation applies to. It therefore has to be
// read at Rel.r_offset inside the section data, not at the first byte of the
// section, otherwise every relocation of the section would see the same
// (unrelated) value.
template <class ELFT>
static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec,
                                     const typename ELFT::Rel &Rel) {
  return Target->getImplicitAddend(Sec.getSectionData().begin() + Rel.r_offset,
                                   Rel.getType(Config->Mips64EL));
}
// RELA records carry their addend explicitly, so it is returned as stored.
// Sec is unused; it only keeps the signature parallel to the REL overload.
template <class ELFT>
static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec,
const typename ELFT::Rela &Rel) {
return Rel.r_addend;
}
// Resolves relocation Rel of section Sec to a (section, offset) pair.
// Yields {nullptr, 0} unless the target symbol is a DefinedRegular that
// belongs to a section. The addend contributes to the offset only when the
// target is a section symbol; otherwise the symbol value alone is used.
template <class ELFT, class RelT>
static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec,
                                        RelT &Rel) {
  using uintX_t = typename ELFT::uint;

  auto *Def =
      dyn_cast<DefinedRegular<ELFT>>(&Sec.getFile()->getRelocTargetSym(Rel));
  if (Def == nullptr || Def->Section == nullptr)
    return {nullptr, 0};

  uintX_t Off = Def->Value;
  if (Def->isSection())
    Off += getAddend(Sec, Rel);
  return {Def->Section->Repl, Off};
}
// Calls Fn with the resolved target of every relocation stored in RelSec.
// RelSec's sh_type decides whether its records are read as RELA or as REL.
template <class ELFT, class Elf_Shdr>
static void run(ELFFile<ELFT> &Obj, InputSectionBase<ELFT> &Sec,
                Elf_Shdr *RelSec, std::function<void(ResolvedReloc<ELFT>)> Fn) {
  if (RelSec->sh_type != SHT_RELA) {
    for (const typename ELFT::Rel &Rel : Obj.rels(RelSec))
      Fn(resolveReloc(Sec, Rel));
    return;
  }
  for (const typename ELFT::Rela &Rela : Obj.relas(RelSec))
    Fn(resolveReloc(Sec, Rela));
}
// Calls Fn for each section that Sec refers to via relocations.
template <class ELFT>
static void forEachSuccessor(InputSection<ELFT> *Sec,
std::function<void(InputSectionBase<ELFT> *)> Fn) {
typedef typename ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
static void forEachSuccessor(InputSection<ELFT> &Sec,
std::function<void(ResolvedReloc<ELFT>)> Fn) {
ELFFile<ELFT> &Obj = Sec.getFile()->getObj();
// Walk every relocation section attached to Sec and hand each resolved
// relocation target to Fn.
for (const typename ELFT::Shdr *RelSec : Sec.RelocSections)
run(Obj, Sec, RelSec, Fn);
}
ELFFile<ELFT> &Obj = Sec->getFile()->getObj();
for (const Elf_Shdr *RelSec : Sec->RelocSections) {
if (RelSec->sh_type == SHT_RELA) {
for (const Elf_Rela &RI : Obj.relas(RelSec))
if (InputSectionBase<ELFT> *Succ = Sec->getRelocTarget(RI))
Fn(Succ);
} else {
for (const Elf_Rel &RI : Obj.rels(RelSec))
if (InputSectionBase<ELFT> *Succ = Sec->getRelocTarget(RI))
Fn(Succ);
}
}
// Reports to Fn every section that the relocations of the .eh_frame section
// EH refer to, except sections that are missing, discarded, or executable
// (SHF_EXECINSTR). The offset handed to Fn is always 0, regardless of the
// offset the relocation resolved to. Does nothing if EH has no relocation
// section.
template <class ELFT>
static void scanEhFrameSection(EhInputSection<ELFT> &EH,
                               std::function<void(ResolvedReloc<ELFT>)> Fn) {
  if (!EH.RelocSection)
    return;

  auto KeepIfNonExec = [&](ResolvedReloc<ELFT> R) {
    bool Usable = R.Sec && R.Sec != &InputSection<ELFT>::Discarded;
    if (Usable && !(R.Sec->getSectionHdr()->sh_flags & SHF_EXECINSTR))
      Fn({R.Sec, 0});
  };

  ELFFile<ELFT> &EObj = EH.getFile()->getObj();
  run<ELFT>(EObj, EH, EH.RelocSection, KeepIfNonExec);
}
// Sections listed below are special because they are used by the loader
@ -70,6 +121,12 @@ template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) {
return true;
default:
StringRef S = Sec->getSectionName();
// We do not want to reclaim sections if they can be referred
// by __start_* and __stop_* symbols.
if (isValidCIdentifier(S))
return true;
return S.startswith(".ctors") || S.startswith(".dtors") ||
S.startswith(".init") || S.startswith(".fini") ||
S.startswith(".jcr");
@ -79,52 +136,66 @@ template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) {
// This is the main function of the garbage collector.
// Starting from GC-root sections, this function visits all reachable
// sections to set their "Live" bits.
template <class ELFT> void elf2::markLive(SymbolTable<ELFT> *Symtab) {
template <class ELFT> void elf::markLive() {
SmallVector<InputSection<ELFT> *, 256> Q;
auto Enqueue = [&](InputSectionBase<ELFT> *Sec) {
if (!Sec || Sec->Live)
auto Enqueue = [&](ResolvedReloc<ELFT> R) {
if (!R.Sec)
return;
Sec->Live = true;
if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(Sec))
// Usually, a whole section is marked as live or dead, but in mergeable
// (splittable) sections, each piece of data has independent liveness bit.
// So we explicitly tell it which offset is in use.
if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(R.Sec))
MS->markLiveAt(R.Offset);
if (R.Sec->Live)
return;
R.Sec->Live = true;
if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec))
Q.push_back(S);
};
auto MarkSymbol = [&](SymbolBody *Sym) {
if (Sym)
if (auto *D = dyn_cast<DefinedRegular<ELFT>>(Sym->repl()))
Enqueue(D->Section);
auto MarkSymbol = [&](const SymbolBody *Sym) {
if (auto *D = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym))
Enqueue({D->Section, D->Value});
};
// Add GC root symbols.
MarkSymbol(Config->EntrySym);
MarkSymbol(Symtab->find(Config->Init));
MarkSymbol(Symtab->find(Config->Fini));
if (Config->EntrySym)
MarkSymbol(Config->EntrySym->body());
MarkSymbol(Symtab<ELFT>::X->find(Config->Init));
MarkSymbol(Symtab<ELFT>::X->find(Config->Fini));
for (StringRef S : Config->Undefined)
MarkSymbol(Symtab->find(S));
MarkSymbol(Symtab<ELFT>::X->find(S));
// Preserve externally-visible symbols if the symbols defined by this
// file can interrupt other ELF file's symbols at runtime.
if (Config->Shared || Config->ExportDynamic) {
for (const std::pair<StringRef, Symbol *> &P : Symtab->getSymbols()) {
SymbolBody *B = P.second->Body;
if (B->getVisibility() == STV_DEFAULT)
MarkSymbol(B);
}
}
for (const Symbol *S : Symtab<ELFT>::X->getSymbols())
if (S->includeInDynsym())
MarkSymbol(S->body());
// Preserve special sections.
for (const std::unique_ptr<ObjectFile<ELFT>> &F : Symtab->getObjectFiles())
// Preserve special sections and those which are specified in linker
// script KEEP command.
for (const std::unique_ptr<ObjectFile<ELFT>> &F :
Symtab<ELFT>::X->getObjectFiles())
for (InputSectionBase<ELFT> *Sec : F->getSections())
if (Sec && Sec != &InputSection<ELFT>::Discarded && isReserved(Sec))
Enqueue(Sec);
if (Sec && Sec != &InputSection<ELFT>::Discarded) {
// .eh_frame is always marked as live now, but it can also refer to
// sections that contain personality. We preserve all non-text sections
// referred to by .eh_frame here.
if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec))
scanEhFrameSection<ELFT>(*EH, Enqueue);
if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec))
Enqueue({Sec, 0});
}
// Mark all reachable sections.
while (!Q.empty())
forEachSuccessor<ELFT>(Q.pop_back_val(), Enqueue);
forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue);
}
template void elf2::markLive<ELF32LE>(SymbolTable<ELF32LE> *);
template void elf2::markLive<ELF32BE>(SymbolTable<ELF32BE> *);
template void elf2::markLive<ELF64LE>(SymbolTable<ELF64LE> *);
template void elf2::markLive<ELF64BE>(SymbolTable<ELF64BE> *);
template void elf::markLive<ELF32LE>();
template void elf::markLive<ELF32BE>();
template void elf::markLive<ELF64LE>();
template void elf::markLive<ELF64BE>();

View File

@ -1,165 +1,276 @@
include "llvm/Option/OptParser.td"
def Bsymbolic: Flag<["-"], "Bsymbolic">,
HelpText<"Bind defined symbols locally">;
// For options whose names are multiple letters, either one dash or
// two can precede the option name except those that start with 'o'.
class F<string name>: Flag<["--", "-"], name>;
class J<string name>: Joined<["--", "-"], name>;
class S<string name>: Separate<["--", "-"], name>;
class JS<string name>: JoinedOrSeparate<["--", "-"], name>;
def Bdynamic: Flag<["-"], "Bdynamic">,
HelpText<"Link against shared libraries">;
def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind defined symbols locally">;
def Bstatic: Flag<["-"], "Bstatic">,
HelpText<"Do not link against shared libraries">;
def Bsymbolic_functions: F<"Bsymbolic-functions">,
HelpText<"Bind defined function symbols locally">;
def L : JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
HelpText<"Directory to search for libraries">;
def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries">;
def O : Joined<["-"], "O">, HelpText<"Optimize">;
def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
def allow_multiple_definition: Flag<["--"], "allow-multiple-definition">,
def build_id: F<"build-id">, HelpText<"Generate build ID note">;
def build_id_eq: J<"build-id=">, HelpText<"Generate build ID note">;
def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
HelpText<"Add a directory to the library search path">;
def O: Joined<["-"], "O">, HelpText<"Optimize output file size">;
def allow_multiple_definition: F<"allow-multiple-definition">,
HelpText<"Allow multiple definitions">;
def allow_shlib_undefined : Flag<["--", "-"], "allow-shlib-undefined">;
def as_needed: F<"as-needed">,
HelpText<"Only set DT_NEEDED for shared libraries if used">;
def as_needed : Flag<["--"], "as-needed">;
def disable_new_dtags : Flag<["--"], "disable-new-dtags">,
def disable_new_dtags: F<"disable-new-dtags">,
HelpText<"Disable new dynamic tags">;
def discard_all : Flag<["-"], "discard-all">,
HelpText<"Delete all local symbols">;
def discard_all: F<"discard-all">, HelpText<"Delete all local symbols">;
def discard_locals : Flag<["-"], "discard-locals">,
def discard_locals: F<"discard-locals">,
HelpText<"Delete temporary local symbols">;
def discard_none : Flag<["-"], "discard-none">,
def discard_none: F<"discard-none">,
HelpText<"Keep all symbols in the symbol table">;
def dynamic_linker : Separate<["--", "-"], "dynamic-linker">,
def dynamic_linker: S<"dynamic-linker">,
HelpText<"Which dynamic linker to use">;
def enable_new_dtags : Flag<["--"], "enable-new-dtags">,
def dynamic_list: S<"dynamic-list">,
HelpText<"Read a list of dynamic symbols">;
def eh_frame_hdr: F<"eh-frame-hdr">,
HelpText<"Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header">;
def enable_new_dtags: F<"enable-new-dtags">,
HelpText<"Enable new dynamic tags">;
def entry : Separate<["--", "-"], "entry">, MetaVarName<"<entry>">,
def end_lib: F<"end-lib">,
HelpText<"End a grouping of objects that should be treated as if they were together in an archive">;
def entry: S<"entry">, MetaVarName<"<entry>">,
HelpText<"Name of entry point symbol">;
def export_dynamic : Flag<["--", "-"], "export-dynamic">,
def export_dynamic: F<"export-dynamic">,
HelpText<"Put symbols in the dynamic symbol table">;
def fini : Separate<["-"], "fini">, MetaVarName<"<symbol>">,
def export_dynamic_symbol: S<"export-dynamic-symbol">,
HelpText<"Put a symbol in the dynamic symbol table">;
def fatal_warnings: F<"fatal-warnings">,
HelpText<"Treat warnings as errors">;
def fini: S<"fini">, MetaVarName<"<symbol>">,
HelpText<"Specify a finalizer function">;
def hash_style : Separate<["--", "-"], "hash-style">,
def hash_style: S<"hash-style">,
HelpText<"Specify hash style (sysv, gnu or both)">;
def gc_sections : Flag<["--"], "gc-sections">,
def help: F<"help">, HelpText<"Print option help">;
def icf: F<"icf=all">, HelpText<"Enable identical code folding">;
def image_base : J<"image-base=">, HelpText<"Set the base address">;
def gc_sections: F<"gc-sections">,
HelpText<"Enable garbage collection of unused sections">;
def init : Separate<["-"], "init">, MetaVarName<"<symbol>">,
def init: S<"init">, MetaVarName<"<symbol>">,
HelpText<"Specify an initializer function">;
def l : JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">,
def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">,
HelpText<"Root name of library to use">;
def m : JoinedOrSeparate<["-"], "m">,
HelpText<"Set target emulation">;
def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">,
HelpText<"Optimization level for LTO">;
def no_allow_shlib_undefined : Flag<["--"], "no-allow-shlib-undefined">;
def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">;
def no_as_needed : Flag<["--"], "no-as-needed">;
def no_as_needed: F<"no-as-needed">,
HelpText<"Always DT_NEEDED for shared libraries">;
def no_whole_archive : Flag<["--", "-"], "no-whole-archive">,
def no_demangle: F<"no-demangle">,
HelpText<"Do not demangle symbol names">;
def no_gnu_unique: F<"no-gnu-unique">,
HelpText<"Disable STB_GNU_UNIQUE symbol binding">;
def no_whole_archive: F<"no-whole-archive">,
HelpText<"Restores the default behavior of loading archive members">;
def noinhibit_exec : Flag<["--"], "noinhibit-exec">,
def noinhibit_exec: F<"noinhibit-exec">,
HelpText<"Retain the executable output file whenever it is still usable">;
def no_undefined : Flag<["--"], "no-undefined">,
def no_undefined: F<"no-undefined">,
HelpText<"Report unresolved symbols even if the linker is creating a shared library">;
def o : Separate<["-"], "o">, MetaVarName<"<path>">,
def no_undefined_version: F<"no-undefined-version">,
HelpText<"Report version scripts that refer undefined symbols">;
def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,
HelpText<"Path to file to write output">;
def print_gc_sections: Flag<["--"], "print-gc-sections">,
def pie: F<"pie">, HelpText<"Create a position independent executable">;
def print_gc_sections: F<"print-gc-sections">,
HelpText<"List removed unused sections">;
def rpath : Separate<["-"], "rpath">,
HelpText<"Add a DT_RUNPATH to the output">;
def reproduce: S<"reproduce">,
HelpText<"Dump linker invocation and input files for debugging">;
def relocatable : Flag<["--"], "relocatable">;
def rpath: S<"rpath">, HelpText<"Add a DT_RUNPATH to the output">;
def script : Separate<["--"], "script">, HelpText<"Read linker script">;
def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">;
def shared : Flag<["-"], "shared">,
HelpText<"Build a shared object">;
def script: S<"script">, HelpText<"Read linker script">;
def soname : Joined<["-"], "soname=">,
HelpText<"Set DT_SONAME">;
def shared: F<"shared">, HelpText<"Build a shared object">;
def strip_all : Flag<["--"], "strip-all">,
HelpText<"Strip all symbols">;
def soname: J<"soname=">, HelpText<"Set DT_SONAME">;
def sysroot : Joined<["--"], "sysroot=">,
HelpText<"Set the system root">;
def start_lib: F<"start-lib">,
HelpText<"Start a grouping of objects that should be treated as if they were together in an archive">;
def undefined : Joined<["--"], "undefined=">,
def strip_all: F<"strip-all">, HelpText<"Strip all symbols">;
def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">;
def sysroot: J<"sysroot=">, HelpText<"Set the system root">;
def threads: F<"threads">, HelpText<"Enable use of threads">;
def trace: F<"trace">, HelpText<"Print the names of the input files">;
def trace_symbol : J<"trace-symbol=">, HelpText<"Trace references to symbols">;
def undefined: J<"undefined=">,
HelpText<"Force undefined symbol during linking">;
def verbose : Flag<["--"], "verbose">;
def unresolved_symbols: J<"unresolved-symbols=">,
HelpText<"Determine how to handle unresolved symbols">;
def whole_archive : Flag<["--", "-"], "whole-archive">,
def rsp_quoting: J<"rsp-quoting=">,
HelpText<"Quoting style for response files. Values supported: windows|posix">;
def verbose: F<"verbose">, HelpText<"Verbose mode">;
def version: F<"version">, HelpText<"Display the version number">;
def version_script: S<"version-script">,
HelpText<"Read a version script">;
def warn_common: F<"warn-common">,
HelpText<"Warn about duplicate common symbols">;
def whole_archive: F<"whole-archive">,
HelpText<"Force load of all members in a static library">;
def wrap : Separate<["--", "-"], "wrap">, MetaVarName<"<symbol>">,
def wrap: S<"wrap">, MetaVarName<"<symbol>">,
HelpText<"Use wrapper functions for symbol">;
def z : JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
// Aliases
def alias_Bdynamic_call_shared: Flag<["-"], "call_shared">, Alias<Bdynamic>;
def alias_Bdynamic_dy: Flag<["-"], "dy">, Alias<Bdynamic>;
def alias_Bstatic_dn: Flag<["-"], "dn">, Alias<Bstatic>;
def alias_Bstatic_non_shared: Flag<["-"], "non_shared">, Alias<Bstatic>;
def alias_Bstatic_static: Flag<["-"], "static">, Alias<Bstatic>;
def alias_L__library_path : Joined<["--"], "library-path=">, Alias<L>;
def alias_Bdynamic_call_shared: F<"call_shared">, Alias<Bdynamic>;
def alias_Bdynamic_dy: F<"dy">, Alias<Bdynamic>;
def alias_Bstatic_dn: F<"dn">, Alias<Bstatic>;
def alias_Bstatic_non_shared: F<"non_shared">, Alias<Bstatic>;
def alias_Bstatic_static: F<"static">, Alias<Bstatic>;
def alias_L__library_path: J<"library-path=">, Alias<L>;
def alias_discard_all_x: Flag<["-"], "x">, Alias<discard_all>;
def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>;
def alias_entry_e : Separate<["-"], "e">, Alias<entry>;
def alias_dynamic_list: J<"dynamic-list=">, Alias<dynamic_list>;
def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>;
def alias_entry_entry: J<"entry=">, Alias<entry>;
def alias_export_dynamic_E: Flag<["-"], "E">, Alias<export_dynamic>;
def alias_fini_fini : Joined<["-"], "fini=">, Alias<fini>;
def alias_hash_style_hash_style : Joined<["--", "-"], "hash-style=">, Alias<hash_style>;
def alias_init_init : Joined<["-"], "init=">, Alias<init>;
def alias_l__library : Joined<["--"], "library=">, Alias<l>;
def alias_o_output : Joined<["--"], "output=">, Alias<o>;
def alias_rpath_rpath : Joined<["-"], "rpath=">, Alias<rpath>;
def alias_relocatable_r : Flag<["-"], "r">, Alias<relocatable>;
def alias_shared_Bshareable : Flag<["-"], "Bshareable">, Alias<shared>;
def alias_soname_h : Separate<["-"], "h">, Alias<soname>;
def alias_soname_soname : Separate<["-"], "soname">, Alias<soname>;
def alias_script_T : Separate<["-"], "T">, Alias<script>;
def alias_export_dynamic_symbol: J<"export-dynamic-symbol=">,
Alias<export_dynamic_symbol>;
def alias_fini_fini: J<"fini=">, Alias<fini>;
def alias_hash_style_hash_style: J<"hash-style=">, Alias<hash_style>;
def alias_init_init: J<"init=">, Alias<init>;
def alias_l__library: J<"library=">, Alias<l>;
def alias_o_output: Joined<["--"], "output=">, Alias<o>;
def alias_pie_pic_executable: F<"pic-executable">, Alias<pie>;
def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>;
def alias_rpath_R: Joined<["-"], "R">, Alias<rpath>;
def alias_rpath_rpath: J<"rpath=">, Alias<rpath>;
def alias_script_T: JoinedOrSeparate<["-"], "T">, Alias<script>;
def alias_shared_Bshareable: F<"Bshareable">, Alias<shared>;
def alias_soname_h: JoinedOrSeparate<["-"], "h">, Alias<soname>;
def alias_soname_soname: S<"soname">, Alias<soname>;
def alias_strip_all: Flag<["-"], "s">, Alias<strip_all>;
def alias_undefined_u : Separate<["-"], "u">, Alias<undefined>;
def alias_wrap_wrap : Joined<["--", "-"], "wrap=">, Alias<wrap>;
def alias_strip_debug_S: Flag<["-"], "S">, Alias<strip_debug>;
def alias_trace: Flag<["-"], "t">, Alias<trace>;
def alias_trace_symbol_y : JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>;
def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>;
def alias_version_V: Flag<["-"], "V">, Alias<version>;
def alias_version_v: Flag<["-"], "v">, Alias<version>;
def alias_wrap_wrap: J<"wrap=">, Alias<wrap>;
// Our symbol resolution algorithm handles symbols in archive files differently
// than traditional linkers, so we don't need --start-group and --end-group.
// These options are recognized for compatibility but ignored.
def end_group : Flag<["--"], "end-group">;
def end_group: F<"end-group">;
def end_group_paren: Flag<["-"], ")">;
def start_group : Flag<["--"], "start-group">;
def start_group: F<"start-group">;
def start_group_paren: Flag<["-"], "(">;
// Ignore LTO plugin-related options.
// clang -flto passes -plugin and -plugin-opt to the linker. This is required
// for ld.gold and ld.bfd to get LTO working. But it's not for lld which doesn't
// rely on a plugin. Instead of detecting which linker is used on clang side we
// just ignore the option on lld side as it's easier. In fact, the linker could
// be called 'ld' and understanding which linker is used would require parsing of
// --version output.
def plugin: S<"plugin">;
def plugin_eq: J<"plugin=">;
def plugin_opt: S<"plugin-opt">;
def plugin_opt_eq: J<"plugin-opt=">;
// Options listed below are silently ignored for now for compatibility.
def build_id : Flag<["--"], "build-id">;
def eh_frame_hdr : Flag<["--"], "eh-frame-hdr">;
def fatal_warnings : Flag<["--"], "fatal-warnings">;
def no_add_needed : Flag<["--"], "no-add-needed">;
def no_fatal_warnings : Flag<["--"], "no-fatal-warnings">;
def no_warn_mismatch : Flag<["--"], "no-warn-mismatch">;
def version_script : Separate<["--"], "version-script">;
def warn_common : Flag<["--"], "warn-common">;
def warn_shared_textrel : Flag<["--"], "warn-shared-textrel">;
def G : Separate<["-"], "G">;
def allow_shlib_undefined: F<"allow-shlib-undefined">;
def define_common: F<"define-common">;
def demangle: F<"demangle">;
def detect_odr_violations: F<"detect-odr-violations">;
def no_add_needed: F<"no-add-needed">;
def no_allow_shlib_undefined: F<"no-allow-shlib-undefined">;
def no_copy_dt_needed_entries: F<"no-copy-dt-needed-entries">,
Alias<no_add_needed>;
def no_dynamic_linker: F<"no-dynamic-linker">;
def no_fatal_warnings: F<"no-fatal-warnings">;
def no_mmap_output_file: F<"no-mmap-output-file">;
def no_warn_common: F<"no-warn-common">;
def no_warn_mismatch: F<"no-warn-mismatch">;
def rpath_link: S<"rpath-link">;
def rpath_link_eq: J<"rpath-link=">;
def sort_common: F<"sort-common">;
def warn_execstack: F<"warn-execstack">;
def warn_shared_textrel: F<"warn-shared-textrel">;
def G: Separate<["-"], "G">;
// Aliases for ignored options
def alias_version_script_version_script : Joined<["--"], "version-script=">, Alias<version_script>;
def alias_define_common_d: Flag<["-"], "d">, Alias<define_common>;
def alias_define_common_dc: F<"dc">, Alias<define_common>;
def alias_define_common_dp: F<"dp">, Alias<define_common>;
def alias_version_script_version_script: J<"version-script=">,
Alias<version_script>;
// LTO-related options.
def lto_jobs: J<"lto-jobs=">, HelpText<"Number of threads to run codegen">;
def lto_aa_pipeline: J<"lto-aa-pipeline=">,
HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">;
def lto_newpm_passes: J<"lto-newpm-passes=">,
HelpText<"Passes to run during LTO">;
def disable_verify: F<"disable-verify">;
def mllvm: S<"mllvm">;
def save_temps: F<"save-temps">;

File diff suppressed because it is too large Load Diff

View File

@ -10,64 +10,35 @@
#ifndef LLD_ELF_OUTPUT_SECTIONS_H
#define LLD_ELF_OUTPUT_SECTIONS_H
#include "lld/Core/LLVM.h"
#include "Config.h"
#include "Relocations.h"
#include "llvm/ADT/MapVector.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/ELF.h"
#include "Config.h"
#include <type_traits>
#include "llvm/Support/MD5.h"
#include "llvm/Support/SHA1.h"
namespace lld {
namespace elf2 {
namespace elf {
class SymbolBody;
struct SectionPiece;
template <class ELFT> class SymbolTable;
template <class ELFT> class SymbolTableSection;
template <class ELFT> class StringTableSection;
template <class ELFT> class EHInputSection;
template <class ELFT> class EhInputSection;
template <class ELFT> class InputSection;
template <class ELFT> class InputSectionBase;
template <class ELFT> class MergeInputSection;
template <class ELFT> class MipsReginfoInputSection;
template <class ELFT> class OutputSection;
template <class ELFT> class ObjectFile;
template <class ELFT> class SharedFile;
template <class ELFT> class SharedSymbol;
template <class ELFT> class DefinedRegular;
// Flag to force GOT to be in output if we have relocations
// that relies on its address.
extern bool HasGotOffRel;
template <class ELFT>
static inline typename llvm::object::ELFFile<ELFT>::uintX_t
getAddend(const typename llvm::object::ELFFile<ELFT>::Elf_Rel &Rel) {
return 0;
}
template <class ELFT>
static inline typename llvm::object::ELFFile<ELFT>::uintX_t
getAddend(const typename llvm::object::ELFFile<ELFT>::Elf_Rela &Rel) {
return Rel.r_addend;
}
template <class ELFT>
typename llvm::object::ELFFile<ELFT>::uintX_t getSymVA(const SymbolBody &S);
template <class ELFT, bool IsRela>
typename llvm::object::ELFFile<ELFT>::uintX_t
getLocalRelTarget(const ObjectFile<ELFT> &File,
const llvm::object::Elf_Rel_Impl<ELFT, IsRela> &Rel,
typename llvm::object::ELFFile<ELFT>::uintX_t Addend);
bool canBePreempted(const SymbolBody *Body, bool NeedsGot);
template <class ELFT>
bool shouldKeepInSymtab(
const ObjectFile<ELFT> &File, StringRef Name,
const typename llvm::object::ELFFile<ELFT>::Elf_Sym &Sym);
// This represents a section in an output file.
// Different sub classes represent different types of sections. Some contain
// input sections, others are created by the linker.
@ -75,13 +46,14 @@ bool shouldKeepInSymtab(
// non-overlapping file offsets and VAs.
template <class ELFT> class OutputSectionBase {
public:
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename ELFT::uint uintX_t;
typedef typename ELFT::Shdr Elf_Shdr;
OutputSectionBase(StringRef Name, uint32_t sh_type, uintX_t sh_flags);
OutputSectionBase(StringRef Name, uint32_t Type, uintX_t Flags);
void setVA(uintX_t VA) { Header.sh_addr = VA; }
uintX_t getVA() const { return Header.sh_addr; }
void setFileOffset(uintX_t Off) { Header.sh_offset = Off; }
void setSHName(unsigned Val) { Header.sh_name = Val; }
void writeHeaderTo(Elf_Shdr *SHdr);
StringRef getName() { return Name; }
@ -92,21 +64,24 @@ template <class ELFT> class OutputSectionBase {
// Returns the size of the section in the output file.
uintX_t getSize() const { return Header.sh_size; }
void setSize(uintX_t Val) { Header.sh_size = Val; }
uintX_t getFlags() { return Header.sh_flags; }
uintX_t getFileOff() { return Header.sh_offset; }
uintX_t getAlign() {
// The ELF spec states that a value of 0 means the section has no alignment
// constraints.
return std::max<uintX_t>(Header.sh_addralign, 1);
}
uint32_t getType() { return Header.sh_type; }
void updateAlign(uintX_t Align) {
if (Align > Header.sh_addralign)
Header.sh_addralign = Align;
uintX_t getFlags() const { return Header.sh_flags; }
uintX_t getFileOff() const { return Header.sh_offset; }
uintX_t getAlignment() const { return Header.sh_addralign; }
uint32_t getType() const { return Header.sh_type; }
void updateAlignment(uintX_t Alignment) {
if (Alignment > Header.sh_addralign)
Header.sh_addralign = Alignment;
}
// If true, this section will be page aligned on disk.
// Typically the first section of each PT_LOAD segment has this flag.
bool PageAlign = false;
virtual void finalize() {}
virtual void writeTo(uint8_t *Buf) = 0;
virtual void finalizePieces() {}
virtual void assignOffsets() {}
virtual void writeTo(uint8_t *Buf) {}
virtual ~OutputSectionBase() = default;
protected:
@ -116,18 +91,21 @@ template <class ELFT> class OutputSectionBase {
template <class ELFT> class GotSection final : public OutputSectionBase<ELFT> {
typedef OutputSectionBase<ELFT> Base;
typedef typename Base::uintX_t uintX_t;
typedef typename ELFT::uint uintX_t;
public:
GotSection();
void finalize() override;
void writeTo(uint8_t *Buf) override;
void addEntry(SymbolBody *Sym);
bool addDynTlsEntry(SymbolBody *Sym);
bool addCurrentModuleTlsIndex();
bool empty() const { return Entries.empty(); }
uintX_t getEntryAddr(const SymbolBody &B) const;
void addEntry(SymbolBody &Sym);
void addMipsEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr);
bool addDynTlsEntry(SymbolBody &Sym);
bool addTlsIndex();
bool empty() const { return MipsPageEntries == 0 && Entries.empty(); }
uintX_t getMipsLocalPageOffset(uintX_t Addr);
uintX_t getMipsGotOffset(const SymbolBody &B, uintX_t Addend) const;
uintX_t getGlobalDynAddr(const SymbolBody &B) const;
uintX_t getGlobalDynOffset(const SymbolBody &B) const;
uintX_t getNumEntries() const { return Entries.size(); }
// Returns the symbol which corresponds to the first entry of the global part
@ -140,24 +118,52 @@ template <class ELFT> class GotSection final : public OutputSectionBase<ELFT> {
// the number of reserved entries. This method is MIPS-specific.
unsigned getMipsLocalEntriesNum() const;
uint32_t getLocalTlsIndexVA() { return Base::getVA() + LocalTlsIndexOff; }
// Returns offset of TLS part of the MIPS GOT table. This part goes
// after 'local' and 'global' entries.
uintX_t getMipsTlsOffset();
uintX_t getTlsIndexVA() { return Base::getVA() + TlsIndexOff; }
uint32_t getTlsIndexOff() { return TlsIndexOff; }
// Flag to force GOT to be in output if we have relocations
// that rely on its address.
bool HasGotOffRel = false;
private:
std::vector<const SymbolBody *> Entries;
uint32_t LocalTlsIndexOff = -1;
uint32_t TlsIndexOff = -1;
uint32_t MipsPageEntries = 0;
// Output sections referenced by MIPS GOT relocations.
llvm::SmallPtrSet<const OutputSectionBase<ELFT> *, 10> MipsOutSections;
llvm::DenseMap<uintX_t, size_t> MipsLocalGotPos;
// The MIPS ABI requires a unique GOT entry for each Symbol/Addend pair.
// `MipsGotMap` maps an (S,A) pair to the GOT index in the `MipsLocal` or
// `MipsGlobal` vector. In general it does not make sense to take the addend
// into account for preemptible symbols, because the corresponding GOT
// entries should have a one-to-one mapping with the dynamic symbol table.
// But we use the same container types for both kinds of GOT entries
// to handle them uniformly.
typedef std::pair<const SymbolBody*, uintX_t> MipsGotEntry;
typedef std::vector<MipsGotEntry> MipsGotEntries;
llvm::DenseMap<MipsGotEntry, size_t> MipsGotMap;
MipsGotEntries MipsLocal;
MipsGotEntries MipsGlobal;
// Write MIPS-specific parts of the GOT.
void writeMipsGot(uint8_t *&Buf);
};
template <class ELFT>
class GotPltSection final : public OutputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::uint uintX_t;
public:
GotPltSection();
void finalize() override;
void writeTo(uint8_t *Buf) override;
void addEntry(SymbolBody *Sym);
void addEntry(SymbolBody &Sym);
bool empty() const;
uintX_t getEntryAddr(const SymbolBody &B) const;
private:
std::vector<const SymbolBody *> Entries;
@ -165,156 +171,248 @@ class GotPltSection final : public OutputSectionBase<ELFT> {
template <class ELFT> class PltSection final : public OutputSectionBase<ELFT> {
typedef OutputSectionBase<ELFT> Base;
typedef typename Base::uintX_t uintX_t;
typedef typename ELFT::uint uintX_t;
public:
PltSection();
void finalize() override;
void writeTo(uint8_t *Buf) override;
void addEntry(SymbolBody *Sym);
void addEntry(SymbolBody &Sym);
bool empty() const { return Entries.empty(); }
uintX_t getEntryAddr(const SymbolBody &B) const;
private:
std::vector<std::pair<const SymbolBody *, unsigned>> Entries;
};
template <class ELFT> struct DynamicReloc {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
InputSectionBase<ELFT> *C;
const Elf_Rel *RI;
// Describes one dynamic relocation. It is anchored either to an input
// section or to an output section, depending on which constructor is used;
// the pointer for the other kind keeps its nullptr default.
template <class ELFT> class DynamicReloc {
typedef typename ELFT::uint uintX_t;
public:
// Relocation located OffsetInSec into the input section InputSec.
DynamicReloc(uint32_t Type, const InputSectionBase<ELFT> *InputSec,
uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym,
uintX_t Addend)
: Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec),
UseSymVA(UseSymVA), Addend(Addend) {}
// Relocation located OffsetInSec into the output section OutputSec.
DynamicReloc(uint32_t Type, const OutputSectionBase<ELFT> *OutputSec,
uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym,
uintX_t Addend)
: Type(Type), Sym(Sym), OutputSec(OutputSec), OffsetInSec(OffsetInSec),
UseSymVA(UseSymVA), Addend(Addend) {}
uintX_t getOffset() const;
uintX_t getAddend() const;
uint32_t getSymIndex() const;
// Returns the output section given to the constructor, or nullptr when the
// relocation was created with an input section instead.
const OutputSectionBase<ELFT> *getOutputSec() const { return OutputSec; }
// Relocation type; the only publicly accessible data member.
uint32_t Type;
private:
SymbolBody *Sym;
const InputSectionBase<ELFT> *InputSec = nullptr;
const OutputSectionBase<ELFT> *OutputSec = nullptr;
uintX_t OffsetInSec;
// NOTE(review): presumably selects whether the symbol's address is folded
// into the value returned by getAddend() -- confirm in the definition.
bool UseSymVA;
uintX_t Addend;
};
// Output section holding a symbol table. Symbol names live in the string
// table section passed to the constructor (StrTabSec).
template <class ELFT>
class SymbolTableSection final : public OutputSectionBase<ELFT> {
public:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Range Elf_Sym_Range;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
SymbolTableSection(SymbolTable<ELFT> &Table,
StringTableSection<ELFT> &StrTabSec);
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::SymRange Elf_Sym_Range;
typedef typename ELFT::uint uintX_t;
SymbolTableSection(StringTableSection<ELFT> &StrTabSec);
void finalize() override;
void writeTo(uint8_t *Buf) override;
void addLocalSymbol(StringRef Name);
void addSymbol(SymbolBody *Body);
StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; }
unsigned getNumSymbols() const { return NumVisible + 1; }
// Local symbols plus the recorded symbols, plus one.
// NOTE(review): the extra one is presumably the reserved null symbol at
// index 0 -- confirm.
unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; }
ArrayRef<SymbolBody *> getSymbols() const { return Symbols; }
ArrayRef<std::pair<SymbolBody *, size_t>> getSymbols() const {
return Symbols;
}
unsigned NumLocals = 0;
StringTableSection<ELFT> &StrTabSec;
private:
void writeLocalSymbols(uint8_t *&Buf);
void writeGlobalSymbols(uint8_t *Buf);
static uint8_t getSymbolBinding(SymbolBody *Body);
const OutputSectionBase<ELFT> *getOutputSection(SymbolBody *Sym);
SymbolTable<ELFT> &Table;
StringTableSection<ELFT> &StrTabSec;
std::vector<SymbolBody *> Symbols;
unsigned NumVisible = 0;
unsigned NumLocals = 0;
// A vector of symbols and their string table offsets.
std::vector<std::pair<SymbolBody *, size_t>> Symbols;
};
// For more information about .gnu.version and .gnu.version_r see:
// https://www.akkadia.org/drepper/symbol-versioning
// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall
// contain symbol version definitions. The number of entries in this section
// shall be contained in the DT_VERDEFNUM entry of the .dynamic section.
// The section shall contain an array of Elf_Verdef structures, optionally
// followed by an array of Elf_Verdaux structures.
// Output section class for symbol version definitions (Elf_Verdef records
// with their Elf_Verdaux records).
template <class ELFT>
class VersionDefinitionSection final : public OutputSectionBase<ELFT> {
typedef typename ELFT::Verdef Elf_Verdef;
typedef typename ELFT::Verdaux Elf_Verdaux;
public:
VersionDefinitionSection();
void finalize() override;
void writeTo(uint8_t *Buf) override;
private:
// Emits one definition at Buf for version Index, named Name, whose name is
// stored at string-table offset NameOff.
// NOTE(review): exact record layout is in the implementation -- confirm.
void writeOne(uint8_t *Buf, uint32_t Index, StringRef Name, size_t NameOff);
// NOTE(review): presumably the string-table offset of the name used for the
// file's own (base) version definition -- confirm in finalize().
unsigned FileDefNameOff;
};
// The .gnu.version section specifies the required version of each symbol in the
// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol
// table entry. An Elf_Versym is just a 16-bit integer that refers to a version
// identifier defined in either the .gnu.version_r or the .gnu.version_d
// section. The values 0 and 1 are reserved. All other values are used for
// versions in the object itself or in any of its dependencies.
template <class ELFT>
class VersionTableSection final : public OutputSectionBase<ELFT> {
typedef typename ELFT::Versym Elf_Versym;
public:
VersionTableSection();
void finalize() override;
void writeTo(uint8_t *Buf) override;
};
// The .gnu.version_r section defines the version identifiers used by
// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each
// Elf_Verneed specifies the version requirements for a single DSO, and contains
// a reference to a linked list of Elf_Vernaux data structures which define the
// mapping from version identifiers to version names.
// Output section class collecting version requirements (Elf_Verneed /
// Elf_Vernaux records) for the shared files recorded in Needed.
template <class ELFT>
class VersionNeedSection final : public OutputSectionBase<ELFT> {
typedef typename ELFT::Verneed Elf_Verneed;
typedef typename ELFT::Vernaux Elf_Vernaux;
// A vector of shared files that need Elf_Verneed data structures and the
// string table offsets of their sonames.
std::vector<std::pair<SharedFile<ELFT> *, size_t>> Needed;
// The next available version identifier.
unsigned NextIndex;
public:
VersionNeedSection();
void addSymbol(SharedSymbol<ELFT> *SS);
void finalize() override;
void writeTo(uint8_t *Buf) override;
// Number of shared files recorded in Needed.
size_t getNeedNum() const { return Needed.size(); }
};
// Output section that accumulates DynamicReloc records (in Relocs) and writes
// them out as a relocation section.
template <class ELFT>
class RelocationSection final : public OutputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::Rel Elf_Rel;
typedef typename ELFT::Rela Elf_Rela;
typedef typename ELFT::uint uintX_t;
public:
RelocationSection(StringRef Name, bool IsRela);
void addReloc(const DynamicReloc<ELFT> &Reloc) { Relocs.push_back(Reloc); }
RelocationSection(StringRef Name, bool Sort);
void addReloc(const DynamicReloc<ELFT> &Reloc);
unsigned getRelocOffset();
void finalize() override;
void writeTo(uint8_t *Buf) override;
// True once at least one relocation has been added.
bool hasRelocs() const { return !Relocs.empty(); }
bool isRela() const { return IsRela; }
bool Static = false;
private:
bool applyTlsDynamicReloc(SymbolBody *Body, uint32_t Type, Elf_Rel *P,
Elf_Rel *N);
// NOTE(review): presumably requests sorting of Relocs before they are
// written -- confirm in writeTo().
bool Sort;
std::vector<DynamicReloc<ELFT>> Relocs;
const bool IsRela;
};
// Output section made up of regular input sections; the member sections are
// kept in Sections.
template <class ELFT>
class OutputSection final : public OutputSectionBase<ELFT> {
public:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
OutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags);
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Rel Elf_Rel;
typedef typename ELFT::Rela Elf_Rela;
typedef typename ELFT::uint uintX_t;
OutputSection(StringRef Name, uint32_t Type, uintX_t Flags);
void addSection(InputSectionBase<ELFT> *C) override;
// NOTE(review): going by the names, these order the member sections for
// .init_array/.fini_array and .ctors/.dtors respectively -- confirm.
void sortInitFini();
void sortCtorsDtors();
void writeTo(uint8_t *Buf) override;
private:
void finalize() override;
void assignOffsets() override;
std::vector<InputSection<ELFT> *> Sections;
};
// Output section built from MergeInputSection inputs. Contents go through an
// llvm::StringTableBuilder (Builder).
template <class ELFT>
class MergeOutputSection final : public OutputSectionBase<ELFT> {
typedef typename OutputSectionBase<ELFT>::uintX_t uintX_t;
bool shouldTailMerge() const;
typedef typename ELFT::uint uintX_t;
public:
MergeOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags);
MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags,
uintX_t Alignment);
void addSection(InputSectionBase<ELFT> *S) override;
void writeTo(uint8_t *Buf) override;
// NOTE(review): presumably the offset of Val within the merged output --
// confirm in the implementation.
unsigned getOffset(StringRef Val);
void finalize() override;
void finalizePieces() override;
bool shouldTailMerge() const;
private:
llvm::StringTableBuilder Builder{llvm::StringTableBuilder::RAW};
llvm::StringTableBuilder Builder;
std::vector<MergeInputSection<ELFT> *> Sections;
};
// FDE or CIE
template <class ELFT> struct EHRegion {
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
EHRegion(EHInputSection<ELFT> *S, unsigned Index);
StringRef data() const;
EHInputSection<ELFT> *S;
unsigned Index;
};
template <class ELFT> struct Cie : public EHRegion<ELFT> {
Cie(EHInputSection<ELFT> *S, unsigned Index);
std::vector<EHRegion<ELFT>> Fdes;
struct CieRecord {
SectionPiece *Piece = nullptr;
std::vector<SectionPiece *> FdePieces;
};
// Output section for .eh_frame.
// Input .eh_frame sections are collected in Sections; CIE records are tracked
// in Cies and deduplicated through CieMap.
template <class ELFT>
class EHOutputSection final : public OutputSectionBase<ELFT> {
public:
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela;
EHOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags);
void writeTo(uint8_t *Buf) override;
class EhOutputSection final : public OutputSectionBase<ELFT> {
typedef typename ELFT::uint uintX_t;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Rel Elf_Rel;
typedef typename ELFT::Rela Elf_Rela;
template <bool IsRela>
void addSectionAux(
EHInputSection<ELFT> *S,
llvm::iterator_range<const llvm::object::Elf_Rel_Impl<ELFT, IsRela> *>
Rels);
public:
EhOutputSection();
void writeTo(uint8_t *Buf) override;
void finalize() override;
// True while no input section has been added.
bool empty() const { return Sections.empty(); }
void addSection(InputSectionBase<ELFT> *S) override;
// NOTE(review): presumably the running count of FDEs kept in the output --
// confirm where it is incremented.
size_t NumFdes = 0;
private:
uintX_t readEntryLength(ArrayRef<uint8_t> D);
template <class RelTy>
void addSectionAux(EhInputSection<ELFT> *S, llvm::ArrayRef<RelTy> Rels);
std::vector<EHInputSection<ELFT> *> Sections;
std::vector<Cie<ELFT>> Cies;
template <class RelTy>
CieRecord *addCie(SectionPiece &Piece, EhInputSection<ELFT> *Sec,
ArrayRef<RelTy> &Rels);
// Maps CIE content + personality to an index in Cies.
llvm::DenseMap<std::pair<StringRef, StringRef>, unsigned> CieMap;
template <class RelTy>
bool isFdeLive(SectionPiece &Piece, EhInputSection<ELFT> *Sec,
ArrayRef<RelTy> &Rels);
uintX_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc);
std::vector<EhInputSection<ELFT> *> Sections;
std::vector<CieRecord *> Cies;
// CIE records are uniquified by their contents and personality functions.
llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap;
};
template <class ELFT>
@ -327,25 +425,24 @@ class InterpSection final : public OutputSectionBase<ELFT> {
// Output section holding a string table. finalize() publishes the current
// size as the section header's sh_size.
template <class ELFT>
class StringTableSection final : public OutputSectionBase<ELFT> {
public:
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::uint uintX_t;
StringTableSection(StringRef Name, bool Dynamic);
void reserve(StringRef S);
size_t addString(StringRef S);
// NOTE(review): HashIt presumably controls whether S is deduplicated through
// StringMap; the return value is presumably S's offset in the table --
// confirm in the implementation.
unsigned addString(StringRef S, bool HashIt = true);
void writeTo(uint8_t *Buf) override;
size_t getSize() const { return Used + Reserved; }
unsigned getSize() const { return Size; }
void finalize() override { this->Header.sh_size = getSize(); }
bool isDynamic() const { return Dynamic; }
private:
const bool Dynamic;
llvm::DenseMap<StringRef, unsigned> StringMap;
std::vector<StringRef> Strings;
size_t Used = 1; // ELF string tables start with a NUL byte, so 1.
size_t Reserved = 0;
unsigned Size = 1; // ELF string tables start with a NUL byte, so 1.
};
template <class ELFT>
class HashTableSection final : public OutputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word;
typedef typename ELFT::Word Elf_Word;
public:
HashTableSection();
@ -357,9 +454,9 @@ class HashTableSection final : public OutputSectionBase<ELFT> {
// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
template <class ELFT>
class GnuHashTableSection final : public OutputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Off Elf_Off;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::Off Elf_Off;
typedef typename ELFT::Word Elf_Word;
typedef typename ELFT::uint uintX_t;
public:
GnuHashTableSection();
@ -368,7 +465,7 @@ class GnuHashTableSection final : public OutputSectionBase<ELFT> {
// Adds symbols to the hash table.
// Sorts the input to satisfy GNU hash section requirements.
void addSymbols(std::vector<SymbolBody *> &Symbols);
void addSymbols(std::vector<std::pair<SymbolBody *, size_t>> &Symbols);
private:
static unsigned calcNBuckets(unsigned NumHashed);
@ -378,12 +475,13 @@ class GnuHashTableSection final : public OutputSectionBase<ELFT> {
void writeBloomFilter(uint8_t *&Buf);
void writeHashTable(uint8_t *Buf);
struct HashedSymbolData {
struct SymbolData {
SymbolBody *Body;
size_t STName;
uint32_t Hash;
};
std::vector<HashedSymbolData> HashedSymbols;
std::vector<SymbolData> Symbols;
unsigned MaskWords;
unsigned NBuckets;
@ -393,27 +491,45 @@ class GnuHashTableSection final : public OutputSectionBase<ELFT> {
// Output section for .dynamic. Contents are staged as Entry records in
// Entries and turned into final bytes later.
template <class ELFT>
class DynamicSection final : public OutputSectionBase<ELFT> {
typedef OutputSectionBase<ELFT> Base;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Dyn Elf_Dyn;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename ELFT::Dyn Elf_Dyn;
typedef typename ELFT::Rel Elf_Rel;
typedef typename ELFT::Rela Elf_Rela;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
// The .dynamic section contains information for the dynamic linker.
// The section consists of fixed size entries, which consist of
// type and value fields. Values are one of plain integers, symbol
// addresses, or section addresses. This struct represents the entry.
struct Entry {
int32_t Tag;
// Which union member is active is recorded in Kind.
union {
OutputSectionBase<ELFT> *OutSec;
uint64_t Val;
const SymbolBody *Sym;
};
enum KindT { SecAddr, SymAddr, PlainInt } Kind;
Entry(int32_t Tag, OutputSectionBase<ELFT> *OutSec)
: Tag(Tag), OutSec(OutSec), Kind(SecAddr) {}
Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {}
Entry(int32_t Tag, const SymbolBody *Sym)
: Tag(Tag), Sym(Sym), Kind(SymAddr) {}
};
// finalize() fills this vector with the section contents. finalize()
// cannot directly create final section contents because when the
// function is called, symbol or section addresses are not fixed yet.
std::vector<Entry> Entries;
public:
DynamicSection(SymbolTable<ELFT> &SymTab);
explicit DynamicSection();
void finalize() override;
void writeTo(uint8_t *Buf) override;
// Optional array sections; null until set by the owner of this object.
OutputSectionBase<ELFT> *PreInitArraySec = nullptr;
OutputSectionBase<ELFT> *InitArraySec = nullptr;
OutputSectionBase<ELFT> *FiniArraySec = nullptr;
private:
SymbolTable<ELFT> &SymTab;
const SymbolBody *InitSym = nullptr;
const SymbolBody *FiniSym = nullptr;
uint32_t DtFlags = 0;
uint32_t DtFlags1 = 0;
};
template <class ELFT>
@ -429,17 +545,94 @@ class MipsReginfoOutputSection final : public OutputSectionBase<ELFT> {
uint32_t GprMask = 0;
};
// Thin wrapper that forwards Value and Align to llvm::RoundUpToAlignment and
// returns its result.
inline uint64_t align(uint64_t Value, uint64_t Align) {
return llvm::RoundUpToAlignment(Value, Align);
}
// Output section class working with Elf_Mips_Options / Elf_Mips_RegInfo
// records. Keeps a GprMask value, initially 0.
// NOTE(review): presumably GprMask accumulates the masks of all added input
// sections -- confirm in addSection().
template <class ELFT>
class MipsOptionsOutputSection final : public OutputSectionBase<ELFT> {
typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options;
typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo;
public:
MipsOptionsOutputSection();
void writeTo(uint8_t *Buf) override;
void addSection(InputSectionBase<ELFT> *S) override;
private:
uint32_t GprMask = 0;
};
// --eh-frame-hdr option tells linker to construct a header for all the
// .eh_frame sections. This header is placed to a section named .eh_frame_hdr
// and also to a PT_GNU_EH_FRAME segment.
// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by
// calling dl_iterate_phdr.
// This section contains a lookup table for quick binary search of FDEs.
// Detailed info about internals can be found in Ian Lance Taylor's blog:
// http://www.airs.com/blog/archives/460 (".eh_frame")
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
// Output section class that collects (Pc, FdeVA) pairs through addFde() and
// stores them in Fdes. Both values are kept as 32-bit integers.
template <class ELFT>
class EhFrameHeader final : public OutputSectionBase<ELFT> {
typedef typename ELFT::uint uintX_t;
public:
EhFrameHeader();
void finalize() override;
void writeTo(uint8_t *Buf) override;
void addFde(uint32_t Pc, uint32_t FdeVA);
private:
// One collected entry: Pc paired with FdeVA, exactly as passed to addFde().
struct FdeData {
uint32_t Pc;
uint32_t FdeVA;
};
std::vector<FdeData> Fdes;
};
// Abstract base for build-id output sections. Concrete subclasses pass their
// hash size to the protected constructor and implement writeBuildId().
template <class ELFT> class BuildIdSection : public OutputSectionBase<ELFT> {
public:
void writeTo(uint8_t *Buf) override;
// Implemented by each subclass; receives the buffers to compute the id from.
virtual void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) = 0;
protected:
BuildIdSection(size_t HashSize);
size_t HashSize;
// Null until assigned.
// NOTE(review): presumably set by writeTo() to where the hash bytes go --
// confirm in the implementation.
uint8_t *HashBuf = nullptr;
};
// Build-id variant constructed with a hash size of 8.
template <class ELFT> class BuildIdFnv1 final : public BuildIdSection<ELFT> {
public:
BuildIdFnv1() : BuildIdSection<ELFT>(8) {}
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Build-id variant constructed with a hash size of 16.
template <class ELFT> class BuildIdMd5 final : public BuildIdSection<ELFT> {
public:
BuildIdMd5() : BuildIdSection<ELFT>(16) {}
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Build-id variant constructed with a hash size of 20.
template <class ELFT> class BuildIdSha1 final : public BuildIdSection<ELFT> {
public:
BuildIdSha1() : BuildIdSection<ELFT>(20) {}
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// Build-id variant whose constructor is defined out of line.
// NOTE(review): presumably the id is a user-supplied hex string, so its size
// comes from configuration -- confirm in the constructor.
template <class ELFT>
class BuildIdHexstring final : public BuildIdSection<ELFT> {
public:
BuildIdHexstring();
void writeBuildId(ArrayRef<ArrayRef<uint8_t>> Bufs) override;
};
// All output sections that are handled specially by the linker are
// globally accessible. Writer initializes them, so don't use them
// until Writer is initialized.
template <class ELFT> struct Out {
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename llvm::object::ELFFile<ELFT>::Elf_Phdr Elf_Phdr;
typedef typename ELFT::uint uintX_t;
typedef typename ELFT::Phdr Elf_Phdr;
static BuildIdSection<ELFT> *BuildId;
static DynamicSection<ELFT> *Dynamic;
static EhFrameHeader<ELFT> *EhFrameHdr;
static EhOutputSection<ELFT> *EhFrame;
static GnuHashTableSection<ELFT> *GnuHashTab;
static GotPltSection<ELFT> *GotPlt;
static GotSection<ELFT> *Got;
@ -457,10 +650,47 @@ template <class ELFT> struct Out {
static StringTableSection<ELFT> *StrTab;
static SymbolTableSection<ELFT> *DynSymTab;
static SymbolTableSection<ELFT> *SymTab;
static VersionDefinitionSection<ELFT> *VerDef;
static VersionTableSection<ELFT> *VerSym;
static VersionNeedSection<ELFT> *VerNeed;
static Elf_Phdr *TlsPhdr;
static OutputSectionBase<ELFT> *ElfHeader;
static OutputSectionBase<ELFT> *ProgramHeaders;
};
// Key made of an output section's name, type, flags and alignment. Flags and
// Alignment are 64-bit wide when Is64Bits is true and 32-bit wide otherwise.
template <bool Is64Bits> struct SectionKey {
typedef typename std::conditional<Is64Bits, uint64_t, uint32_t>::type uintX_t;
StringRef Name;
uint32_t Type;
uintX_t Flags;
uintX_t Alignment;
};
// This class knows how to create an output section for a given
// input section. Output section type is determined by various
// factors, including input section's sh_flags, sh_type and
// linker scripts.
template <class ELFT> class OutputSectionFactory {
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::uint uintX_t;
typedef typename elf::SectionKey<ELFT::Is64Bits> Key;
public:
// Returns an output section for input section C under the name OutsecName,
// paired with a flag.
// NOTE(review): the flag presumably reports whether the section was newly
// created rather than found in Map -- confirm in the implementation.
std::pair<OutputSectionBase<ELFT> *, bool> create(InputSectionBase<ELFT> *C,
StringRef OutsecName);
OutputSectionBase<ELFT> *lookup(StringRef Name, uint32_t Type, uintX_t Flags);
private:
Key createKey(InputSectionBase<ELFT> *C, StringRef OutsecName);
// Output sections indexed by their SectionKey.
llvm::SmallDenseMap<Key, OutputSectionBase<ELFT> *> Map;
};
template <class ELFT> BuildIdSection<ELFT> *Out<ELFT>::BuildId;
template <class ELFT> DynamicSection<ELFT> *Out<ELFT>::Dynamic;
template <class ELFT> EhFrameHeader<ELFT> *Out<ELFT>::EhFrameHdr;
template <class ELFT> EhOutputSection<ELFT> *Out<ELFT>::EhFrame;
template <class ELFT> GnuHashTableSection<ELFT> *Out<ELFT>::GnuHashTab;
template <class ELFT> GotPltSection<ELFT> *Out<ELFT>::GotPlt;
template <class ELFT> GotSection<ELFT> *Out<ELFT>::Got;
@ -478,9 +708,25 @@ template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::ShStrTab;
template <class ELFT> StringTableSection<ELFT> *Out<ELFT>::StrTab;
template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::DynSymTab;
template <class ELFT> SymbolTableSection<ELFT> *Out<ELFT>::SymTab;
template <class ELFT> typename Out<ELFT>::Elf_Phdr *Out<ELFT>::TlsPhdr;
template <class ELFT> VersionDefinitionSection<ELFT> *Out<ELFT>::VerDef;
template <class ELFT> VersionTableSection<ELFT> *Out<ELFT>::VerSym;
template <class ELFT> VersionNeedSection<ELFT> *Out<ELFT>::VerNeed;
template <class ELFT> typename ELFT::Phdr *Out<ELFT>::TlsPhdr;
template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::ElfHeader;
template <class ELFT> OutputSectionBase<ELFT> *Out<ELFT>::ProgramHeaders;
} // namespace elf2
} // namespace elf
} // namespace lld
#endif // LLD_ELF_OUTPUT_SECTIONS_H
namespace llvm {
// DenseMapInfo specialization that lets lld::elf::SectionKey be used as a
// DenseMap key. The four functions are only declared here.
template <bool Is64Bits> struct DenseMapInfo<lld::elf::SectionKey<Is64Bits>> {
typedef typename lld::elf::SectionKey<Is64Bits> Key;
static Key getEmptyKey();
static Key getTombstoneKey();
static unsigned getHashValue(const Key &Val);
static bool isEqual(const Key &LHS, const Key &RHS);
};
}
#endif

View File

@ -1,21 +1 @@
The New ELF Linker
==================
This directory contains a port of the new PE/COFF linker for ELF.
Overall Design
--------------
See COFF/README.md for details on the design. Note that unlike COFF, we do not
distinguish chunks from input sections; they are merged together.
Capabilities
------------
This linker can link LLVM and Clang on Linux/x86-64 or FreeBSD/x86-64.
"Hello world" can be linked on Linux/PPC64 and on Linux/AArch64 or
FreeBSD/AArch64.
Performance
-----------
Achieving good performance is one of our goals. It's too early to reach a
conclusion, but we are optimistic about that as it currently seems to be faster
than GNU gold. It will be interesting to compare when we are close to feature
parity.
See docs/NewLLD.rst

704
ELF/Relocations.cpp Normal file
View File

@ -0,0 +1,704 @@
//===- Relocations.cpp ----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains platform-independent functions to process relocations.
// I'll describe the overview of this file here.
//
// Simple relocations are easy to handle for the linker. For example,
// for R_X86_64_PC64 relocs, the linker just has to fix up locations
// with the relative offsets to the target symbols. It would just be
// reading records from relocation sections and applying them to output.
//
// But not all relocations are that easy to handle. For example, for
// R_386_GOTOFF relocs, the linker has to create new GOT entries for
// symbols if they don't exist, and fix up locations with GOT entry
// offsets from the beginning of GOT section. So there is more than
// fixing addresses in relocation processing.
//
// ELF defines a large number of complex relocations.
//
// The functions in this file analyze relocations and do whatever needs
// to be done. This includes, but is not limited to, the following.
//
// - create GOT/PLT entries
// - create new relocations in .dynsym to let the dynamic linker resolve
// them at runtime (since ELF supports dynamic linking, not all
// relocations can be resolved at link-time)
// - create COPY relocs and reserve space in .bss
// - replace expensive relocs (in terms of runtime cost) with cheap ones
// - error out infeasible combinations such as PIC and non-relative relocs
//
// Note that the functions in this file don't actually apply relocations
// because it doesn't know about the output file nor the output file buffer.
// It instead stores Relocation objects to InputSection's Relocations
// vector to let it apply later in InputSection::writeTo.
//
//===----------------------------------------------------------------------===//
#include "Relocations.h"
#include "Config.h"
#include "OutputSections.h"
#include "SymbolTable.h"
#include "Target.h"
#include "Thunks.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;
namespace lld {
namespace elf {
// Returns true if Expr is one of the relocation expressions that this file
// treats as referring to a GOT entry; every other expression yields false.
static bool refersToGotEntry(RelExpr Expr) {
  switch (Expr) {
  case R_GOT:
  case R_GOT_OFF:
  case R_MIPS_GOT_LOCAL_PAGE:
  case R_MIPS_GOT_OFF:
  case R_MIPS_TLSGD:
  case R_MIPS_TLSLD:
  case R_GOT_PAGE_PC:
  case R_GOT_PC:
  case R_GOT_FROM_END:
  case R_TLSGD:
  case R_TLSGD_PC:
  case R_TLSDESC:
  case R_TLSDESC_PAGE:
    return true;
  default:
    return false;
  }
}
// Returns whether a relocation of the given Type against Body must be treated
// as targeting a preemptible symbol.
static bool isPreemptible(const SymbolBody &Body, uint32_t Type) {
  // MIPS GP-relative relocations always resolve to a definition in a regular
  // input file, ignoring the one-definition rule. So we must not, for
  // example, create a dynamic relocation even if the target symbol is
  // preemptible. There are two MIPS GP-relative relocations, R_MIPS_GPREL16
  // and R_MIPS_GPREL32, but only R_MIPS_GPREL16 can be against a preemptible
  // symbol.
  //
  // The 0xff mask extracts the MIPS relocation type. With the O32 ABI every
  // relocation type fits in eight bits. With the N64 ABI the mask selects the
  // first relocation of a 3-in-1 packet, which is the only one that can be
  // against a real symbol.
  const bool IsMipsGpRel16 =
      Config->EMachine == EM_MIPS && (Type & 0xff) == R_MIPS_GPREL16;
  return !IsMipsGpRel16 && Body.isPreemptible();
}
// MIPS counterpart of `handleTlsRelocation`. MIPS does not support any
// relaxations for TLS relocations, so its handling lives in this separate
// function to keep `handleTlsRelocation` free of MIPS special cases.
//
// Returns 1 when the relocation was handled here (an R_MIPS_TLSLD expression
// or a type the target reports as global-dynamic) and 0 otherwise.
template <class ELFT>
static unsigned
handleMipsTlsRelocation(uint32_t Type, SymbolBody &Body,
                        InputSectionBase<ELFT> &C, typename ELFT::uint Offset,
                        typename ELFT::uint Addend, RelExpr Expr) {
  typedef typename ELFT::uint uintX_t;

  const bool IsLocalDynamic = Expr == R_MIPS_TLSLD;
  if (!IsLocalDynamic && !Target->isTlsGlobalDynamicRel(Type))
    return 0;

  if (IsLocalDynamic) {
    // The module-index slot is shared; only emit its dynamic relocation the
    // first time the slot is requested.
    if (Out<ELFT>::Got->addTlsIndex())
      Out<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, Out<ELFT>::Got,
                                    Out<ELFT>::Got->getTlsIndexOff(), false,
                                    nullptr, 0});
  } else if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
    // A freshly added entry gets two dynamic relocations: one at its GOT
    // offset and one a uintX_t further.
    uintX_t GotOff = Out<ELFT>::Got->getGlobalDynOffset(Body);
    Out<ELFT>::RelaDyn->addReloc(
        {Target->TlsModuleIndexRel, Out<ELFT>::Got, GotOff, false, &Body, 0});
    Out<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Out<ELFT>::Got,
                                  GotOff + (uintX_t)sizeof(uintX_t), false,
                                  &Body, 0});
  }

  C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
  return 1;
}
// Handles a relocation of the given Type/Expr against a TLS symbol Body found
// at Offset in section C. Depending on the case it adds GOT entries, adds
// dynamic relocations to RelaDyn, and appends a Relocation to C.Relocations.
//
// Returns the number of relocations processed. A return value of 0 means the
// relocation was not handled here.
template <class ELFT>
static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body,
InputSectionBase<ELFT> &C,
typename ELFT::uint Offset,
typename ELFT::uint Addend, RelExpr Expr) {
// Only relocations in SHF_ALLOC sections against TLS symbols are handled.
if (!(C.getSectionHdr()->sh_flags & SHF_ALLOC))
return 0;
if (!Body.isTls())
return 0;
typedef typename ELFT::uint uintX_t;
// MIPS is delegated entirely to its own handler.
if (Config->EMachine == EM_MIPS)
return handleMipsTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr);
// TLS descriptor expressions (and R_HINT) when producing a shared object:
// add the GOT entry and its TlsDescRel dynamic relocation once per symbol.
if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_HINT) &&
Config->Shared) {
if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body);
Out<ELFT>::RelaDyn->addReloc(
{Target->TlsDescRel, Out<ELFT>::Got, Off, false, &Body, 0});
}
// R_HINT is counted as processed but records no Relocation on C.
if (Expr != R_HINT)
C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
return 1;
}
if (Expr == R_TLSLD_PC || Expr == R_TLSLD) {
// Local-Dynamic relocs can be relaxed to Local-Exec.
if (!Config->Shared) {
C.Relocations.push_back(
{R_RELAX_TLS_LD_TO_LE, Type, &C, Offset, Addend, &Body});
// NOTE(review): reports two relocations as processed; presumably the next
// relocation belongs to the same relaxed code sequence -- confirm against
// the caller.
return 2;
}
// The module-index GOT slot is shared; emit its dynamic relocation only
// the first time it is requested.
if (Out<ELFT>::Got->addTlsIndex())
Out<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, Out<ELFT>::Got,
Out<ELFT>::Got->getTlsIndexOff(), false,
nullptr, 0});
C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
return 1;
}
// Local-Dynamic relocs can be relaxed to Local-Exec.
if (Target->isTlsLocalDynamicRel(Type) && !Config->Shared) {
C.Relocations.push_back(
{R_RELAX_TLS_LD_TO_LE, Type, &C, Offset, Addend, &Body});
return 1;
}
// Global-dynamic relocations. The descriptor expressions and R_HINT only get
// here when not building a shared object, because the shared case returned
// above.
if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_HINT ||
Target->isTlsGlobalDynamicRel(Type)) {
if (Config->Shared) {
if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
uintX_t Off = Out<ELFT>::Got->getGlobalDynOffset(Body);
Out<ELFT>::RelaDyn->addReloc(
{Target->TlsModuleIndexRel, Out<ELFT>::Got, Off, false, &Body, 0});
// If the symbol is preemptible we need the dynamic linker to write
// the offset too.
if (isPreemptible(Body, Type))
Out<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Out<ELFT>::Got,
Off + (uintX_t)sizeof(uintX_t), false,
&Body, 0});
}
C.Relocations.push_back({Expr, Type, &C, Offset, Addend, &Body});
return 1;
}
// Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
// depending on the symbol being locally defined or not.
if (isPreemptible(Body, Type)) {
C.Relocations.push_back(
{Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type,
&C, Offset, Addend, &Body});
// The Initial-Exec form reads the value from the GOT, so make sure the
// symbol has a GOT entry with a TlsGotRel dynamic relocation.
if (!Body.isInGot()) {
Out<ELFT>::Got->addEntry(Body);
Out<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, Out<ELFT>::Got,
Body.getGotOffset<ELFT>(), false, &Body,
0});
}
// The number of relocations consumed by the relaxation is target-defined.
return Target->TlsGdRelaxSkip;
}
C.Relocations.push_back(
{Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, &C,
Offset, Addend, &Body});
return Target->TlsGdRelaxSkip;
}
// Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
// defined.
if (Target->isTlsInitialExecRel(Type) && !Config->Shared &&
!isPreemptible(Body, Type)) {
C.Relocations.push_back(
{R_RELAX_TLS_IE_TO_LE, Type, &C, Offset, Addend, &Body});
return 1;
}
return 0;
}
// Reads a 32-bit word with endianness E from Loc and returns its low 16 bits
// reinterpreted as a signed 16-bit value.
template <endianness E> static int16_t readSignedLo16(const uint8_t *Loc) {
  return static_cast<int16_t>(read32<E>(Loc) & 0xffff);
}
// Maps the type of relocation Rel to the type of the relocation it is paired
// with, or R_MIPS_NONE when it has no pair. R_MIPS_GOT16 is paired with
// R_MIPS_LO16 only when Sym is a local symbol.
template <class RelTy>
static uint32_t getMipsPairType(const RelTy *Rel, const SymbolBody &Sym) {
  const auto Type = Rel->getType(Config->Mips64EL);
  if (Type == R_MIPS_HI16)
    return R_MIPS_LO16;
  if (Type == R_MIPS_GOT16)
    return Sym.isLocal() ? R_MIPS_LO16 : R_MIPS_NONE;
  if (Type == R_MIPS_PCHI16)
    return R_MIPS_PCLO16;
  if (Type == R_MICROMIPS_HI16)
    return R_MICROMIPS_LO16;
  return R_MIPS_NONE;
}
// Computes the combined addend for a MIPS relocation whose addend is split
// between the relocation itself (at BufLoc) and its paired relocation.
//
// Starting at Rel, scans [Rel, End) for the first relocation with the paired
// type and the same symbol index. If found, the result is the low 16 bits of
// the word at BufLoc shifted left by 16, plus the signed low 16 bits of the
// word at the paired relocation's offset in Buf. Returns 0 for RELA records
// and for relocations without a pair. If no partner is found, a warning is
// issued and 0 is returned.
template <class ELFT, class RelTy>
static int32_t findMipsPairedAddend(const uint8_t *Buf, const uint8_t *BufLoc,
                                    SymbolBody &Sym, const RelTy *Rel,
                                    const RelTy *End) {
  const uint32_t PairType = getMipsPairType(Rel, Sym);
  // Some MIPS relocations use addend calculated from addend of the relocation
  // itself and addend of paired relocation. ABI requires to compute such
  // combined addend in case of REL relocation record format only.
  // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
  if (RelTy::IsRela || PairType == R_MIPS_NONE)
    return 0;

  const uint32_t WantedSym = Rel->getSymbol(Config->Mips64EL);
  const endianness E = ELFT::TargetEndianness;
  for (const RelTy *Cand = Rel; Cand != End; ++Cand) {
    const bool IsPartner = Cand->getType(Config->Mips64EL) == PairType &&
                           Cand->getSymbol(Config->Mips64EL) == WantedSym;
    if (!IsPartner)
      continue;
    return ((read32<E>(BufLoc) & 0xffff) << 16) +
           readSignedLo16<E>(Buf + Cand->r_offset);
  }

  warning("can't find matching " + getRelName(PairType) + " relocation for " +
          getRelName(Rel->getType(Config->Mips64EL)));
  return 0;
}
// True if a non-preemptible symbol always has the same value regardless of
// where the DSO is loaded. That holds for non-local weak undefined symbols
// and for regular defined symbols that have no section (absolute symbols).
template <class ELFT> static bool isAbsolute(const SymbolBody &Body) {
  if (!Body.isUndefined()) {
    const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(&Body);
    // A regular definition without a section is an absolute symbol.
    return Regular && Regular->Section == nullptr;
  }
  if (Body.isLocal())
    return false;
  return Body.symbol()->isWeak();
}
// Returns true if Expr is one of the PLT-based relocation expressions listed
// below; every other expression yields false.
static bool needsPlt(RelExpr Expr) {
  switch (Expr) {
  case R_PLT_PC:
  case R_PPC_PLT_OPD:
  case R_PLT:
  case R_PLT_PAGE_PC:
  case R_THUNK_PLT_PC:
    return true;
  default:
    return false;
  }
}
// True if this expression is of the form Sym - X, where X is a position in the
// file (the PC or the GOT, for example).
static bool isRelExpr(RelExpr Expr) {
  switch (Expr) {
  case R_PC:
  case R_GOTREL:
  case R_PAGE_PC:
  case R_RELAX_GOT_PC:
  case R_THUNK_PC:
  case R_THUNK_PLT_PC:
    return true;
  default:
    return false;
  }
}
// Decides whether a relocation with expression E and type Type against Body
// can be resolved at static link time. A false result means the value is not
// known until load time.
//
// Side effect: reports an error (and still returns true) when a relative
// expression refers to an absolute symbol that is not a weak undefined one.
template <class ELFT>
static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type,
const SymbolBody &Body) {
// These expressions always compute a constant
if (E == R_SIZE || E == R_GOT_FROM_END || E == R_GOT_OFF ||
E == R_MIPS_GOT_LOCAL_PAGE || E == R_MIPS_GOT_OFF || E == R_MIPS_TLSGD ||
E == R_GOT_PAGE_PC || E == R_GOT_PC || E == R_PLT_PC || E == R_TLSGD_PC ||
E == R_TLSGD || E == R_PPC_PLT_OPD || E == R_TLSDESC_PAGE ||
E == R_HINT || E == R_THUNK_PC || E == R_THUNK_PLT_PC)
return true;
// These never do, except if the entire file is position dependent or if
// only the low bits are used.
if (E == R_GOT || E == R_PLT || E == R_TLSDESC)
return Target->usesOnlyLowPageBits(Type) || !Config->Pic;
// A preemptible symbol may be replaced at run time, so its value is unknown.
if (isPreemptible(Body, Type))
return false;
// Without PIC the output is position dependent, so every remaining case is a
// constant.
if (!Config->Pic)
return true;
// From here on the output is position independent. The result is constant
// when symbol and expression are of the same kind: an absolute value with an
// absolute expression, or a position-dependent value with a relative one.
bool AbsVal = isAbsolute<ELFT>(Body) || Body.isTls();
bool RelE = isRelExpr(E);
if (AbsVal && !RelE)
return true;
if (!AbsVal && RelE)
return true;
// Relative relocation to an absolute value. This is normally unrepresentable,
// but if the relocation refers to a weak undefined symbol, we allow it to
// resolve to the image base. This is a little strange, but it allows us to
// link function calls to such symbols. Normally such a call will be guarded
// with a comparison, which will load a zero from the GOT.
if (AbsVal && RelE) {
if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak())
return true;
error("relocation " + getRelName(Type) +
" cannot refer to absolute symbol " + Body.getName());
// Returns true after reporting the error, so the caller treats it as a
// constant and adds no dynamic relocation for it.
return true;
}
// Remaining case: absolute expression against a position-dependent value.
// That is only a constant if the relocation uses just the low page bits.
return Target->usesOnlyLowPageBits(Type);
}
// Maps an expression that refers to a symbol directly to the corresponding
// expression that refers to the symbol's PLT entry. Expressions without a
// PLT counterpart are returned unchanged.
static RelExpr toPlt(RelExpr Expr) {
  switch (Expr) {
  case R_PPC_OPD:
    return R_PPC_PLT_OPD;
  case R_PC:
    return R_PLT_PC;
  case R_PAGE_PC:
    return R_PLT_PAGE_PC;
  case R_ABS:
    return R_PLT;
  default:
    return Expr;
  }
}
// Used when we decided not to use a PLT: turns a reference to the PLT into a
// reference to the symbol itself. Other expressions are returned unchanged.
static RelExpr fromPlt(RelExpr Expr) {
  switch (Expr) {
  case R_PLT_PC:
    return R_PC;
  case R_PPC_PLT_OPD:
    return R_PPC_OPD;
  case R_PLT:
    return R_ABS;
  default:
    return Expr;
  }
}
// Returns the alignment to use for the copy of shared symbol SS: the largest
// power of two that divides both the alignment of the section defining the
// symbol in the DSO and the symbol's value.
template <class ELFT> static uint32_t getAlignment(SharedSymbol<ELFT> *SS) {
  typedef typename ELFT::uint uintX_t;
  uintX_t SecAlign = SS->file()->getSection(SS->Sym)->sh_addralign;
  uintX_t SymValue = SS->Sym.st_value;
  unsigned TrailingZeros =
      std::min(countTrailingZeros(SecAlign), countTrailingZeros(SymValue));
  // countTrailingZeros() returns the full bit width for a zero input, so the
  // count can reach 32 or 64. Shifting a 32-bit value by that much is
  // undefined behaviour; clamp to the largest power of two that the uint32_t
  // return type can represent and shift an unsigned value.
  return uint32_t(1) << std::min(TrailingZeros, 31u);
}
// Reserve space in .bss for copy relocation.
// Grows Out<ELFT>::Bss by the size of SS (suitably aligned), marks every
// shared symbol of the same DSO that aliases SS as living at that .bss offset,
// and adds a Target->CopyRel dynamic relocation for SS.
template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) {
typedef typename ELFT::uint uintX_t;
typedef typename ELFT::Sym Elf_Sym;
// Copy relocation against zero-sized symbol doesn't make sense.
uintX_t SymSize = SS->template getSize<ELFT>();
if (SymSize == 0)
fatal("cannot create a copy relocation for " + SS->getName());
uintX_t Alignment = getAlignment(SS);
// Place the copy at the next suitably aligned offset past the current end
// of .bss, then extend .bss to cover it.
uintX_t Off = alignTo(Out<ELFT>::Bss->getSize(), Alignment);
Out<ELFT>::Bss->setSize(Off + SymSize);
Out<ELFT>::Bss->updateAlignment(Alignment);
uintX_t Shndx = SS->Sym.st_shndx;
uintX_t Value = SS->Sym.st_value;
// Look through the DSO's dynamic symbol table for aliases and create a
// dynamic symbol for each one. This causes the copy relocation to correctly
// interpose any aliases.
for (const Elf_Sym &S : SS->file()->getElfSymbols(true)) {
// An alias has the same section index and the same value as SS.
if (S.st_shndx != Shndx || S.st_value != Value)
continue;
// Only names that resolved to a shared symbol in the symbol table count.
auto *Alias = dyn_cast_or_null<SharedSymbol<ELFT>>(
Symtab<ELFT>::X->find(check(S.getName(SS->file()->getStringTable()))));
if (!Alias)
continue;
Alias->OffsetInBss = Off;
Alias->NeedsCopyOrPltAddr = true;
Alias->symbol()->IsUsedInRegularObj = true;
}
// NOTE(review): SS->OffsetInBss is only assigned inside the loop above, so
// this presumably relies on SS itself being visited there -- confirm.
Out<ELFT>::RelaDyn->addReloc(
{Target->CopyRel, Out<ELFT>::Bss, SS->OffsetInBss, false, SS, 0});
}
// Adjusts the relocation expression Expr of a relocation of type Type against
// Body and returns the expression to use. IsWrite tells whether the section
// being relocated is writable; Data points at the relocated location.
// Besides picking PLT / non-PLT / thunk variants, this may reserve a copy
// relocation for Body or set Body.NeedsCopyOrPltAddr, and it reports an error
// when the relocation cannot be represented.
template <class ELFT>
static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body,
bool IsWrite, RelExpr Expr, uint32_t Type,
const uint8_t *Data) {
bool Preemptible = isPreemptible(Body, Type);
if (Body.isGnuIFunc()) {
// References to GNU ifunc symbols are always turned into PLT references.
Expr = toPlt(Expr);
} else if (!Preemptible) {
// A non-preemptible symbol does not need a PLT entry, and a GOT-relative
// access to it may be relaxed by the target.
if (needsPlt(Expr))
Expr = fromPlt(Expr);
if (Expr == R_GOT_PC)
Expr = Target->adjustRelaxExpr(Type, Data, Expr);
}
Expr = Target->getThunkExpr(Expr, Type, File, Body);
// Nothing more to do if the location is writable (a dynamic relocation can
// be applied to it) or if the value is known at link time.
if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body))
return Expr;
// This relocation would require the dynamic linker to write a value to read
// only memory. We can hack around it if we are producing an executable and
// the referred symbol can be preempted to refer to the executable.
if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) {
error("can't create dynamic relocation " + getRelName(Type) +
" against readonly segment");
return Expr;
}
if (Body.getVisibility() != STV_DEFAULT) {
error("cannot preempt symbol");
return Expr;
}
if (Body.isObject()) {
// Produce a copy relocation.
auto *B = cast<SharedSymbol<ELFT>>(&Body);
if (!B->needsCopy())
addCopyRelSymbol(B);
return Expr;
}
if (Body.isFunc()) {
// This handles a non PIC program call to function in a shared library. In
// an ideal world, we could just report an error saying the relocation can
// overflow at runtime. In the real world with glibc, crt1.o has a
// R_X86_64_PC32 pointing to libc.so.
//
// The general idea on how to handle such cases is to create a PLT entry and
// use that as the function value.
//
// For the static linking part, we just return a plt expr and everything
// else will use the PLT entry as the address.
//
// The remaining problem is making sure pointer equality still works. We
// need the help of the dynamic linker for that. We let it know that we have
// a direct reference to a so symbol by creating an undefined symbol with a
// non zero st_value. Seeing that, the dynamic linker resolves the symbol to
// the value of the symbol we created. This is true even for got entries, so
// pointer equality is maintained. To avoid an infinite loop, the only entry
// that points to the real function is a dedicated got entry used by the
// plt. That is identified by special relocation types (R_X86_64_JUMP_SLOT,
// R_386_JMP_SLOT, etc).
Body.NeedsCopyOrPltAddr = true;
return toPlt(Expr);
}
// Neither an object nor a function: we do not know how to preempt it.
error("symbol is missing type");
return Expr;
}
// Computes the addend to record for relocation RI, which applies to the
// section whose contents start at SectionData. For RELA records this starts
// from the explicit addend; for REL records the implicit addend stored at the
// relocated location is added. MIPS and PPC64 specific adjustments follow.
// End is the end of the relocation array RI belongs to (used to search for a
// paired MIPS relocation), Expr is the already adjusted expression and Body
// the target symbol.
template <class ELFT, class RelTy>
static typename ELFT::uint computeAddend(const elf::ObjectFile<ELFT> &File,
const uint8_t *SectionData,
const RelTy *End, const RelTy &RI,
RelExpr Expr, SymbolBody &Body) {
typedef typename ELFT::uint uintX_t;
uint32_t Type = RI.getType(Config->Mips64EL);
// getAddend() yields r_addend for RELA records and 0 for REL records.
uintX_t Addend = getAddend<ELFT>(RI);
const uint8_t *BufLoc = SectionData + RI.r_offset;
if (!RelTy::IsRela)
Addend += Target->getImplicitAddend(BufLoc, Type);
if (Config->EMachine == EM_MIPS) {
Addend += findMipsPairedAddend<ELFT>(SectionData, BufLoc, Body, &RI, End);
if (Type == R_MIPS_LO16 && Expr == R_PC)
// R_MIPS_LO16 expression has R_PC type if and only if the target is the
// _gp_disp symbol. In that case we should use the following formula for
// calculation "AHL + GP - P + 4". Let's add 4 right here.
// For details see p. 4-19 at
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
Addend += 4;
if (Expr == R_GOTREL) {
Addend -= MipsGPOffset;
if (Body.isLocal())
Addend += File.getMipsGp0();
}
}
if (Config->Pic && Config->EMachine == EM_PPC64 && Type == R_PPC64_TOC)
Addend += getPPC64TocBase();
return Addend;
}
// The reason we have to do this early scan is as follows
// * To mmap the output file, we need to know the size
// * For that, we need to know how many dynamic relocs we will have.
// It might be possible to avoid this by outputting the file with write:
// * Write the allocated output sections, computing addresses.
// * Apply relocations, recording which ones require a dynamic reloc.
// * Write the dynamic relocations.
// * Write the rest of the file.
// This would have some drawbacks. For example, we would only know if .rela.dyn
// is needed after applying relocations. If it is, it will go after rw and rx
// sections. Given that it is ro, we will need an extra PT_LOAD. This
// complicates things for the dynamic linker and means we would have to reserve
// space for the extra PT_LOAD even if we end up not using it.
//
// Scans the relocations Rels that apply to input section C. For each one it
// records either a relocation to apply when C is written (C.Relocations), a
// dynamic relocation, or both, and creates the GOT, PLT and thunk entries the
// relocation requires.
template <class ELFT, class RelTy>
static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) {
typedef typename ELFT::uint uintX_t;
bool IsWrite = C.getSectionHdr()->sh_flags & SHF_WRITE;
// Shorthand for adding a dynamic relocation to Out<ELFT>::RelaDyn.
auto AddDyn = [=](const DynamicReloc<ELFT> &Reloc) {
Out<ELFT>::RelaDyn->addReloc(Reloc);
};
const elf::ObjectFile<ELFT> &File = *C.getFile();
ArrayRef<uint8_t> SectionData = C.getSectionData();
const uint8_t *Buf = SectionData.begin();
for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) {
const RelTy &RI = *I;
SymbolBody &Body = File.getRelocTargetSym(RI);
uint32_t Type = RI.getType(Config->Mips64EL);
RelExpr Expr = Target->getRelExpr(Type, Body);
// Note that preemptibility is sampled before adjustExpr() runs.
bool Preemptible = isPreemptible(Body, Type);
Expr = adjustExpr(File, Body, IsWrite, Expr, Type, Buf + RI.r_offset);
// Once any error has been reported, stop recording work for relocations.
if (HasError)
continue;
// Skip a relocation that points to a dead piece
// in a mergeable section.
if (C.getOffset(RI.r_offset) == (uintX_t)-1)
continue;
// This relocation does not require got entry, but it is relative to got and
// needs it to be created. Here we request for that.
if (Expr == R_GOTONLY_PC || Expr == R_GOTREL || Expr == R_PPC_TOC)
Out<ELFT>::Got->HasGotOffRel = true;
uintX_t Addend = computeAddend(File, Buf, E, RI, Expr, Body);
// A non-zero result is the number of relocation records the TLS handler
// consumed, including this one; step over the extra ones.
if (unsigned Processed = handleTlsRelocation<ELFT>(
Type, Body, C, RI.r_offset, Addend, Expr)) {
I += (Processed - 1);
continue;
}
// Ignore "hint" relocation because it is for optional code optimization.
if (Expr == R_HINT)
continue;
if (needsPlt(Expr) || Expr == R_THUNK_ABS || Expr == R_THUNK_PC ||
Expr == R_THUNK_PLT_PC || refersToGotEntry(Expr) ||
!isPreemptible(Body, Type)) {
// If the relocation points to something in the file, we can process it.
bool Constant = isStaticLinkTimeConstant<ELFT>(Expr, Type, Body);
// If the output being produced is position independent, the final value
// is still not known. In that case we still need some help from the
// dynamic linker. We can however do better than just copying the incoming
// relocation. We can process some of it and just ask the dynamic
// linker to add the load address.
if (!Constant)
AddDyn({Target->RelativeRel, &C, RI.r_offset, true, &Body, Addend});
// If the produced value is a constant, we just remember to write it
// when outputting this section. We also have to do it if the format
// uses Elf_Rel, since in that case the written value is the addend.
if (Constant || !RelTy::IsRela)
C.Relocations.push_back({Expr, Type, &C, RI.r_offset, Addend, &Body});
} else {
// We don't know anything about the final symbol. Just ask the dynamic
// linker to handle the relocation for us.
AddDyn({Target->getDynRel(Type), &C, RI.r_offset, false, &Body, Addend});
// MIPS ABI turns using of GOT and dynamic relocations inside out.
// While regular ABI uses dynamic relocations to fill up GOT entries
// MIPS ABI requires dynamic linker to fill up GOT entries using
// specially sorted dynamic symbol table. This affects even dynamic
// relocations against symbols which do not require GOT entries
// creation explicitly, i.e. do not have any GOT-relocations. So if
// a preemptible symbol has a dynamic relocation we anyway have
// to create a GOT entry for it.
// If a non-preemptible symbol has a dynamic relocation against it,
// dynamic linker takes its st_value, adds offset and writes down
// result of the dynamic relocation. In case of preemptible symbol
// dynamic linker performs symbol resolution, writes the symbol value
// to the GOT entry and reads the GOT entry when it needs to perform
// a dynamic relocation.
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19
if (Config->EMachine == EM_MIPS)
Out<ELFT>::Got->addMipsEntry(Body, Addend, Expr);
continue;
}
// Some targets might require creation of thunks for relocations.
// Now we support only MIPS which requires LA25 thunk to call PIC
// code from non-PIC one, and ARM which requires interworking.
if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) {
auto *Sec = cast<InputSection<ELFT>>(&C);
addThunk<ELFT>(Type, Body, *Sec);
}
// At this point we are done with the relocated position. Some relocations
// also require us to create a got or plt entry.
// If a relocation needs PLT, we create a PLT and a GOT slot for the symbol.
if (needsPlt(Expr)) {
// Each symbol gets at most one PLT entry.
if (Body.isInPlt())
continue;
Out<ELFT>::Plt->addEntry(Body);
uint32_t Rel;
if (Body.isGnuIFunc() && !Preemptible)
Rel = Target->IRelativeRel;
else
Rel = Target->PltRel;
Out<ELFT>::GotPlt->addEntry(Body);
Out<ELFT>::RelaPlt->addReloc({Rel, Out<ELFT>::GotPlt,
Body.getGotPltOffset<ELFT>(), !Preemptible,
&Body, 0});
continue;
}
if (refersToGotEntry(Expr)) {
if (Config->EMachine == EM_MIPS) {
// MIPS ABI has special rules to process GOT entries
// and doesn't require relocation entries for them.
// See "Global Offset Table" in Chapter 5 in the following document
// for detailed description:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
Out<ELFT>::Got->addMipsEntry(Body, Addend, Expr);
if (Body.isTls())
AddDyn({Target->TlsGotRel, Out<ELFT>::Got, Body.getGotOffset<ELFT>(),
!Preemptible, &Body, 0});
continue;
}
// Each symbol gets at most one GOT entry.
if (Body.isInGot())
continue;
Out<ELFT>::Got->addEntry(Body);
// The GOT slot needs a dynamic relocation if the symbol is preemptible,
// or if the output is position independent and the value is not absolute.
if (Preemptible || (Config->Pic && !isAbsolute<ELFT>(Body))) {
uint32_t DynType;
if (Body.isTls())
DynType = Target->TlsGotRel;
else if (Preemptible)
DynType = Target->GotRel;
else
DynType = Target->RelativeRel;
AddDyn({DynType, Out<ELFT>::Got, Body.getGotOffset<ELFT>(),
!Preemptible, &Body, 0});
}
continue;
}
}
}
// Scans every relocation section that applies to C. Each relocation goes
// through a series of tests to determine if it needs special treatment, such
// as creating GOT, PLT, copy relocations, etc.
template <class ELFT> void scanRelocations(InputSection<ELFT> &C) {
  typedef typename ELFT::Shdr Elf_Shdr;
  // Relocations for non-alloc sections are directly processed by
  // InputSection::relocateNonAlloc, so there is nothing to scan for them.
  if (!(C.getSectionHdr()->sh_flags & SHF_ALLOC))
    return;
  for (const Elf_Shdr *RelSec : C.RelocSections)
    scanRelocations(C, *RelSec);
}
// Scans the relocations stored in relocation section RelSec, which apply to
// section S, reading them as RELA or REL records according to the section
// type.
template <class ELFT>
void scanRelocations(InputSectionBase<ELFT> &S,
                     const typename ELFT::Shdr &RelSec) {
  ELFFile<ELFT> &Obj = S.getFile()->getObj();
  if (RelSec.sh_type != SHT_RELA) {
    scanRelocs(S, Obj.rels(&RelSec));
    return;
  }
  scanRelocs(S, Obj.relas(&RelSec));
}
// Explicit instantiations of both scanRelocations overloads for the four ELF
// flavours (32/64-bit, little/big endian). The templates are defined in this
// file only, so other translation units link against these instantiations.
template void scanRelocations<ELF32LE>(InputSection<ELF32LE> &);
template void scanRelocations<ELF32BE>(InputSection<ELF32BE> &);
template void scanRelocations<ELF64LE>(InputSection<ELF64LE> &);
template void scanRelocations<ELF64BE>(InputSection<ELF64BE> &);
template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &,
const ELF32LE::Shdr &);
template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &,
const ELF32BE::Shdr &);
template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &,
const ELF64LE::Shdr &);
template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &,
const ELF64BE::Shdr &);
}
}

93
ELF/Relocations.h Normal file
View File

@ -0,0 +1,93 @@
//===- Relocations.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_RELOCATIONS_H
#define LLD_ELF_RELOCATIONS_H
#include "lld/Core/LLVM.h"
namespace lld {
namespace elf {
class SymbolBody;
template <class ELFT> class InputSection;
template <class ELFT> class InputSectionBase;
// Classifies relocations by the kind of expression they compute. The
// relocation scanner (scanRelocs in Relocations.cpp) obtains one of these from
// Target->getRelExpr() for each relocation and uses it to decide whether GOT
// or PLT entries, thunks or dynamic relocations are needed. The exact formula
// behind each enumerator is defined where relocations are applied, not here.
enum RelExpr {
R_ABS,
R_GOT,
R_GOTONLY_PC,
R_GOTREL,
R_GOT_FROM_END,
R_GOT_OFF,
R_GOT_PAGE_PC,
R_GOT_PC,
R_HINT,
R_MIPS_GOT_LOCAL_PAGE,
R_MIPS_GOT_OFF,
R_MIPS_TLSGD,
R_MIPS_TLSLD,
R_NEG_TLS,
R_PAGE_PC,
R_PC,
R_PLT,
R_PLT_PC,
R_PLT_PAGE_PC,
R_PPC_OPD,
R_PPC_PLT_OPD,
R_PPC_TOC,
R_RELAX_GOT_PC,
R_RELAX_GOT_PC_NOPIC,
R_RELAX_TLS_GD_TO_IE,
R_RELAX_TLS_GD_TO_IE_END,
R_RELAX_TLS_GD_TO_IE_ABS,
R_RELAX_TLS_GD_TO_IE_PAGE_PC,
R_RELAX_TLS_GD_TO_LE,
R_RELAX_TLS_GD_TO_LE_NEG,
R_RELAX_TLS_IE_TO_LE,
R_RELAX_TLS_LD_TO_LE,
R_SIZE,
R_THUNK_ABS,
R_THUNK_PC,
R_THUNK_PLT_PC,
R_TLS,
R_TLSDESC,
R_TLSDESC_PAGE,
R_TLSGD,
R_TLSGD_PC,
R_TLSLD,
R_TLSLD_PC
};
// A relocation recorded by the scanner so that it can be applied when the
// containing section is written. The scanner fills the fields in declaration
// order with aggregate initialization.
template <class ELFT> struct Relocation {
// Expression kind chosen for this relocation after adjustment.
RelExpr Expr;
// Target-specific relocation type taken from the relocation record.
uint32_t Type;
// Input section the relocation applies to.
InputSectionBase<ELFT> *InputSec;
// r_offset of the relocation record.
uint64_t Offset;
// Addend computed by the scanner.
uint64_t Addend;
// Symbol the relocation refers to.
SymbolBody *Sym;
};
template <class ELFT> void scanRelocations(InputSection<ELFT> &);
template <class ELFT>
void scanRelocations(InputSectionBase<ELFT> &, const typename ELFT::Shdr &);
// Returns the explicit addend of a REL relocation record. REL records carry
// no addend field, so this is always 0.
template <class ELFT>
static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) {
return 0;
}
// Returns the explicit addend (r_addend) of a RELA relocation record.
template <class ELFT>
static inline typename ELFT::uint getAddend(const typename ELFT::Rela &Rel) {
return Rel.r_addend;
}
}
}
#endif

163
ELF/ScriptParser.cpp Normal file
View File

@ -0,0 +1,163 @@
//===- ScriptParser.cpp ---------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the base parser class for linker script and dynamic
// list.
//
//===----------------------------------------------------------------------===//
#include "ScriptParser.h"
#include "Error.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
using namespace lld;
using namespace lld::elf;
// Returns the line of S that contains the character S[Pos], without its
// trailing whitespace.
static StringRef getLine(StringRef S, size_t Pos) {
  size_t PrevNewline = S.rfind('\n', Pos);
  size_t Start = (PrevNewline == StringRef::npos) ? 0 : PrevNewline + 1;
  size_t NextNewline = S.find('\n', Pos);
  size_t Stop = (NextNewline == StringRef::npos) ? S.size() : NextNewline;
  // rtrim for DOS-style newlines.
  return S.substr(Start, Stop - Start).rtrim();
}
void ScriptParserBase::printErrorPos() {
StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
StringRef Line = getLine(Input, Tok.data() - Input.data());
size_t Col = Tok.data() - Line.data();
error(Line);
error(std::string(Col, ' ') + "^");
}
// Reports Msg and puts the parser into the error state. Only the first error
// is reported so that cascading errors are not recorded.
void ScriptParserBase::setError(const Twine &Msg) {
  if (Error)
    return;
  // A line number and source excerpt can only be shown when both the original
  // text and at least one token are available.
  const bool HasContext = !Input.empty() && !Tokens.empty();
  if (HasContext) {
    error("line " + Twine(getPos()) + ": " + Msg);
    printErrorPos();
  } else {
    error(Msg);
  }
  Error = true;
}
// Splits S into linker script tokens. Returns an empty vector if a quoted
// token is not terminated.
std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
  std::vector<StringRef> Toks;
  for (S = skipSpace(S); !S.empty(); S = skipSpace(S)) {
    // Quoted token: everything up to the closing quote, quotes excluded.
    if (S[0] == '"') {
      size_t Close = S.find('"', 1);
      if (Close == StringRef::npos) {
        error("unclosed quote");
        return {};
      }
      Toks.push_back(S.substr(1, Close - 1));
      S = S.substr(Close + 1);
      continue;
    }
    // Unquoted token: the longest run of word characters.
    size_t Len = S.find_first_not_of(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "0123456789_.$/\\~=+[]*?-:!<>");
    // A character that cannot start a word (which is usually a
    // punctuation) forms a single character token.
    if (Len == 0)
      Len = 1;
    Toks.push_back(S.substr(0, Len));
    S = S.substr(Len);
  }
  return Toks;
}
// Returns S with leading whitespace, /* ... */ comments and #-to-end-of-line
// comments removed. Reports an error and returns an empty string if a block
// comment is not closed.
StringRef ScriptParserBase::skipSpace(StringRef S) {
  while (true) {
    if (S.startswith("/*")) {
      size_t Close = S.find("*/", 2);
      if (Close == StringRef::npos) {
        error("unclosed comment in a linker script");
        return "";
      }
      S = S.substr(Close + 2);
    } else if (S.startswith("#")) {
      // Drop everything through the end of the line, or the whole rest of the
      // input if there is no further newline.
      size_t Eol = S.find('\n', 1);
      S = S.substr(Eol == StringRef::npos ? S.size() : Eol + 1);
    } else {
      StringRef Trimmed = S.ltrim();
      // Nothing was stripped in this round, so we are at a token or at EOF.
      if (Trimmed.size() == S.size())
        return S;
      S = Trimmed;
    }
  }
}
// An erroneous token is handled as if it were the last token before EOF.
bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
// Consumes and returns the next token. Returns an empty string if the parser
// is already in the error state, or (after reporting "unexpected EOF") if no
// token is left.
StringRef ScriptParserBase::next() {
  if (Error)
    return "";
  if (!atEOF())
    return Tokens[Pos++];
  setError("unexpected EOF");
  return "";
}
// Returns the next token without consuming it. Like next(), it reports an
// error and returns an empty string when no token is left.
StringRef ScriptParserBase::peek() {
  StringRef Ahead = next();
  if (!Error) {
    // next() advanced the cursor; step back so the token stays unconsumed.
    --Pos;
    return Ahead;
  }
  return "";
}
// Consumes the next token if it equals Tok and returns whether it did.
// Returns false in the error state; at the end of the input it reports
// "unexpected EOF" and returns false.
bool ScriptParserBase::skip(StringRef Tok) {
  if (Error)
    return false;
  if (atEOF()) {
    setError("unexpected EOF");
    return false;
  }
  const bool Matched = Tokens[Pos] == Tok;
  if (Matched)
    ++Pos;
  return Matched;
}
// Consumes the next token and reports an error unless it equals Expect.
// Does nothing if the parser is already in the error state.
void ScriptParserBase::expect(StringRef Expect) {
  if (Error)
    return;
  StringRef Tok = next();
  if (Tok == Expect)
    return;
  setError(Expect + " expected, but got " + Tok);
}
// Returns the 1-based line number of the most recently consumed token, or 1
// if no token has been consumed yet.
size_t ScriptParserBase::getPos() {
  if (Pos == 0)
    return 1;
  const char *TokStart = Tokens[Pos - 1].data();
  // Count the newlines in the input that precede the token.
  StringRef Before(Input.data(), TokStart - Input.data());
  return 1 + Before.count('\n');
}

49
ELF/ScriptParser.h Normal file
View File

@ -0,0 +1,49 @@
//===- ScriptParser.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SCRIPT_PARSER_H
#define LLD_ELF_SCRIPT_PARSER_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include <utility>
#include <vector>
namespace lld {
namespace elf {
// Base class for token-based parsers such as the linker script and dynamic
// list parsers. It splits the input into tokens and offers primitives to walk
// over them. Only the first error is reported; after that the parser behaves
// as if it had reached the end of the input.
class ScriptParserBase {
public:
// Tokenizes S. The original text is kept in Input for error messages.
explicit ScriptParserBase(StringRef S) : Input(S), Tokens(tokenize(S)) {}
// Uses an already tokenized input. Input is left empty in this case.
explicit ScriptParserBase(std::vector<StringRef> Tokens)
: Input(""), Tokens(std::move(Tokens)) {}
protected:
// Reports Msg (only for the first error) and sets Error.
void setError(const Twine &Msg);
// Splits S into tokens, dropping whitespace and comments.
static std::vector<StringRef> tokenize(StringRef S);
// Returns S without leading whitespace and comments.
static StringRef skipSpace(StringRef S);
// True if no token is left or an error has occurred.
bool atEOF();
// Consumes and returns the next token; "" on error or EOF.
StringRef next();
// Returns the next token without consuming it; "" on error or EOF.
StringRef peek();
// Consumes the next token if it equals Tok and returns whether it did.
bool skip(StringRef Tok);
// Consumes the next token and reports an error if it is not Expect.
void expect(StringRef Expect);
// Returns the line number of the most recently consumed token.
size_t getPos();
// Reports the source line of the current token with a caret under it.
void printErrorPos();
// Original text (empty when constructed from tokens).
StringRef Input;
// Token stream being parsed.
std::vector<StringRef> Tokens;
// Index of the next token to consume.
size_t Pos = 0;
// Set once an error has been reported.
bool Error = false;
};
} // namespace elf
} // namespace lld
#endif

98
ELF/Strings.cpp Normal file
View File

@ -0,0 +1,98 @@
//===- Strings.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Strings.h"
#include "Error.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include <algorithm>
#ifdef HAVE_CXXABI_H
#include <cxxabi.h>
#endif
using namespace llvm;
using namespace lld;
using namespace lld::elf;
// Returns true if S matches T. S can contain glob meta-characters.
// The asterisk ('*') matches zero or more characters, and the question
// mark ('?') matches one character.
bool elf::globMatch(StringRef S, StringRef T) {
  for (;;) {
    if (S.empty())
      return T.empty();
    if (S[0] == '*') {
      S = S.substr(1);
      if (S.empty())
        // Fast path. If a pattern is '*', it matches anything.
        return true;
      // Try to match the rest of the pattern against every suffix of T,
      // including the empty one (I == E). Without the empty suffix, a
      // pattern that still has stars left, such as "**", would fail to
      // match once T is exhausted.
      for (size_t I = 0, E = T.size(); I <= E; ++I)
        if (globMatch(S, T.substr(I)))
          return true;
      return false;
    }
    if (T.empty() || (S[0] != T[0] && S[0] != '?'))
      return false;
    S = S.substr(1);
    T = T.substr(1);
  }
}
// Converts a hex string (e.g. "deadbeef") to a vector of bytes, two digits
// per byte. Reports an error and returns an empty vector if a chunk is not a
// hexadecimal number.
std::vector<uint8_t> elf::parseHex(StringRef S) {
  std::vector<uint8_t> Bytes;
  for (; !S.empty(); S = S.substr(2)) {
    StringRef Digits = S.substr(0, 2);
    uint8_t Value;
    // getAsInteger returns true on failure.
    if (Digits.getAsInteger(16, Value)) {
      error("not a hexadecimal value: " + Digits);
      return {};
    }
    Bytes.push_back(Value);
  }
  return Bytes;
}
// Returns true if C may start a C identifier: an ASCII letter or '_'.
static bool isAlpha(char C) {
  if (C == '_')
    return true;
  if ('a' <= C && C <= 'z')
    return true;
  return 'A' <= C && C <= 'Z';
}
// Returns true if C may appear inside a C identifier: an ASCII letter, '_'
// or an ASCII digit.
static bool isAlnum(char C) {
  const bool IsDigit = '0' <= C && C <= '9';
  return IsDigit || isAlpha(C);
}
// Returns true if S is valid as a C language identifier: it is non-empty,
// starts with a letter or '_', and continues with letters, digits or '_'.
bool elf::isValidCIdentifier(StringRef S) {
  if (S.empty() || !isAlpha(S[0]))
    return false;
  return std::all_of(S.begin() + 1, S.end(), isAlnum);
}
// Returns the demangled C++ symbol name for Name. Name is returned unchanged
// if <cxxabi.h> is not available, if Name does not start with "_Z", or if
// demangling fails.
std::string elf::demangle(StringRef Name) {
#ifdef HAVE_CXXABI_H
  // __cxa_demangle can be used to demangle strings other than symbol
  // names which do not necessarily start with "_Z". Name can be
  // either a C or C++ symbol. Don't call __cxa_demangle if the name
  // does not look like a C++ symbol name to avoid getting unexpected
  // result for a C symbol that happens to match a mangled type name.
  if (!Name.startswith("_Z"))
    return Name;
  char *Demangled =
      abi::__cxa_demangle(Name.str().c_str(), nullptr, nullptr, nullptr);
  if (!Demangled)
    return Name;
  // The buffer returned by __cxa_demangle is released with free(), so copy it
  // into a std::string first.
  std::string Result(Demangled);
  free(Demangled);
  return Result;
#else
  return Name;
#endif
}

29
ELF/Strings.h Normal file
View File

@ -0,0 +1,29 @@
//===- Strings.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// The guard is named after the ELF directory this header lives in, like the
// other ELF headers, so that it cannot clash with a COFF header of the same
// file name.
#ifndef LLD_ELF_STRINGS_H
#define LLD_ELF_STRINGS_H
#include "lld/Core/LLVM.h"
#include <string>
#include <vector>
namespace lld {
namespace elf {
// Returns true if glob pattern S ('*' and '?' are meta-characters) matches T.
bool globMatch(StringRef S, StringRef T);
// Converts a hex string (e.g. "deadbeef") to a vector of bytes. Reports an
// error and returns an empty vector on malformed input.
std::vector<uint8_t> parseHex(StringRef S);
// Returns true if S is valid as a C language identifier.
bool isValidCIdentifier(StringRef S);
// Returns a demangled C++ symbol name. If Name is not a mangled
// name or the system does not provide __cxa_demangle function,
// it returns an unmodified string.
std::string demangle(StringRef Name);
}
}
#endif

168
ELF/SymbolListFile.cpp Normal file
View File

@ -0,0 +1,168 @@
//===- SymbolListFile.cpp -------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the parsers for dynamic lists (--dynamic-list) and
// version scripts (--version-script). They do not construct an AST but
// consume the tokens directly. Results are written to the Config object.
//
//===----------------------------------------------------------------------===//
#include "SymbolListFile.h"
#include "Config.h"
#include "ScriptParser.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
// Parse the --dynamic-list argument. A dynamic list is in the form
//
// { symbol1; symbol2; [...]; symbolN };
//
// Multiple groups can be defined in the same file, and they are merged
// into a single group.
class DynamicListParser final : public ScriptParserBase {
public:
// Tokenizes the dynamic list text S.
DynamicListParser(StringRef S) : ScriptParserBase(S) {}
// Parses all groups and appends the symbol names to Config->DynamicList.
void run();
};
void DynamicListParser::run() {
while (!atEOF()) {
expect("{");
while (!Error) {
Config->DynamicList.push_back(next());
expect(";");
if (skip("}"))
break;
}
expect(";");
}
}
// Parses the dynamic list stored in MB; results go to Config->DynamicList.
void elf::parseDynamicList(MemoryBufferRef MB) {
  DynamicListParser Parser(MB.getBuffer());
  Parser.run();
}
// Parse the --version-script argument. Two forms are accepted: a single
// anonymous version
//
// { [ global: ] symbol1; [...]; symbolN; [ local: *; ] };
//
// or one or more named versions
//
// VerStr { [ global: ] symbol1; [...]; symbolN; [ local: *; ] } [ Parent ];
//
// The two forms cannot be mixed in one script. Inside the global part,
// an extern "C++" { ... }; block lists symbols that are recorded with the
// second SymbolVersion field set to true (it is false for plain entries).
// The only pattern accepted after "local:" is "*". A parent version name is
// parsed but ignored. This parser stores symbol names verbatim; it does no
// wildcard matching itself.
class VersionScriptParser final : public ScriptParserBase {
public:
// Tokenizes the version script text S.
VersionScriptParser(StringRef S) : ScriptParserBase(S) {}
// Parses the whole script and records the results in Config.
void run();
private:
// Parses an extern "C++" { ... }; block, appending its symbols to Globals.
void parseExtern(std::vector<SymbolVersion> *Globals);
// Parses the body of one version definition, after its opening brace.
void parseVersion(StringRef VerStr);
// Parses the global symbol list of version VerStr.
void parseGlobal(StringRef VerStr);
// Parses the "*;" that follows "local:".
void parseLocal();
};
// Appends a version definition named VerStr to Config->VersionDefinitions and
// returns the identifier assigned to it.
size_t elf::defineSymbolVersion(StringRef VerStr) {
  auto &Definitions = Config->VersionDefinitions;
  // Identifiers start at 2 because 0 and 1 are reserved
  // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants.
  size_t VersionId = Definitions.size() + 2;
  Definitions.push_back({VerStr, VersionId});
  return VersionId;
}
// Parses the body of a version definition named VerStr (empty for the
// anonymous version). The opening brace has already been consumed by the
// caller; this consumes everything through the terminating ";".
void VersionScriptParser::parseVersion(StringRef VerStr) {
defineSymbolVersion(VerStr);
// The "global:" label is optional: anything that does not start with
// "local:" is parsed as the global symbol list.
if (skip("global:") || peek() != "local:")
parseGlobal(VerStr);
if (skip("local:"))
parseLocal();
expect("}");
// Each version may have a parent version. For example, "Ver2" defined as
// "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" as a parent. This
// version hierarchy is, probably against your instinct, purely for humans; the
// runtime doesn't care about them at all. In LLD, we simply skip the token.
if (!VerStr.empty() && peek() != ";")
next();
expect(";");
}
// Parses the "*;" that follows a "local:" label and makes VER_NDX_LOCAL the
// default symbol version. "*" is the only pattern accepted here.
void VersionScriptParser::parseLocal() {
Config->DefaultSymbolVersion = VER_NDX_LOCAL;
expect("*");
expect(";");
}
void VersionScriptParser::parseExtern(std::vector<SymbolVersion> *Globals) {
expect("C++");
expect("{");
for (;;) {
if (peek() == "}" || Error)
break;
Globals->push_back({next(), true});
expect(";");
}
expect("}");
expect(";");
}
// Parses the global symbol list of version VerStr. Symbols of the anonymous
// version go to Config->VersionScriptGlobals; symbols of a named version go to
// the most recently added entry of Config->VersionDefinitions. Parsing stops
// in front of "}" or "local:", or when an error occurs.
void VersionScriptParser::parseGlobal(StringRef VerStr) {
  std::vector<SymbolVersion> *Globals =
      VerStr.empty() ? &Config->VersionScriptGlobals
                     : &Config->VersionDefinitions.back().Globals;
  while (true) {
    if (skip("extern"))
      parseExtern(Globals);
    StringRef Cur = peek();
    if (Cur == "}" || Cur == "local:" || Error)
      return;
    next();
    Globals->push_back({Cur, false});
    expect(";");
  }
}
// Parses the whole version script. A script is either one anonymous version
// or a sequence of named versions; mixing the two is reported as an error.
void VersionScriptParser::run() {
  StringRef Msg = "anonymous version definition is used in "
                  "combination with other version definitions";
  if (!skip("{")) {
    // Sequence of named versions: "VerStr { ... } [Parent];".
    while (!atEOF() && !Error) {
      StringRef VerStr = next();
      if (VerStr == "{") {
        setError(Msg);
        return;
      }
      expect("{");
      parseVersion(VerStr);
    }
    return;
  }
  // Anonymous version: nothing may follow it.
  parseVersion("");
  if (!atEOF())
    setError(Msg);
}
// Parses the version script stored in MB; results are recorded in Config.
void elf::parseVersionScript(MemoryBufferRef MB) {
  VersionScriptParser Parser(MB.getBuffer());
  Parser.run();
}

27
ELF/SymbolListFile.h Normal file
View File

@ -0,0 +1,27 @@
//===- SymbolListFile.h -----------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SYMBOL_LIST_FILE_H
#define LLD_ELF_SYMBOL_LIST_FILE_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/MemoryBuffer.h"
namespace lld {
namespace elf {
size_t defineSymbolVersion(StringRef Version);
void parseDynamicList(MemoryBufferRef MB);
void parseVersionScript(MemoryBufferRef MB);
} // namespace elf
} // namespace lld
#endif

View File

@ -17,7 +17,11 @@
#include "SymbolTable.h"
#include "Config.h"
#include "Error.h"
#include "LinkerScript.h"
#include "Strings.h"
#include "SymbolListFile.h"
#include "Symbols.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/StringSaver.h"
using namespace llvm;
@ -25,40 +29,48 @@ using namespace llvm::object;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
// All input object files must be for the same architecture
// (e.g. it does not make sense to link x86 object files with
// MIPS object files.) This function checks for that error.
template <class ELFT>
static void checkCompatibility(InputFile *FileP) {
auto *F = dyn_cast<ELFFileBase<ELFT>>(FileP);
if (!F)
return;
if (F->getELFKind() == Config->EKind && F->getEMachine() == Config->EMachine)
return;
template <class ELFT> static bool isCompatible(InputFile *F) {
if (!isa<ELFFileBase<ELFT>>(F) && !isa<BitcodeFile>(F))
return true;
if (F->EKind == Config->EKind && F->EMachine == Config->EMachine)
return true;
StringRef A = F->getName();
StringRef B = Config->Emulation;
if (B.empty())
B = Config->FirstElf->getName();
error(A + " is incompatible with " + B);
return false;
}
// Add symbols in File to the symbol table.
template <class ELFT>
void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) {
InputFile *FileP = File.get();
checkCompatibility<ELFT>(FileP);
if (!isCompatible<ELFT>(FileP))
return;
// .a file
if (auto *F = dyn_cast<ArchiveFile>(FileP)) {
ArchiveFiles.emplace_back(cast<ArchiveFile>(File.release()));
F->parse();
for (Lazy &Sym : F->getLazySymbols())
addLazy(&Sym);
F->parse<ELFT>();
return;
}
// Lazy object file
if (auto *F = dyn_cast<LazyObjectFile>(FileP)) {
LazyObjectFiles.emplace_back(cast<LazyObjectFile>(File.release()));
F->parse<ELFT>();
return;
}
if (Config->Trace)
outs() << getFilename(FileP) << "\n";
// .so file
if (auto *F = dyn_cast<SharedFile<ELFT>>(FileP)) {
// DSOs are uniquified not by filename but by soname.
@ -68,189 +80,443 @@ void SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) {
SharedFiles.emplace_back(cast<SharedFile<ELFT>>(File.release()));
F->parseRest();
for (SharedSymbol<ELFT> &B : F->getSharedSymbols())
resolve(&B);
return;
}
// .o file
// LLVM bitcode file
if (auto *F = dyn_cast<BitcodeFile>(FileP)) {
BitcodeFiles.emplace_back(cast<BitcodeFile>(File.release()));
F->parse<ELFT>(ComdatGroups);
return;
}
// Regular object file
auto *F = cast<ObjectFile<ELFT>>(FileP);
ObjectFiles.emplace_back(cast<ObjectFile<ELFT>>(File.release()));
F->parse(ComdatGroups);
for (SymbolBody *B : F->getSymbols())
resolve(B);
}
// Creates an Undefined body for Name in the table's allocator, feeds it to
// resolve(), and hands the freshly created body back to the caller.
template <class ELFT>
SymbolBody *SymbolTable<ELFT>::addUndefined(StringRef Name) {
  Undefined *Body = new (Alloc) Undefined(Name, false, STV_DEFAULT, false);
  resolve(Body);
  return Body;
}
// This function is where all the optimizations of link-time
// optimization happens. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
// This function compiles bitcode files into a few big native files
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that consist of a program are passed
// to the compiler at once, it can do whole-program optimization.
template <class ELFT> void SymbolTable<ELFT>::addCombinedLtoObject() {
if (BitcodeFiles.empty())
return;
// Add an undefined symbol. Unlike addUndefined, that symbol
// doesn't have to be resolved, thus "opt" (optional).
template <class ELFT>
SymbolBody *SymbolTable<ELFT>::addUndefinedOpt(StringRef Name) {
auto *Sym = new (Alloc) Undefined(Name, false, STV_HIDDEN, true);
resolve(Sym);
return Sym;
// Compile bitcode files.
Lto.reset(new BitcodeCompiler);
for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles)
Lto->add(*F);
std::vector<std::unique_ptr<InputFile>> IFs = Lto->compile();
// Replace bitcode symbols.
for (auto &IF : IFs) {
ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
DenseSet<StringRef> DummyGroups;
Obj->parse(DummyGroups);
ObjectFiles.emplace_back(Obj);
}
}
// Creates a DefinedRegular body for Name from ESym, feeds it to resolve(),
// and returns the new body.
template <class ELFT>
SymbolBody *SymbolTable<ELFT>::addAbsolute(StringRef Name, Elf_Sym &ESym) {
  // Absolute symbols have no corresponding input section, hence the nullptr.
  DefinedRegular<ELFT> *Body =
      new (Alloc) DefinedRegular<ELFT>(Name, ESym, nullptr);
  resolve(Body);
  return Body;
}
template <class ELFT>
SymbolBody *SymbolTable<ELFT>::addSynthetic(StringRef Name,
OutputSectionBase<ELFT> &Section,
uintX_t Value) {
auto *Sym = new (Alloc) DefinedSynthetic<ELFT>(Name, Value, Section);
resolve(Sym);
return Sym;
DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name,
uint8_t Visibility) {
return cast<DefinedRegular<ELFT>>(
addRegular(Name, STB_GLOBAL, Visibility)->body());
}
// Add Name as an "ignored" symbol. An ignored symbol is a regular
// linker-synthesized defined symbol, but it is not recorded to the output
// file's symbol table. Such symbols are useful for some linker-defined symbols.
// linker-synthesized defined symbol, but is only defined if needed.
template <class ELFT>
SymbolBody *SymbolTable<ELFT>::addIgnored(StringRef Name) {
return addAbsolute(Name, ElfSym<ELFT>::IgnoredWeak);
DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name,
uint8_t Visibility) {
if (!find(Name))
return nullptr;
return addAbsolute(Name, Visibility);
}
// The 'strong' variant of the addIgnored. Adds symbol which has a global
// binding and cannot be substituted.
template <class ELFT>
SymbolBody *SymbolTable<ELFT>::addIgnoredStrong(StringRef Name) {
return addAbsolute(Name, ElfSym<ELFT>::Ignored);
// Set a flag for --trace-symbol so that we can print out a log message
// if a new symbol with the same name is inserted into the symbol table.
template <class ELFT> void SymbolTable<ELFT>::trace(StringRef Name) {
Symtab.insert({Name, {-1, true}});
}
// Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
// Used to implement --wrap.
template <class ELFT> void SymbolTable<ELFT>::wrap(StringRef Name) {
if (Symtab.count(Name) == 0)
SymbolBody *B = find(Name);
if (!B)
return;
StringSaver Saver(Alloc);
Symbol *Sym = addUndefined(Name)->getSymbol();
Symbol *Real = addUndefined(Saver.save("__real_" + Name))->getSymbol();
Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name))->getSymbol();
Real->Body = Sym->Body;
Sym->Body = Wrap->Body;
Symbol *Sym = B->symbol();
Symbol *Real = addUndefined(Saver.save("__real_" + Name));
Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));
// We rename symbols by replacing the old symbol's SymbolBody with the new
// symbol's SymbolBody. This causes all SymbolBody pointers referring to the
// old symbol to instead refer to the new symbol.
memcpy(Real->Body.buffer, Sym->Body.buffer, sizeof(Sym->Body));
memcpy(Sym->Body.buffer, Wrap->Body.buffer, sizeof(Wrap->Body));
}
// Returns a file from which symbol B was created.
// If B does not belong to any file, returns a nullptr.
// Merges two visibility values. STV_DEFAULT acts as "no constraint": if
// either side is STV_DEFAULT the other side is returned as is. Otherwise the
// numerically smaller of the two values is returned.
static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
  const bool ADefault = VA == STV_DEFAULT;
  const bool BDefault = VB == STV_DEFAULT;
  if (ADefault || BDefault)
    return ADefault ? VB : VA;
  return VA < VB ? VA : VB;
}
// Find an existing symbol or create and insert a new one.
template <class ELFT>
ELFFileBase<ELFT> *SymbolTable<ELFT>::findFile(SymbolBody *B) {
for (const std::unique_ptr<ObjectFile<ELFT>> &F : ObjectFiles) {
ArrayRef<SymbolBody *> Syms = F->getSymbols();
if (std::find(Syms.begin(), Syms.end(), B) != Syms.end())
return F.get();
std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) {
auto P = Symtab.insert({Name, {(int)SymVector.size(), false}});
SymIndex &V = P.first->second;
bool IsNew = P.second;
if (V.Idx == -1) {
IsNew = true;
V = {(int)SymVector.size(), true};
}
return nullptr;
Symbol *Sym;
if (IsNew) {
Sym = new (Alloc) Symbol;
Sym->Binding = STB_WEAK;
Sym->Visibility = STV_DEFAULT;
Sym->IsUsedInRegularObj = false;
Sym->ExportDynamic = false;
Sym->VersionId = Config->DefaultSymbolVersion;
Sym->Traced = V.Traced;
SymVector.push_back(Sym);
} else {
Sym = SymVector[V.Idx];
}
return {Sym, IsNew};
}
// Looks Name up (creating the Symbol if necessary) and folds the attributes
// of a newly seen symbol into it:
//   * Visibility is merged with getMinVisibility().
//   * ExportDynamic is set when the symbol may not be omitted from the
//     dynamic symbol table and Config->Shared or Config->ExportDynamic is on.
//   * IsUsedInRegularObj is set (never cleared) when requested.
// If the symbol already existed with a known type and the TLS-ness of the new
// symbol disagrees with it, an error is reported.
// Returns the Symbol together with a flag telling whether it was just created.
template <class ELFT>
std::pair<Symbol *, bool>
SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
                          bool CanOmitFromDynSym, bool IsUsedInRegularObj,
                          InputFile *File) {
  std::pair<Symbol *, bool> Entry = insert(Name);
  Symbol *S = Entry.first;
  const bool WasInserted = Entry.second;

  // Merge in the new symbol's visibility.
  S->Visibility = getMinVisibility(S->Visibility, Visibility);

  const bool ExportRequested = Config->Shared || Config->ExportDynamic;
  if (ExportRequested && !CanOmitFromDynSym)
    S->ExportDynamic = true;

  if (IsUsedInRegularObj)
    S->IsUsedInRegularObj = true;

  // Only a pre-existing symbol has a body that can be inspected.
  if (!WasInserted) {
    SymbolBody *Old = S->body();
    const bool NewIsTls = Type == STT_TLS;
    if (Old->Type != SymbolBody::UnknownType && NewIsTls != Old->isTls())
      error("TLS attribute mismatch for symbol: " + conflictMsg(Old, File));
  }

  return Entry;
}
// Builds the text "<symbol> in <file of Existing> and <NewFile>" for use in
// diagnostics. The symbol name is demangled first when Config->Demangle is
// set.
template <typename ELFT>
std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Existing,
                                           InputFile *NewFile) {
  std::string Msg = Existing->getName();
  if (Config->Demangle)
    Msg = demangle(Msg);

  Msg += " in ";
  Msg += getFilename(Existing->File);
  Msg += " and ";
  Msg += getFilename(NewFile);
  return Msg;
}
// Convenience overload: adds an undefined symbol with global binding, default
// visibility, type 0, no originating file, and which may not be omitted from
// the dynamic symbol table.
template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
  const uint8_t Type = 0;
  const bool CanOmitFromDynSym = false;
  InputFile *const File = nullptr;
  return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, Type, CanOmitFromDynSym,
                      File);
}
template <class ELFT>
std::string SymbolTable<ELFT>::conflictMsg(SymbolBody *Old, SymbolBody *New) {
ELFFileBase<ELFT> *OldFile = findFile(Old);
ELFFileBase<ELFT> *NewFile = findFile(New);
StringRef Sym = Old->getName();
StringRef F1 = OldFile ? OldFile->getName() : "(internal)";
StringRef F2 = NewFile ? NewFile->getName() : "(internal)";
return (Sym + " in " + F1 + " and " + F2).str();
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
uint8_t StOther, uint8_t Type,
bool CanOmitFromDynSym,
InputFile *File) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) =
insert(Name, Type, StOther & 3, CanOmitFromDynSym,
/*IsUsedInRegularObj*/ !File || !isa<BitcodeFile>(File), File);
if (WasInserted) {
S->Binding = Binding;
replaceBody<Undefined>(S, Name, StOther, Type, File);
return S;
}
if (Binding != STB_WEAK) {
if (S->body()->isShared() || S->body()->isLazy())
S->Binding = Binding;
if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(S->body()))
SS->file()->IsUsed = true;
}
if (auto *L = dyn_cast<Lazy>(S->body())) {
// An undefined weak will not fetch archive members, but we have to remember
// its type. See also comment in addLazyArchive.
if (S->isWeak())
L->Type = Type;
else if (auto F = L->fetch())
addFile(std::move(F));
}
return S;
}
// This function resolves conflicts if there's an existing symbol with
// the same name. Decisions are made based on symbol type.
template <class ELFT> void SymbolTable<ELFT>::resolve(SymbolBody *New) {
Symbol *Sym = insert(New);
if (Sym->Body == New)
return;
// Decides how a new defined symbol with the given binding ranks against the
// current contents of S. The result is 1 when the new symbol should win, -1
// when it should lose, and 0 when neither side is weak (both are strong
// definitions).
static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) {
  // A symbol that was just created has nothing to compete with.
  if (WasInserted)
    return 1;

  // Lazy, undefined and shared bodies always give way to a definition.
  SymbolBody *Old = S->body();
  const bool OldYields = Old->isLazy() || Old->isUndefined() || Old->isShared();
  if (OldYields)
    return 1;

  // A weak newcomer never displaces an existing definition.
  if (Binding == STB_WEAK)
    return -1;

  return S->isWeak() ? 1 : 0;
}
SymbolBody *Existing = Sym->Body;
// Ranks a new non-common defined symbol against the current contents of S.
// Returns 1 if the new symbol should win, -1 if it should lose, and 0 on a
// genuine conflict. When compareDefined() itself declares the new symbol the
// winner, S->Binding is updated to the new binding.
static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding) {
  const int Cmp = compareDefined(S, WasInserted, Binding);
  if (Cmp > 0) {
    S->Binding = Binding;
    return Cmp;
  }
  if (Cmp < 0)
    return Cmp;

  // Neither side is weak. That is only acceptable if the existing body is a
  // common symbol.
  if (!isa<DefinedCommon>(S->body()))
    return 0;

  // Non-common symbols take precedence over common symbols.
  if (Config->WarnCommon)
    warning("common " + S->body()->getName() + " is overridden");
  return 1;
}
if (Lazy *L = dyn_cast<Lazy>(Existing)) {
if (auto *Undef = dyn_cast<Undefined>(New)) {
addMemberFile(Undef, L);
return;
template <class ELFT>
Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size,
uint64_t Alignment, uint8_t Binding,
uint8_t StOther, uint8_t Type,
InputFile *File) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) =
insert(N, Type, StOther & 3, /*CanOmitFromDynSym*/ false,
/*IsUsedInRegularObj*/ true, File);
int Cmp = compareDefined(S, WasInserted, Binding);
if (Cmp > 0) {
S->Binding = Binding;
replaceBody<DefinedCommon>(S, N, Size, Alignment, StOther, Type, File);
} else if (Cmp == 0) {
auto *C = dyn_cast<DefinedCommon>(S->body());
if (!C) {
// Non-common symbols take precedence over common symbols.
if (Config->WarnCommon)
warning("common " + S->body()->getName() + " is overridden");
return S;
}
// Found a definition for something also in an archive.
// Ignore the archive definition.
Sym->Body = New;
return;
}
if (New->isTls() != Existing->isTls())
error("TLS attribute mismatch for symbol: " + conflictMsg(Existing, New));
if (Config->WarnCommon)
warning("multiple common of " + S->body()->getName());
// compare() returns -1, 0, or 1 if the lhs symbol is less preferable,
// equivalent (conflicting), or more preferable, respectively.
int Comp = Existing->compare<ELFT>(New);
if (Comp == 0) {
std::string S = "duplicate symbol: " + conflictMsg(Existing, New);
if (!Config->AllowMultipleDefinition)
error(S);
warning(S);
return;
C->Size = std::max(C->Size, Size);
C->Alignment = std::max(C->Alignment, Alignment);
}
if (Comp < 0)
Sym->Body = New;
return S;
}
// Find an existing symbol or create and insert a new one.
template <class ELFT> Symbol *SymbolTable<ELFT>::insert(SymbolBody *New) {
StringRef Name = New->getName();
Symbol *&Sym = Symtab[Name];
if (!Sym)
Sym = new (Alloc) Symbol{New};
New->setBackref(Sym);
return Sym;
// Reports that the symbol held by Existing is defined a second time by
// NewFile. The report is an error unless Config->AllowMultipleDefinition is
// set, in which case it is downgraded to a warning.
template <class ELFT>
void SymbolTable<ELFT>::reportDuplicate(SymbolBody *Existing,
                                        InputFile *NewFile) {
  std::string Msg = "duplicate symbol: " + conflictMsg(Existing, NewFile);
  if (!Config->AllowMultipleDefinition) {
    error(Msg);
    return;
  }
  warning(Msg);
}
// Adds a regular defined symbol, described by the ELF symbol Sym and located
// in Section, to the symbol table and returns its Symbol.
//
// Section may be null, in which case no input file is associated with the
// definition. If the new definition wins resolution its body replaces the
// existing one; if it ties with an existing definition a duplicate symbol is
// reported.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, const Elf_Sym &Sym,
                                      InputSectionBase<ELFT> *Section) {
  // Resolve the originating file once, so that both insert() and the
  // duplicate report cope with a null Section. The duplicate path used to
  // dereference Section unconditionally.
  InputFile *File = Section ? Section->getFile() : nullptr;

  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) =
      insert(Name, Sym.getType(), Sym.getVisibility(),
             /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, File);
  int Cmp = compareDefinedNonCommon(S, WasInserted, Sym.getBinding());
  if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, Sym, Section);
  else if (Cmp == 0)
    reportDuplicate(S->body(), File);
  return S;
}
// Adds a regular defined symbol that is described only by its name, binding
// and st_other value (no ELF symbol record and no input file). The symbol is
// inserted with type STT_NOTYPE and counts as used in a regular object.
// Returns the Symbol; a tie with an existing definition is reported as a
// duplicate.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t Binding,
                                      uint8_t StOther) {
  std::pair<Symbol *, bool> Entry =
      insert(Name, STT_NOTYPE, StOther & 3, /*CanOmitFromDynSym*/ false,
             /*IsUsedInRegularObj*/ true, nullptr);
  Symbol *S = Entry.first;

  const int Cmp = compareDefinedNonCommon(S, Entry.second, Binding);
  if (Cmp == 0)
    reportDuplicate(S->body(), nullptr);
  else if (Cmp > 0)
    replaceBody<DefinedRegular<ELFT>>(S, Name, StOther);
  return S;
}
// Adds a synthetic defined symbol named N whose value is Value relative to
// Section. The symbol is inserted as STT_NOTYPE with hidden visibility and is
// resolved with global binding. Returns the Symbol; a tie with an existing
// definition is reported as a duplicate.
template <typename ELFT>
Symbol *SymbolTable<ELFT>::addSynthetic(StringRef N,
                                        OutputSectionBase<ELFT> *Section,
                                        uintX_t Value) {
  std::pair<Symbol *, bool> Entry =
      insert(N, STT_NOTYPE, STV_HIDDEN, /*CanOmitFromDynSym*/ false,
             /*IsUsedInRegularObj*/ true, nullptr);
  Symbol *S = Entry.first;

  const int Cmp = compareDefinedNonCommon(S, Entry.second, STB_GLOBAL);
  if (Cmp == 0)
    reportDuplicate(S->body(), nullptr);
  else if (Cmp > 0)
    replaceBody<DefinedSynthetic<ELFT>>(S, N, Value, Section);
  return S;
}
// Registers a symbol that is defined by the shared file F. The shared
// definition only takes over when the name is new or currently undefined.
template <typename ELFT>
void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name,
                                  const Elf_Sym &Sym,
                                  const typename ELFT::Verdef *Verdef) {
  // DSO symbols do not affect visibility in the output. Passing STV_DEFAULT
  // leaves whatever visibility the table already holds untouched.
  std::pair<Symbol *, bool> Entry =
      insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true,
             /*IsUsedInRegularObj*/ false, F);
  Symbol *S = Entry.first;

  // Make sure we preempt DSO symbols with default visibility.
  if (Sym.getVisibility() == STV_DEFAULT)
    S->ExportDynamic = true;

  // The body of a freshly inserted symbol must not be inspected, so the
  // insertion flag is tested first.
  const bool TakesOver = Entry.second || isa<Undefined>(S->body());
  if (!TakesOver)
    return;

  replaceBody<SharedSymbol<ELFT>>(S, F, Name, Sym, Verdef);
  if (!S->isWeak())
    F->IsUsed = true;
}
// Adds a symbol defined by the bitcode file F. The symbol is inserted without
// being marked as used in a regular object, and is resolved as weak or global
// depending on IsWeak. Returns the Symbol; a tie with an existing definition
// is reported as a duplicate.
template <class ELFT>
Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, bool IsWeak,
                                      uint8_t StOther, uint8_t Type,
                                      bool CanOmitFromDynSym, BitcodeFile *F) {
  std::pair<Symbol *, bool> Entry =
      insert(Name, Type, StOther & 3, CanOmitFromDynSym,
             /*IsUsedInRegularObj*/ false, F);
  Symbol *S = Entry.first;

  const int Cmp = compareDefinedNonCommon(S, Entry.second,
                                          IsWeak ? STB_WEAK : STB_GLOBAL);
  if (Cmp == 0)
    reportDuplicate(S->body(), F);
  else if (Cmp > 0)
    replaceBody<DefinedBitcode>(S, Name, StOther, Type, F);
  return S;
}
template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {
auto It = Symtab.find(Name);
if (It == Symtab.end())
return nullptr;
return It->second->Body;
SymIndex V = It->second;
if (V.Idx == -1)
return nullptr;
return SymVector[V.Idx]->body();
}
template <class ELFT> void SymbolTable<ELFT>::addLazy(Lazy *L) {
Symbol *Sym = insert(L);
if (Sym->Body == L)
return;
if (auto *Undef = dyn_cast<Undefined>(Sym->Body)) {
Sym->Body = L;
addMemberFile(Undef, L);
// Collects the bodies of all symbols that are not undefined and whose names
// match the glob Pattern. The bodies are returned in SymVector order.
template <class ELFT>
std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(StringRef Pattern) {
  std::vector<SymbolBody *> Matches;
  for (Symbol *Sym : SymVector) {
    SymbolBody *Body = Sym->body();
    if (Body->isUndefined())
      continue;
    if (!globMatch(Pattern, Body->getName()))
      continue;
    Matches.push_back(Body);
  }
  return Matches;
}
template <class ELFT>
void SymbolTable<ELFT>::addMemberFile(Undefined *Undef, Lazy *L) {
// Weak undefined symbols should not fetch members from archives.
// If we were to keep old symbol we would not know that an archive member was
// available if a strong undefined symbol shows up afterwards in the link.
// If a strong undefined symbol never shows up, this lazy symbol will
// get to the end of the link and must be treated as the weak undefined one.
// We set UsedInRegularObj in a similar way to what is done with shared
// symbols and mark it as weak to reduce how many special cases are needed.
if (Undef->isWeak()) {
L->setUsedInRegularObj();
L->setWeak();
void SymbolTable<ELFT>::addLazyArchive(ArchiveFile *F,
const object::Archive::Symbol Sym) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Sym.getName());
if (WasInserted) {
replaceBody<LazyArchive>(S, *F, Sym, SymbolBody::UnknownType);
return;
}
if (!S->body()->isUndefined())
return;
// Fetch a member file that has the definition for L.
// getMember returns nullptr if the member was already read from the library.
if (std::unique_ptr<InputFile> File = L->getMember())
addFile(std::move(File));
// Weak undefined symbols should not fetch members from archives. If we were
// to keep old symbol we would not know that an archive member was available
// if a strong undefined symbol shows up afterwards in the link. If a strong
// undefined symbol never shows up, this lazy symbol will get to the end of
// the link and must be treated as the weak undefined one. We already marked
// this symbol as used when we added it to the symbol table, but we also need
// to preserve its type. FIXME: Move the Type field to Symbol.
if (S->isWeak()) {
replaceBody<LazyArchive>(S, *F, Sym, S->body()->Type);
return;
}
MemoryBufferRef MBRef = F->getMember(&Sym);
if (!MBRef.getBuffer().empty())
addFile(createObjectFile(MBRef, F->getName()));
}
// Registers Name as lazily available from the object file Obj.
//   * New name: a LazyObject body with unknown type is installed.
//   * Existing body that is not undefined: nothing happens.
//   * Existing weak undefined: the body becomes a LazyObject that keeps the
//     previous body's Type, and Obj is not loaded.
//   * Existing non-weak undefined: Obj is loaded through addFile(), provided
//     its buffer is not empty.
template <class ELFT>
void SymbolTable<ELFT>::addLazyObject(StringRef Name, LazyObjectFile &Obj) {
  std::pair<Symbol *, bool> Entry = insert(Name);
  Symbol *S = Entry.first;

  if (Entry.second) {
    replaceBody<LazyObject>(S, Name, Obj, SymbolBody::UnknownType);
    return;
  }
  if (!S->body()->isUndefined())
    return;

  if (S->isWeak()) {
    replaceBody<LazyObject>(S, Name, Obj, S->body()->Type);
    return;
  }

  MemoryBufferRef MBRef = Obj.getBuffer();
  if (MBRef.getBuffer().empty())
    return;
  addFile(createObjectFile(MBRef));
}
// Handles the undefined (-u) flags: for every name in Config->Undefined that
// currently resolves to a lazy symbol, fetch the file behind it and add that
// file to the link.
template <class ELFT> void SymbolTable<ELFT>::scanUndefinedFlags() {
  for (StringRef Name : Config->Undefined) {
    auto *L = dyn_cast_or_null<Lazy>(find(Name));
    if (!L)
      continue;
    std::unique_ptr<InputFile> File = L->fetch();
    if (!File)
      continue;
    addFile(std::move(File));
  }
}
// This function takes care of the case in which shared libraries depend on
@ -265,10 +531,183 @@ template <class ELFT> void SymbolTable<ELFT>::scanShlibUndefined() {
for (StringRef U : File->getUndefinedSymbols())
if (SymbolBody *Sym = find(U))
if (Sym->isDefined())
Sym->setUsedInDynamicReloc();
Sym->symbol()->ExportDynamic = true;
}
template class elf2::SymbolTable<ELF32LE>;
template class elf2::SymbolTable<ELF32BE>;
template class elf2::SymbolTable<ELF64LE>;
template class elf2::SymbolTable<ELF64BE>;
// Processes the dynamic list option: every name in Config->DynamicList that
// is present in the symbol table gets its ExportDynamic flag set.
template <class ELFT> void SymbolTable<ELFT>::scanDynamicList() {
  for (StringRef Name : Config->DynamicList) {
    SymbolBody *Body = find(Name);
    if (!Body)
      continue;
    Body->symbol()->ExportDynamic = true;
  }
}
// Returns true if S contains at least one of the characters '?' or '*'.
static bool hasWildcard(StringRef S) {
  for (char C : S)
    if (C == '?' || C == '*')
      return true;
  return false;
}
// Assigns version Version to the symbol behind Body.
// If Body is null or undefined, nothing is assigned; an error is reported
// only when Config->NoUndefinedVersion is set. If the symbol already carries
// a version other than the default one, a warning is emitted and the version
// is overwritten. VersionName and Name are used for diagnostics only.
static void setVersionId(SymbolBody *Body, StringRef VersionName,
                         StringRef Name, uint16_t Version) {
  const bool IsDefined = Body && !Body->isUndefined();
  if (!IsDefined) {
    if (Config->NoUndefinedVersion)
      error("version script assignment of " + VersionName + " to symbol " +
            Name + " failed: symbol not defined");
    return;
  }

  Symbol *Sym = Body->symbol();
  const bool AlreadyVersioned = Sym->VersionId != Config->DefaultSymbolVersion;
  if (AlreadyVersioned)
    warning("duplicate symbol " + Name + " in version script");
  Sym->VersionId = Version;
}
// Builds a map from demangled symbol name to symbol body covering every entry
// of SymVector. When several symbols demangle to the same string, the one
// that comes last in SymVector is kept.
template <class ELFT>
std::map<std::string, SymbolBody *> SymbolTable<ELFT>::getDemangledSyms() {
  std::map<std::string, SymbolBody *> ByDemangledName;
  for (Symbol *Sym : SymVector) {
    SymbolBody *Body = Sym->body();
    std::string Key = demangle(Body->getName());
    ByDemangledName[Key] = Body;
  }
  return ByDemangledName;
}
// Returns true if any global entry of any version definition in Config is
// flagged IsExternCpp.
static bool hasExternCpp() {
  for (VersionDefinition &V : Config->VersionDefinitions) {
    for (const SymbolVersion &Sym : V.Globals) {
      if (Sym.IsExternCpp)
        return true;
    }
  }
  return false;
}
// Applies the --version-script information stored in Config to the symbol
// table by assigning a VersionId to the affected symbols.
template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() {
  // A script without version declarations only lists global symbols. Each of
  // them that exists in the table is given VER_NDX_GLOBAL.
  if (!Config->VersionScriptGlobals.empty()) {
    for (SymbolVersion &Sym : Config->VersionScriptGlobals) {
      SymbolBody *Body = find(Sym.Name);
      if (Body)
        Body->symbol()->VersionId = VER_NDX_GLOBAL;
    }
    return;
  }

  if (Config->VersionDefinitions.empty())
    return;

  // With version declarations, every symbol gets a version reference:
  // * An exact match on the mangled name, or an exact extern "C++" match on
  //   the demangled name, is used directly.
  // * Otherwise wildcard patterns are consulted. Version tags are visited in
  //   reverse order and the first hit wins, i.e. the last matching version
  //   tag in the file.

  // Pass 1: exact matches. The demangled-name map is only built when some
  // entry actually needs it.
  std::map<std::string, SymbolBody *> Demangled;
  if (hasExternCpp())
    Demangled = getDemangledSyms();

  for (VersionDefinition &V : Config->VersionDefinitions) {
    for (SymbolVersion Sym : V.Globals) {
      if (hasWildcard(Sym.Name))
        continue;
      SymbolBody *Body;
      if (Sym.IsExternCpp)
        Body = Demangled[Sym.Name];
      else
        Body = find(Sym.Name);
      setVersionId(Body, V.Name, Sym.Name, V.Id);
    }
  }

  // Pass 2: wildcards, last definition first. Symbols that already received a
  // non-default version are left alone.
  for (size_t I = Config->VersionDefinitions.size(); I-- > 0;) {
    VersionDefinition &V = Config->VersionDefinitions[I];
    for (SymbolVersion &Sym : V.Globals) {
      if (!hasWildcard(Sym.Name))
        continue;
      for (SymbolBody *Body : findAll(Sym.Name)) {
        Symbol *S = Body->symbol();
        if (S->VersionId == Config->DefaultSymbolVersion)
          S->VersionId = V.Id;
      }
    }
  }
}
// Returns the length of the longest name among Config->VersionDefinitions,
// or 0 when there are none.
static int getMaxVersionLen() {
  size_t Longest = 0;
  for (VersionDefinition &V : Config->VersionDefinitions) {
    const size_t Len = V.Name.size();
    if (Len > Longest)
      Longest = Len;
  }
  return Longest;
}
// Splits a symbol name of the form <name>@<version> or <name>@@<version>.
// Returns the bare name together with the matching version id; for the
// single-'@' form the id has VERSYM_HIDDEN or'ed in. Returns {"", 0} when the
// name carries no usable version suffix. A suffix naming a version that is
// not in Config->VersionDefinitions is reported as an error and also yields
// {"", 0}.
static std::pair<StringRef, uint16_t>
getSymbolVersion(SymbolBody *B, int MaxVersionLen) {
  StringRef Full = B->getName();

  // Versions are usually short while symbol names can be very long, so the
  // search for '@' starts near the end of the name rather than at the front.
  const int SearchFrom = std::max(0, int(Full.size()) - MaxVersionLen - 2);
  const size_t At = Full.find('@', SearchFrom);
  if (At == StringRef::npos || At == 0)
    return {"", 0};

  StringRef Name = Full.substr(0, At);
  StringRef Verstr = Full.substr(At + 1);
  if (Verstr.empty())
    return {"", 0};

  // '@@' in a symbol name means the default version.
  // It is usually the most recent one.
  const bool IsDefault = Verstr[0] == '@';
  if (IsDefault)
    Verstr = Verstr.substr(1);

  for (VersionDefinition &V : Config->VersionDefinitions) {
    if (V.Name != Verstr)
      continue;
    return {Name, IsDefault ? V.Id : (V.Id | VERSYM_HIDDEN)};
  }

  // It is an error if the specified version was not defined.
  error("symbol " + Full + " has undefined version " + Verstr);
  return {"", 0};
}
// Besides version scripts there is a second way to give a symbol a version:
// if the symbol name contains '@', the text after it is not part of the name
// but names a version. This function finds such symbols, strips the suffix
// from their names and records the version id on the Symbol.
template <class ELFT> void SymbolTable<ELFT>::scanSymbolVersions() {
  // Without any defined versions there is nothing a suffix could refer to.
  if (Config->VersionDefinitions.empty())
    return;

  const int MaxVersionLen = getMaxVersionLen();

  // Every symbol has to be inspected for an '@' in its name, which is
  // inherently slow. The good news is that this only happens when versions
  // have been defined.
  for (Symbol *Sym : SymVector) {
    SymbolBody *Body = Sym->body();

    // Symbol versions for exported symbols only make sense for defined
    // symbols with default or protected visibility.
    if (!Body->isDefined())
      continue;
    const uint8_t Visibility = Body->getVisibility();
    const bool Exportable =
        Visibility == STV_DEFAULT || Visibility == STV_PROTECTED;
    if (!Exportable)
      continue;

    // Look for '@' in the symbol name.
    std::pair<StringRef, uint16_t> Parsed =
        getSymbolVersion(Body, MaxVersionLen);
    if (Parsed.first.empty())
      continue;

    Body->setName(Parsed.first);
    Sym->VersionId = Parsed.second;
  }
}
template class elf::SymbolTable<ELF32LE>;
template class elf::SymbolTable<ELF32BE>;
template class elf::SymbolTable<ELF64LE>;
template class elf::SymbolTable<ELF64BE>;

View File

@ -11,14 +11,16 @@
#define LLD_ELF_SYMBOL_TABLE_H
#include "InputFiles.h"
#include "llvm/ADT/MapVector.h"
#include "LTO.h"
#include "llvm/ADT/DenseMap.h"
namespace lld {
namespace elf2 {
namespace elf {
class Lazy;
template <class ELFT> class OutputSectionBase;
struct Symbol;
class Undefined;
typedef llvm::CachedHash<StringRef> SymName;
// SymbolTable is a bucket of all known symbols, including defined,
// undefined, or lazy symbols (the last one is symbols in archive
@ -29,17 +31,18 @@ class Undefined;
// conflicts. For example, obviously, a defined symbol is better than
// an undefined symbol. Or, if there's a conflict between a lazy and a
// undefined, it'll read an archive member to read a real definition
// to replace the lazy symbol. The logic is implemented in resolve().
// to replace the lazy symbol. The logic is implemented in the
// add*() functions, which are called by input files as they are parsed. There
// is one add* function per symbol type.
template <class ELFT> class SymbolTable {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
public:
void addFile(std::unique_ptr<InputFile> File);
void addCombinedLtoObject();
const llvm::MapVector<StringRef, Symbol *> &getSymbols() const {
return Symtab;
}
llvm::ArrayRef<Symbol *> getSymbols() const { return SymVector; }
const std::vector<std::unique_ptr<ObjectFile<ELFT>>> &getObjectFiles() const {
return ObjectFiles;
@ -49,34 +52,69 @@ template <class ELFT> class SymbolTable {
return SharedFiles;
}
SymbolBody *addUndefined(StringRef Name);
SymbolBody *addUndefinedOpt(StringRef Name);
SymbolBody *addAbsolute(StringRef Name, Elf_Sym &ESym);
SymbolBody *addSynthetic(StringRef Name, OutputSectionBase<ELFT> &Section,
uintX_t Value);
SymbolBody *addIgnored(StringRef Name);
SymbolBody *addIgnoredStrong(StringRef Name);
DefinedRegular<ELFT> *addAbsolute(StringRef Name,
uint8_t Visibility = llvm::ELF::STV_HIDDEN);
DefinedRegular<ELFT> *addIgnored(StringRef Name,
uint8_t Visibility = llvm::ELF::STV_HIDDEN);
Symbol *addUndefined(StringRef Name);
Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
Symbol *addRegular(StringRef Name, const Elf_Sym &Sym,
InputSectionBase<ELFT> *Section);
Symbol *addRegular(StringRef Name, uint8_t Binding, uint8_t StOther);
Symbol *addSynthetic(StringRef N, OutputSectionBase<ELFT> *Section,
uintX_t Value);
void addShared(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
const typename ELFT::Verdef *Verdef);
void addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S);
void addLazyObject(StringRef Name, LazyObjectFile &Obj);
Symbol *addBitcode(StringRef Name, bool IsWeak, uint8_t StOther, uint8_t Type,
bool CanOmitFromDynSym, BitcodeFile *File);
Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment,
uint8_t Binding, uint8_t StOther, uint8_t Type,
InputFile *File);
void scanUndefinedFlags();
void scanShlibUndefined();
void scanDynamicList();
void scanVersionScript();
void scanSymbolVersions();
SymbolBody *find(StringRef Name);
void trace(StringRef Name);
void wrap(StringRef Name);
ELFFileBase<ELFT> *findFile(SymbolBody *B);
private:
Symbol *insert(SymbolBody *New);
void addLazy(Lazy *New);
void addMemberFile(Undefined *Undef, Lazy *L);
void resolve(SymbolBody *Body);
std::string conflictMsg(SymbolBody *Old, SymbolBody *New);
std::vector<SymbolBody *> findAll(StringRef Pattern);
std::pair<Symbol *, bool> insert(StringRef Name);
std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type,
uint8_t Visibility, bool CanOmitFromDynSym,
bool IsUsedInRegularObj, InputFile *File);
std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile);
void reportDuplicate(SymbolBody *Existing, InputFile *NewFile);
std::map<std::string, SymbolBody *> getDemangledSyms();
struct SymIndex {
int Idx : 31;
unsigned Traced : 1;
};
// The order the global symbols are in is not defined. We can use an arbitrary
// order, but it has to be reproducible. That is true even when cross linking.
// The default hashing of StringRef produces different results on 32 and 64
// bit systems so we use a MapVector. That is arbitrary, deterministic but
// a bit inefficient.
// bit systems so we use a map to a vector. That is arbitrary, deterministic
// but a bit inefficient.
// FIXME: Experiment with passing in a custom hashing or sorting the symbols
// once symbol resolution is finished.
llvm::MapVector<StringRef, Symbol *> Symtab;
llvm::DenseMap<SymName, SymIndex> Symtab;
std::vector<Symbol *> SymVector;
llvm::BumpPtrAllocator Alloc;
// Comdat groups define "link once" sections. If two comdat groups have the
@ -87,13 +125,20 @@ template <class ELFT> class SymbolTable {
// The symbol table owns all file objects.
std::vector<std::unique_ptr<ArchiveFile>> ArchiveFiles;
std::vector<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles;
std::vector<std::unique_ptr<LazyObjectFile>> LazyObjectFiles;
std::vector<std::unique_ptr<SharedFile<ELFT>>> SharedFiles;
std::vector<std::unique_ptr<BitcodeFile>> BitcodeFiles;
// Set of .so files to not link the same shared object file more than once.
llvm::DenseSet<StringRef> SoNames;
std::unique_ptr<BitcodeCompiler> Lto;
};
} // namespace elf2
// Holder for one static SymbolTable<ELFT> pointer per ELFT instantiation,
// reachable as Symtab<ELFT>::X. The second line is the out-of-class
// definition of that static member.
// NOTE(review): where X gets assigned is not visible here; confirm at the
// call sites.
template <class ELFT> struct Symtab { static SymbolTable<ELFT> *X; };
template <class ELFT> SymbolTable<ELFT> *Symtab<ELFT>::X;
} // namespace elf
} // namespace lld
#endif

View File

@ -8,9 +8,11 @@
//===----------------------------------------------------------------------===//
#include "Symbols.h"
#include "InputSection.h"
#include "Error.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "Target.h"
#include "llvm/ADT/STLExtras.h"
@ -19,131 +21,316 @@ using namespace llvm::object;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf2;
using namespace lld::elf;
static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
if (VA == STV_DEFAULT)
return VB;
if (VB == STV_DEFAULT)
template <class ELFT>
static typename ELFT::uint getSymVA(const SymbolBody &Body,
typename ELFT::uint &Addend) {
typedef typename ELFT::uint uintX_t;
switch (Body.kind()) {
case SymbolBody::DefinedSyntheticKind: {
auto &D = cast<DefinedSynthetic<ELFT>>(Body);
const OutputSectionBase<ELFT> *Sec = D.Section;
if (!Sec)
return D.Value;
if (D.Value == DefinedSynthetic<ELFT>::SectionEnd)
return Sec->getVA() + Sec->getSize();
return Sec->getVA() + D.Value;
}
case SymbolBody::DefinedRegularKind: {
auto &D = cast<DefinedRegular<ELFT>>(Body);
InputSectionBase<ELFT> *SC = D.Section;
// According to the ELF spec reference to a local symbol from outside
// the group are not allowed. Unfortunately .eh_frame breaks that rule
// and must be treated specially. For now we just replace the symbol with
// 0.
if (SC == &InputSection<ELFT>::Discarded)
return 0;
// This is an absolute symbol.
if (!SC)
return D.Value;
uintX_t Offset = D.Value;
if (D.isSection()) {
Offset += Addend;
Addend = 0;
}
uintX_t VA = SC->OutSec->getVA() + SC->getOffset(Offset);
if (D.isTls())
return VA - Out<ELFT>::TlsPhdr->p_vaddr;
return VA;
return std::min(VA, VB);
}
case SymbolBody::DefinedCommonKind:
return Out<ELFT>::Bss->getVA() + cast<DefinedCommon>(Body).OffsetInBss;
case SymbolBody::SharedKind: {
auto &SS = cast<SharedSymbol<ELFT>>(Body);
if (!SS.NeedsCopyOrPltAddr)
return 0;
if (SS.isFunc())
return Body.getPltVA<ELFT>();
return Out<ELFT>::Bss->getVA() + SS.OffsetInBss;
}
case SymbolBody::UndefinedKind:
return 0;
case SymbolBody::LazyArchiveKind:
case SymbolBody::LazyObjectKind:
assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer");
return 0;
case SymbolBody::DefinedBitcodeKind:
llvm_unreachable("should have been replaced");
}
llvm_unreachable("invalid symbol kind");
}
// Returns 1, 0 or -1 if this symbol should take precedence
// over the Other, tie or lose, respectively.
template <class ELFT> int SymbolBody::compare(SymbolBody *Other) {
typedef typename ELFFile<ELFT>::uintX_t uintX_t;
assert(!isLazy() && !Other->isLazy());
std::pair<bool, bool> L(isDefined(), !isWeak());
std::pair<bool, bool> R(Other->isDefined(), !Other->isWeak());
// Constructs a local symbol (IsLocal is true). Its name is not stored as a
// string; only NameOffset is recorded.
SymbolBody::SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther,
uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true),
IsInGlobalMipsGot(false), Type(Type), StOther(StOther),
NameOffset(NameOffset) {}
// Normalize
if (L > R)
return -Other->compare<ELFT>(this);
// Constructs a non-local symbol (IsLocal is false). The name is stored as a
// pointer/length pair taken from Name; the characters are not copied.
SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
IsInGlobalMipsGot(false), Type(Type), StOther(StOther),
Name({Name.data(), Name.size()}) {}
Visibility = Other->Visibility =
getMinVisibility(Visibility, Other->Visibility);
// Returns the symbol name rebuilt from the stored pointer/length pair.
// Must not be called on local symbols, which only carry a name offset.
StringRef SymbolBody::getName() const {
  assert(!isLocal());
  const char *Data = Name.S;
  size_t Length = Name.Len;
  return StringRef(Data, Length);
}
if (IsUsedInRegularObj || Other->IsUsedInRegularObj)
IsUsedInRegularObj = Other->IsUsedInRegularObj = true;
// Replaces the stored name with S. Only the pointer and length are kept,
// so the characters S refers to must outlive this symbol.
void SymbolBody::setName(StringRef S) {
  Name = {S.data(), S.size()};
}
if (L != R)
return -1;
if (!L.first || !L.second)
return 1;
// Returns true if a symbol can be replaced at load-time by a symbol
// with the same name defined in other ELF executable or DSO.
bool SymbolBody::isPreemptible() const {
if (isLocal())
return false;
// Shared symbols resolve to the definition in the DSO. The exceptions are
// symbols with copy relocations (which resolve to .bss) or preempt plt
// entries (which resolve to that plt entry).
if (isShared())
return -1;
if (Other->isShared())
return 1;
if (isCommon()) {
if (!Other->isCommon())
return -1;
auto *ThisC = cast<DefinedCommon>(this);
auto *OtherC = cast<DefinedCommon>(Other);
uintX_t Align = std::max(ThisC->MaxAlignment, OtherC->MaxAlignment);
if (ThisC->Size >= OtherC->Size) {
ThisC->MaxAlignment = Align;
return 1;
}
OtherC->MaxAlignment = Align;
return -1;
}
if (Other->isCommon())
return 1;
return !NeedsCopyOrPltAddr;
// That's all that can be preempted in a non-DSO.
if (!Config->Shared)
return false;
// Only symbols that appear in dynsym can be preempted.
if (!symbol()->includeInDynsym())
return false;
// Only default visibility symbols can be preempted.
if (symbol()->Visibility != STV_DEFAULT)
return false;
// -Bsymbolic means that definitions are not preempted.
if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc()))
return !isDefined();
return true;
}
// Returns true if this symbol is a DefinedRegular or SharedSymbol with a
// non-null ThunkData. Every other kind of symbol has no thunk.
template <class ELFT> bool SymbolBody::hasThunk() const {
  const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(this);
  if (Regular)
    return Regular->ThunkData != nullptr;
  const auto *Shared = dyn_cast<SharedSymbol<ELFT>>(this);
  return Shared && Shared->ThunkData != nullptr;
}
template <class ELFT>
typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const {
typename ELFT::uint OutVA = getSymVA<ELFT>(*this, Addend);
return OutVA + Addend;
}
// Returns the address of this symbol's GOT entry: the address of the GOT
// section plus the entry's offset within it.
template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const {
  const auto GotBase = Out<ELFT>::Got->getVA();
  const auto EntryOffset = getGotOffset<ELFT>();
  return GotBase + EntryOffset;
}
// Returns the offset of this symbol's entry from the start of the GOT,
// i.e. GotIndex scaled by the target's GOT entry size.
template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const {
  const auto Offset = GotIndex * Target->GotEntrySize;
  return Offset;
}
template <class ELFT> typename ELFT::uint SymbolBody::getGotPltVA() const {
return Out<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>();
}
// Returns the offset of this symbol's entry from the start of GotPlt,
// i.e. GotPltIndex scaled by the target's GotPlt entry size.
template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const {
  const auto Offset = GotPltIndex * Target->GotPltEntrySize;
  return Offset;
}
// Returns the address of this symbol's PLT entry. Entries follow the PLT
// header, so the result is section address + header size + index * entry size.
template <class ELFT> typename ELFT::uint SymbolBody::getPltVA() const {
  const auto EntryOffset = PltIndex * Target->PltEntrySize;
  return Out<ELFT>::Plt->getVA() + Target->PltHeaderSize + EntryOffset;
}
// Returns the address of the thunk attached to this symbol. Only
// DefinedRegular and SharedSymbol carry ThunkData; any other kind is a
// fatal error. ThunkData is dereferenced without a null check.
template <class ELFT> typename ELFT::uint SymbolBody::getThunkVA() const {
  const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(this);
  if (Regular)
    return Regular->ThunkData->getVA();
  const auto *Shared = dyn_cast<SharedSymbol<ELFT>>(this);
  if (Shared)
    return Shared->ThunkData->getVA();
  fatal("getThunkVA() not supported for Symbol class\n");
}
// Returns the size recorded for this symbol: Size for common and regular
// definitions, st_size for shared symbols, and 0 for every other kind.
template <class ELFT> typename ELFT::uint SymbolBody::getSize() const {
  const auto *Common = dyn_cast<DefinedCommon>(this);
  if (Common)
    return Common->Size;
  const auto *Regular = dyn_cast<DefinedRegular<ELFT>>(this);
  if (Regular)
    return Regular->Size;
  const auto *Shared = dyn_cast<SharedSymbol<ELFT>>(this);
  if (Shared)
    return Shared->Sym.st_size;
  return 0;
}
Defined::Defined(Kind K, StringRef Name, bool IsWeak, uint8_t Visibility,
bool IsTls)
: SymbolBody(K, Name, IsWeak, Visibility, IsTls) {}
// Constructs a named (non-local) defined symbol; all arguments are
// forwarded unchanged to the SymbolBody constructor.
Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolBody(K, Name, StOther, Type) {}
Undefined::Undefined(SymbolBody::Kind K, StringRef N, bool IsWeak,
uint8_t Visibility, bool IsTls)
: SymbolBody(K, N, IsWeak, Visibility, IsTls), CanKeepUndefined(false) {}
// Constructs a defined symbol identified by NameOffset; all arguments are
// forwarded unchanged to the SymbolBody constructor that creates local symbols.
Defined::Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type)
: SymbolBody(K, NameOffset, StOther, Type) {}
Undefined::Undefined(StringRef N, bool IsWeak, uint8_t Visibility,
bool CanKeepUndefined)
: Undefined(SymbolBody::UndefinedKind, N, IsWeak, Visibility,
/*IsTls*/ false) {
this->CanKeepUndefined = CanKeepUndefined;
// Constructs a symbol defined in an LLVM bitcode file and records F as the
// file it came from.
DefinedBitcode::DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type,
BitcodeFile *F)
: Defined(DefinedBitcodeKind, Name, StOther, Type) {
this->File = F;
}
template <typename ELFT>
UndefinedElf<ELFT>::UndefinedElf(StringRef N, const Elf_Sym &Sym)
: Undefined(SymbolBody::UndefinedElfKind, N,
Sym.getBinding() == llvm::ELF::STB_WEAK, Sym.getVisibility(),
Sym.getType() == llvm::ELF::STT_TLS),
Sym(Sym) {}
// LLVM-style RTTI hook: returns true iff S is a DefinedBitcode, which lets
// isa<>/cast<>/dyn_cast<> work on SymbolBody pointers.
bool DefinedBitcode::classof(const SymbolBody *S) {
return S->kind() == DefinedBitcodeKind;
}
// Constructs a named undefined symbol and records the input file that
// produced it.
Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
this->File = File;
}
// Constructs an undefined symbol identified by NameOffset (the SymbolBody
// constructor used here marks the symbol as local) and records the input
// file that produced it.
Undefined::Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) {
this->File = File;
}
template <typename ELFT>
DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
OutputSectionBase<ELFT> &Section)
: Defined(SymbolBody::DefinedSyntheticKind, N, false, STV_DEFAULT, false),
OutputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
Value(Value), Section(Section) {}
DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
bool IsWeak, uint8_t Visibility)
: Defined(SymbolBody::DefinedCommonKind, N, IsWeak, Visibility, false) {
MaxAlignment = Alignment;
this->Size = Size;
uint8_t StOther, uint8_t Type, InputFile *File)
: Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
Alignment(Alignment), Size(Size) {
this->File = File;
}
std::unique_ptr<InputFile> Lazy::getMember() {
MemoryBufferRef MBRef = File->getMember(&Sym);
std::unique_ptr<InputFile> Lazy::fetch() {
if (auto *S = dyn_cast<LazyArchive>(this))
return S->fetch();
return cast<LazyObject>(this)->fetch();
}
// Constructs a lazy symbol for archive symbol S. The symbol name is taken
// from S, and File is recorded as the archive that provides it.
LazyArchive::LazyArchive(ArchiveFile &File,
const llvm::object::Archive::Symbol S, uint8_t Type)
: Lazy(LazyArchiveKind, S.getName(), Type), Sym(S) {
this->File = &File;
}
// Constructs a lazy symbol named Name and records File as the lazy object
// file that provides it.
LazyObject::LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type)
: Lazy(LazyObjectKind, Name, Type) {
this->File = &File;
}
std::unique_ptr<InputFile> LazyArchive::fetch() {
MemoryBufferRef MBRef = file()->getMember(&Sym);
// getMember returns an empty buffer if the member was already
// read from the library.
if (MBRef.getBuffer().empty())
return std::unique_ptr<InputFile>(nullptr);
return createObjectFile(MBRef, file()->getName());
}
std::unique_ptr<InputFile> LazyObject::fetch() {
MemoryBufferRef MBRef = file()->getBuffer();
if (MBRef.getBuffer().empty())
return std::unique_ptr<InputFile>(nullptr);
return createObjectFile(MBRef);
}
template <class ELFT> static void doInitSymbols() {
ElfSym<ELFT>::End.setBinding(STB_GLOBAL);
ElfSym<ELFT>::IgnoredWeak.setBinding(STB_WEAK);
ElfSym<ELFT>::IgnoredWeak.setVisibility(STV_HIDDEN);
ElfSym<ELFT>::Ignored.setBinding(STB_GLOBAL);
ElfSym<ELFT>::Ignored.setVisibility(STV_HIDDEN);
// Decides whether this symbol is included in .dynsym. Only default or
// protected visibility qualifies; beyond that the symbol must be exported
// (and not bound to the local version), be a shared symbol, or be
// undefined while a shared object is being produced.
bool Symbol::includeInDynsym() const {
  const bool VisibleOutside =
      Visibility == STV_DEFAULT || Visibility == STV_PROTECTED;
  if (!VisibleOutside)
    return false;
  if (ExportDynamic && VersionId != VER_NDX_LOCAL)
    return true;
  if (body()->isShared())
    return true;
  return body()->isUndefined() && Config->Shared;
}
void elf2::initSymbols() {
doInitSymbols<ELF32LE>();
doInitSymbols<ELF32BE>();
doInitSymbols<ELF64LE>();
doInitSymbols<ELF64BE>();
// Prints the --trace-symbol log line for Sym to outs():
// "<file>: reference to <name>", "<file>: common definition of <name>" or
// "<file>: definition of <name>", depending on the kind of the symbol body.
void elf::printTraceSymbol(Symbol *Sym) {
  SymbolBody *Body = Sym->body();
  outs() << getFilename(Body->File);

  const char *Role;
  if (Body->isUndefined())
    Role = ": reference to ";
  else if (Body->isCommon())
    Role = ": common definition of ";
  else
    Role = ": definition of ";

  outs() << Role;
  outs() << Body->getName() << "\n";
}
template int SymbolBody::compare<ELF32LE>(SymbolBody *Other);
template int SymbolBody::compare<ELF32BE>(SymbolBody *Other);
template int SymbolBody::compare<ELF64LE>(SymbolBody *Other);
template int SymbolBody::compare<ELF64BE>(SymbolBody *Other);
template bool SymbolBody::hasThunk<ELF32LE>() const;
template bool SymbolBody::hasThunk<ELF32BE>() const;
template bool SymbolBody::hasThunk<ELF64LE>() const;
template bool SymbolBody::hasThunk<ELF64BE>() const;
template class elf2::UndefinedElf<ELF32LE>;
template class elf2::UndefinedElf<ELF32BE>;
template class elf2::UndefinedElf<ELF64LE>;
template class elf2::UndefinedElf<ELF64BE>;
template uint32_t SymbolBody::template getVA<ELF32LE>(uint32_t) const;
template uint32_t SymbolBody::template getVA<ELF32BE>(uint32_t) const;
template uint64_t SymbolBody::template getVA<ELF64LE>(uint64_t) const;
template uint64_t SymbolBody::template getVA<ELF64BE>(uint64_t) const;
template class elf2::DefinedSynthetic<ELF32LE>;
template class elf2::DefinedSynthetic<ELF32BE>;
template class elf2::DefinedSynthetic<ELF64LE>;
template class elf2::DefinedSynthetic<ELF64BE>;
template uint32_t SymbolBody::template getGotVA<ELF32LE>() const;
template uint32_t SymbolBody::template getGotVA<ELF32BE>() const;
template uint64_t SymbolBody::template getGotVA<ELF64LE>() const;
template uint64_t SymbolBody::template getGotVA<ELF64BE>() const;
template uint32_t SymbolBody::template getGotOffset<ELF32LE>() const;
template uint32_t SymbolBody::template getGotOffset<ELF32BE>() const;
template uint64_t SymbolBody::template getGotOffset<ELF64LE>() const;
template uint64_t SymbolBody::template getGotOffset<ELF64BE>() const;
template uint32_t SymbolBody::template getGotPltVA<ELF32LE>() const;
template uint32_t SymbolBody::template getGotPltVA<ELF32BE>() const;
template uint64_t SymbolBody::template getGotPltVA<ELF64LE>() const;
template uint64_t SymbolBody::template getGotPltVA<ELF64BE>() const;
template uint32_t SymbolBody::template getThunkVA<ELF32LE>() const;
template uint32_t SymbolBody::template getThunkVA<ELF32BE>() const;
template uint64_t SymbolBody::template getThunkVA<ELF64LE>() const;
template uint64_t SymbolBody::template getThunkVA<ELF64BE>() const;
template uint32_t SymbolBody::template getGotPltOffset<ELF32LE>() const;
template uint32_t SymbolBody::template getGotPltOffset<ELF32BE>() const;
template uint64_t SymbolBody::template getGotPltOffset<ELF64LE>() const;
template uint64_t SymbolBody::template getGotPltOffset<ELF64BE>() const;
template uint32_t SymbolBody::template getPltVA<ELF32LE>() const;
template uint32_t SymbolBody::template getPltVA<ELF32BE>() const;
template uint64_t SymbolBody::template getPltVA<ELF64LE>() const;
template uint64_t SymbolBody::template getPltVA<ELF64BE>() const;
template uint32_t SymbolBody::template getSize<ELF32LE>() const;
template uint32_t SymbolBody::template getSize<ELF32BE>() const;
template uint64_t SymbolBody::template getSize<ELF64LE>() const;
template uint64_t SymbolBody::template getSize<ELF64BE>() const;
template class elf::DefinedSynthetic<ELF32LE>;
template class elf::DefinedSynthetic<ELF32BE>;
template class elf::DefinedSynthetic<ELF64LE>;
template class elf::DefinedSynthetic<ELF64BE>;

View File

@ -10,14 +10,6 @@
// All symbols are handled as SymbolBodies regardless of their types.
// This file defines various types of SymbolBodies.
//
// File-scope symbols in ELF objects are the only exception of SymbolBody
// instantiation. We will never create SymbolBodies for them for performance
// reason. They are often represented as nullptrs. This is fine for symbol
// resolution because the symbol table naturally cares only about
// externally-visible symbols. For relocations, you have to deal with both
// local and non-local functions, and we have two different functions
// where we need them.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SYMBOLS_H
@ -28,28 +20,22 @@
#include "lld/Core/LLVM.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/AlignOf.h"
namespace lld {
namespace elf2 {
namespace elf {
class ArchiveFile;
class BitcodeFile;
class InputFile;
class LazyObjectFile;
class SymbolBody;
template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection;
template <class ELFT> class OutputSectionBase;
template <class ELFT> class SharedFile;
// Initializes global objects defined in this file.
// Called at the beginning of main().
void initSymbols();
// A real symbol object, SymbolBody, is usually accessed indirectly
// through a Symbol. There's always one Symbol for each symbol name.
// The resolver updates SymbolBody pointers as it resolves symbols.
struct Symbol {
SymbolBody *Body;
};
struct Symbol;
// The base class for real symbol classes.
class SymbolBody {
@ -58,115 +44,134 @@ class SymbolBody {
DefinedFirst,
DefinedRegularKind = DefinedFirst,
SharedKind,
DefinedElfLast = SharedKind,
DefinedCommonKind,
DefinedBitcodeKind,
DefinedSyntheticKind,
DefinedLast = DefinedSyntheticKind,
UndefinedElfKind,
UndefinedKind,
LazyKind
LazyArchiveKind,
LazyObjectKind,
};
SymbolBody(Kind K) : SymbolKind(K) {}
Symbol *symbol();
const Symbol *symbol() const {
return const_cast<SymbolBody *>(this)->symbol();
}
Kind kind() const { return static_cast<Kind>(SymbolKind); }
bool isWeak() const { return IsWeak; }
bool isUndefined() const {
return SymbolKind == UndefinedKind || SymbolKind == UndefinedElfKind;
}
bool isUndefined() const { return SymbolKind == UndefinedKind; }
bool isDefined() const { return SymbolKind <= DefinedLast; }
bool isCommon() const { return SymbolKind == DefinedCommonKind; }
bool isLazy() const { return SymbolKind == LazyKind; }
bool isLazy() const {
return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind;
}
bool isShared() const { return SymbolKind == SharedKind; }
bool isUsedInRegularObj() const { return IsUsedInRegularObj; }
bool isUsedInDynamicReloc() const { return IsUsedInDynamicReloc; }
void setUsedInDynamicReloc() { IsUsedInDynamicReloc = true; }
bool isTls() const { return IsTls; }
bool isLocal() const { return IsLocal; }
bool isPreemptible() const;
// Returns the symbol name.
StringRef getName() const { return Name; }
StringRef getName() const;
void setName(StringRef S);
uint8_t getVisibility() const { return Visibility; }
uint32_t getNameOffset() const {
assert(isLocal());
return NameOffset;
}
unsigned DynamicSymbolTableIndex = 0;
uint32_t GlobalDynIndex = -1;
uint8_t getVisibility() const { return StOther & 0x3; }
unsigned DynsymIndex = 0;
uint32_t GotIndex = -1;
uint32_t GotPltIndex = -1;
uint32_t PltIndex = -1;
bool hasGlobalDynIndex() { return GlobalDynIndex != uint32_t(-1); }
uint32_t GlobalDynIndex = -1;
bool isInGot() const { return GotIndex != -1U; }
bool isInGotPlt() const { return GotPltIndex != -1U; }
bool isInPlt() const { return PltIndex != -1U; }
template <class ELFT> bool hasThunk() const;
// A SymbolBody has a backreference to a Symbol. Originally they are
// doubly-linked. A backreference will never change. But the pointer
// in the Symbol may be mutated by the resolver. If you have a
// pointer P to a SymbolBody and are not sure whether the resolver
// has chosen the object among other objects having the same name,
// you can access P->Backref->Body to get the resolver's result.
void setBackref(Symbol *P) { Backref = P; }
SymbolBody *repl() { return Backref ? Backref->Body : this; }
Symbol *getSymbol() { return Backref; }
template <class ELFT>
typename ELFT::uint getVA(typename ELFT::uint Addend = 0) const;
// Decides which symbol should "win" in the symbol table, this or
// the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if
// they are duplicate (conflicting) symbols.
template <class ELFT> int compare(SymbolBody *Other);
template <class ELFT> typename ELFT::uint getGotOffset() const;
template <class ELFT> typename ELFT::uint getGotVA() const;
template <class ELFT> typename ELFT::uint getGotPltOffset() const;
template <class ELFT> typename ELFT::uint getGotPltVA() const;
template <class ELFT> typename ELFT::uint getPltVA() const;
template <class ELFT> typename ELFT::uint getThunkVA() const;
template <class ELFT> typename ELFT::uint getSize() const;
// The file from which this symbol was created.
InputFile *File = nullptr;
protected:
SymbolBody(Kind K, StringRef Name, bool IsWeak, uint8_t Visibility,
bool IsTls)
: SymbolKind(K), IsWeak(IsWeak), Visibility(Visibility), IsTls(IsTls),
Name(Name) {
IsUsedInRegularObj = K != SharedKind && K != LazyKind;
IsUsedInDynamicReloc = 0;
}
SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type);
const unsigned SymbolKind : 8;
unsigned IsWeak : 1;
unsigned Visibility : 2;
// True if the symbol was used for linking and thus need to be
// added to the output file's symbol table. It is usually true,
// but if it is a shared symbol that were not referenced by anyone,
// it can be false.
unsigned IsUsedInRegularObj : 1;
public:
// True if the linker has to generate a copy relocation for this shared
// symbol or if the symbol should point to its plt entry.
unsigned NeedsCopyOrPltAddr : 1;
// If true, the symbol is added to .dynsym symbol table.
unsigned IsUsedInDynamicReloc : 1;
// True if this is a local symbol.
unsigned IsLocal : 1;
unsigned IsTls : 1;
StringRef Name;
Symbol *Backref = nullptr;
// True if this symbol has an entry in the global part of MIPS GOT.
unsigned IsInGlobalMipsGot : 1;
// The following fields have the same meaning as the ELF symbol attributes.
uint8_t Type; // symbol type
uint8_t StOther; // st_other field value
// The Type field may also have this value. It means that we have not yet seen
// a non-Lazy symbol with this name, so we don't know what its type is. The
// Type field is normally set to this value for Lazy symbols unless we saw a
// weak undefined symbol first, in which case we need to remember the original
// symbol's type in order to check for TLS mismatches.
enum { UnknownType = 255 };
bool isSection() const { return Type == llvm::ELF::STT_SECTION; }
bool isTls() const { return Type == llvm::ELF::STT_TLS; }
bool isFunc() const { return Type == llvm::ELF::STT_FUNC; }
bool isGnuIFunc() const { return Type == llvm::ELF::STT_GNU_IFUNC; }
bool isObject() const { return Type == llvm::ELF::STT_OBJECT; }
bool isFile() const { return Type == llvm::ELF::STT_FILE; }
protected:
struct Str {
const char *S;
size_t Len;
};
union {
Str Name;
uint32_t NameOffset;
};
};
// The base class for any defined symbols.
class Defined : public SymbolBody {
public:
Defined(Kind K, StringRef Name, bool IsWeak, uint8_t Visibility, bool IsTls);
Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type);
static bool classof(const SymbolBody *S) { return S->isDefined(); }
};
// Any defined symbol from an ELF file.
template <class ELFT> class DefinedElf : public Defined {
protected:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
// The defined symbol in LLVM bitcode files.
class DefinedBitcode : public Defined {
public:
DefinedElf(Kind K, StringRef N, const Elf_Sym &Sym)
: Defined(K, N, Sym.getBinding() == llvm::ELF::STB_WEAK,
Sym.getVisibility(), Sym.getType() == llvm::ELF::STT_TLS),
Sym(Sym) {}
const Elf_Sym &Sym;
static bool classof(const SymbolBody *S) {
return S->kind() <= DefinedElfLast;
}
DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type, BitcodeFile *F);
static bool classof(const SymbolBody *S);
BitcodeFile *file() { return (BitcodeFile *)this->File; }
};
class DefinedCommon : public Defined {
public:
DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, bool IsWeak,
uint8_t Visibility);
DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther,
uint8_t Type, InputFile *File);
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::DefinedCommonKind;
@ -177,95 +182,137 @@ class DefinedCommon : public Defined {
uint64_t OffsetInBss;
// The maximum alignment we have seen for this symbol.
uint64_t MaxAlignment;
uint64_t Alignment;
uint64_t Size;
};
// Regular defined symbols read from object file symbol tables.
template <class ELFT> class DefinedRegular : public DefinedElf<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
template <class ELFT> class DefinedRegular : public Defined {
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::uint uintX_t;
public:
DefinedRegular(StringRef N, const Elf_Sym &Sym,
DefinedRegular(StringRef Name, const Elf_Sym &Sym,
InputSectionBase<ELFT> *Section)
: DefinedElf<ELFT>(SymbolBody::DefinedRegularKind, N, Sym),
Section(Section) {}
: Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
Sym.getType()),
Value(Sym.st_value), Size(Sym.st_size),
Section(Section ? Section->Repl : NullInputSection) {
if (Section)
this->File = Section->getFile();
}
DefinedRegular(const Elf_Sym &Sym, InputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedRegularKind, Sym.st_name, Sym.st_other,
Sym.getType()),
Value(Sym.st_value), Size(Sym.st_size),
Section(Section ? Section->Repl : NullInputSection) {
assert(isLocal());
if (Section)
this->File = Section->getFile();
}
DefinedRegular(StringRef Name, uint8_t StOther)
: Defined(SymbolBody::DefinedRegularKind, Name, StOther,
llvm::ELF::STT_NOTYPE),
Value(0), Size(0), Section(NullInputSection) {}
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::DefinedRegularKind;
}
// If this is null, the symbol is absolute.
InputSectionBase<ELFT> *Section;
uintX_t Value;
uintX_t Size;
// The input section this symbol belongs to. Notice that this is
// a reference to a pointer. We are using two levels of indirections
// because of ICF. If ICF decides two sections need to be merged, it
// manipulates this Section pointers so that they point to the same
// section. This is a bit tricky, so be careful to not be confused.
// If this is null, the symbol is an absolute symbol.
InputSectionBase<ELFT> *&Section;
// If non-null the symbol has a Thunk that may be used as an alternative
// destination for callers of this Symbol.
Thunk<ELFT> *ThunkData = nullptr;
private:
static InputSectionBase<ELFT> *NullInputSection;
};
template <class ELFT>
InputSectionBase<ELFT> *DefinedRegular<ELFT>::NullInputSection;
// DefinedSynthetic is a class to represent linker-generated ELF symbols.
// The difference from the regular symbol is that DefinedSynthetic symbols
// don't belong to any input files or sections. Thus, its constructor
// takes an output section to calculate output VA, etc.
// If Section is null, this symbol is relative to the image base.
template <class ELFT> class DefinedSynthetic : public Defined {
public:
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
typedef typename ELFT::uint uintX_t;
DefinedSynthetic(StringRef N, uintX_t Value,
OutputSectionBase<ELFT> &Section);
OutputSectionBase<ELFT> *Section);
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::DefinedSyntheticKind;
}
// Special value designates that the symbol 'points'
// to the end of the section.
static const uintX_t SectionEnd = uintX_t(-1);
uintX_t Value;
const OutputSectionBase<ELFT> &Section;
const OutputSectionBase<ELFT> *Section;
};
// Undefined symbol.
class Undefined : public SymbolBody {
typedef SymbolBody::Kind Kind;
bool CanKeepUndefined;
protected:
Undefined(Kind K, StringRef N, bool IsWeak, uint8_t Visibility, bool IsTls);
public:
Undefined(StringRef N, bool IsWeak, uint8_t Visibility,
bool CanKeepUndefined);
static bool classof(const SymbolBody *S) { return S->isUndefined(); }
bool canKeepUndefined() const { return CanKeepUndefined; }
};
template <class ELFT> class UndefinedElf : public Undefined {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
public:
UndefinedElf(StringRef N, const Elf_Sym &Sym);
const Elf_Sym &Sym;
Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F);
Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type, InputFile *F);
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::UndefinedElfKind;
return S->kind() == UndefinedKind;
}
InputFile *file() { return this->File; }
};
template <class ELFT> class SharedSymbol : public DefinedElf<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
template <class ELFT> class SharedSymbol : public Defined {
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Verdef Elf_Verdef;
typedef typename ELFT::uint uintX_t;
public:
static bool classof(const SymbolBody *S) {
return S->kind() == SymbolBody::SharedKind;
}
SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym)
: DefinedElf<ELFT>(SymbolBody::SharedKind, Name, Sym), File(F) {}
SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
const Elf_Verdef *Verdef)
: Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()),
Sym(Sym), Verdef(Verdef) {
// IFuncs defined in DSOs are treated as functions by the static linker.
if (isGnuIFunc())
Type = llvm::ELF::STT_FUNC;
this->File = F;
}
SharedFile<ELFT> *File;
SharedFile<ELFT> *file() { return (SharedFile<ELFT> *)this->File; }
// True if the linker has to generate a copy relocation for this shared
// symbol. OffsetInBss is significant only when NeedsCopy is true.
bool NeedsCopy = false;
const Elf_Sym &Sym;
// This field is a pointer to the symbol's version definition.
const Elf_Verdef *Verdef;
// OffsetInBss is significant only when needsCopy() is true.
uintX_t OffsetInBss = 0;
// If non-null the symbol has a Thunk that may be used as an alternative
// destination for callers of this Symbol.
Thunk<ELFT> *ThunkData = nullptr;
bool needsCopy() const { return this->NeedsCopyOrPltAddr && !this->isFunc(); }
};
// This class represents a symbol defined in an archive file. It is
@ -275,58 +322,153 @@ template <class ELFT> class SharedSymbol : public DefinedElf<ELFT> {
// the same name, it will ask the Lazy to load a file.
class Lazy : public SymbolBody {
public:
Lazy(ArchiveFile *F, const llvm::object::Archive::Symbol S)
: SymbolBody(LazyKind, S.getName(), false, llvm::ELF::STV_DEFAULT, false),
File(F), Sym(S) {}
static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; }
static bool classof(const SymbolBody *S) { return S->isLazy(); }
// Returns an object file for this symbol, or a nullptr if the file
// was already returned.
std::unique_ptr<InputFile> getMember();
std::unique_ptr<InputFile> fetch();
void setWeak() { IsWeak = true; }
void setUsedInRegularObj() { IsUsedInRegularObj = true; }
protected:
Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
: SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {}
};
// LazyArchive symbols represent symbols in archive files.
class LazyArchive : public Lazy {
public:
LazyArchive(ArchiveFile &File, const llvm::object::Archive::Symbol S,
uint8_t Type);
static bool classof(const SymbolBody *S) {
return S->kind() == LazyArchiveKind;
}
ArchiveFile *file() { return (ArchiveFile *)this->File; }
std::unique_ptr<InputFile> fetch();
private:
ArchiveFile *File;
const llvm::object::Archive::Symbol Sym;
};
// Some linker-generated symbols need to be created as
// DefinedRegular symbols, so they need Elf_Sym symbols.
// Here we allocate such Elf_Sym symbols statically.
template <class ELFT> struct ElfSym {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym Elf_Sym;
// LazyObject symbols represent symbols in object files between
// --start-lib and --end-lib options.
class LazyObject : public Lazy {
public:
LazyObject(StringRef Name, LazyObjectFile &File, uint8_t Type);
// Used to represent an undefined symbol which we don't want
// to add to the output file's symbol table. The `IgnoredWeak`
// has weak binding and can be substituted. The `Ignore` has
// global binding and gets priority over symbols from shared libs.
static Elf_Sym IgnoredWeak;
static Elf_Sym Ignored;
static bool classof(const SymbolBody *S) {
return S->kind() == LazyObjectKind;
}
// The content for _end and end symbols.
static Elf_Sym End;
// The content for _gp symbol for MIPS target.
static Elf_Sym MipsGp;
// __rel_iplt_start/__rel_iplt_end for signaling
// where R_[*]_IRELATIVE relocations do live.
static Elf_Sym RelaIpltStart;
static Elf_Sym RelaIpltEnd;
LazyObjectFile *file() { return (LazyObjectFile *)this->File; }
std::unique_ptr<InputFile> fetch();
};
template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::IgnoredWeak;
template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::Ignored;
template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::End;
template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::MipsGp;
template <class ELFT>
typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::RelaIpltStart;
template <class ELFT> typename ElfSym<ELFT>::Elf_Sym ElfSym<ELFT>::RelaIpltEnd;
// Some linker-generated symbols need to be created as
// DefinedRegular symbols.
template <class ELFT> struct ElfSym {
// The content for _etext and etext symbols.
static DefinedRegular<ELFT> *Etext;
static DefinedRegular<ELFT> *Etext2;
} // namespace elf2
// The content for _edata and edata symbols.
static DefinedRegular<ELFT> *Edata;
static DefinedRegular<ELFT> *Edata2;
// The content for _end and end symbols.
static DefinedRegular<ELFT> *End;
static DefinedRegular<ELFT> *End2;
// The content for _gp_disp symbol for MIPS target.
static SymbolBody *MipsGpDisp;
};
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata2;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End;
template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End2;
template <class ELFT> SymbolBody *ElfSym<ELFT>::MipsGpDisp;
// A real symbol object, SymbolBody, is usually stored within a Symbol. There's
// always one Symbol for each symbol name. The resolver updates the SymbolBody
// stored in the Body field of this object as it resolves symbols. Symbol also
// holds computed properties of symbol names.
struct Symbol {
// Symbol binding. This is on the Symbol to track changes during resolution.
// In particular:
// An undefined weak is still weak when it resolves to a shared library.
// An undefined weak will not fetch archive members, but we have to remember
// it is weak.
uint8_t Binding;
// Version definition index.
uint16_t VersionId;
// Symbol visibility. This is the computed minimum visibility of all
// observed non-DSO symbols.
unsigned Visibility : 2;
// True if the symbol was used for linking and thus needs to be added to the
// output file's symbol table. This is true for all symbols except for
// unreferenced DSO symbols and bitcode symbols that are unreferenced except
// by other bitcode objects.
unsigned IsUsedInRegularObj : 1;
// If this flag is true and the symbol has protected or default visibility, it
// will appear in .dynsym. This flag is set by interposable DSO symbols in
// executables, by most symbols in DSOs and executables built with
// --export-dynamic, and by dynamic lists.
unsigned ExportDynamic : 1;
// True if this symbol is specified by --trace-symbol option.
unsigned Traced : 1;
// Returns true if this symbol should be included in .dynsym.
bool includeInDynsym() const;
// Returns true if the recorded binding is STB_WEAK.
bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; }
// This field is used to store the Symbol's SymbolBody. This instantiation of
// AlignedCharArrayUnion gives us a struct with a char array field that is
// large and aligned enough to store any derived class of SymbolBody. We
// assume that the size and alignment of ELF64LE symbols is sufficient for any
// ELFT, and we verify this with the static_asserts in replaceBody.
llvm::AlignedCharArrayUnion<
DefinedBitcode, DefinedCommon, DefinedRegular<llvm::object::ELF64LE>,
DefinedSynthetic<llvm::object::ELF64LE>, Undefined,
SharedSymbol<llvm::object::ELF64LE>, LazyArchive, LazyObject>
Body;
// Accessors for the SymbolBody stored in the Body buffer. The buffer is
// reinterpreted in place; the const overload forwards to the non-const one.
SymbolBody *body() { return reinterpret_cast<SymbolBody *>(Body.buffer); }
const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); }
};
// Emits the --trace-symbol log message for Sym. Declared here and defined
// elsewhere; replaceBody calls it whenever Sym->Traced is set.
void printTraceSymbol(Symbol *Sym);
// Constructs a T in place inside S->Body, forwarding Arg... to T's
// constructor. T must be a SymbolBody subclass that fits in, and is not more
// strictly aligned than, the Body storage; the first two requirements are
// checked at compile time.
template <typename T, typename... ArgT>
void replaceBody(Symbol *S, ArgT &&... Arg) {
  using BodyStorage = decltype(S->Body);

  static_assert(sizeof(T) <= sizeof(BodyStorage), "Body too small");
  static_assert(llvm::AlignOf<T>::Alignment <=
                    llvm::AlignOf<BodyStorage>::Alignment,
                "Body not aligned enough");
  // The static_cast only compiles if T* converts to SymbolBody*.
  assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr &&
         "Not a SymbolBody");

  void *Storage = S->Body.buffer;
  new (Storage) T(std::forward<ArgT>(Arg)...);

  // Nothing more to do unless --trace-symbol asked for this symbol; the log
  // message is a debugging aid.
  if (!S->Traced)
    return;
  printTraceSymbol(S);
}
// Returns the Symbol whose Body storage holds this SymbolBody. The address is
// obtained by stepping back from `this` by the offset of Body within Symbol.
// Must not be called on a local symbol (asserted).
inline Symbol *SymbolBody::symbol() {
  assert(!isLocal());
  char *BodyAddr = reinterpret_cast<char *>(this);
  char *SymbolAddr = BodyAddr - offsetof(Symbol, Body);
  return reinterpret_cast<Symbol *>(SymbolAddr);
}
} // namespace elf
} // namespace lld
#endif

File diff suppressed because it is too large Load Diff

View File

@ -10,76 +10,57 @@
#ifndef LLD_ELF_TARGET_H
#define LLD_ELF_TARGET_H
#include "InputSection.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ELF.h"
#include <memory>
namespace lld {
namespace elf2 {
namespace elf {
class InputFile;
class SymbolBody;
class TargetInfo {
public:
unsigned getPageSize() const { return PageSize; }
uint64_t getVAStart() const;
unsigned getCopyReloc() const { return CopyReloc; }
unsigned getGotReloc() const { return GotReloc; }
unsigned getPltReloc() const { return PltReloc; }
unsigned getRelativeReloc() const { return RelativeReloc; }
unsigned getIRelativeReloc() const { return IRelativeReloc; }
bool isTlsLocalDynamicReloc(unsigned Type) const {
return Type == TlsLocalDynamicReloc;
}
bool isTlsGlobalDynamicReloc(unsigned Type) const {
return Type == TlsGlobalDynamicReloc;
}
unsigned getTlsModuleIndexReloc() const { return TlsModuleIndexReloc; }
unsigned getTlsOffsetReloc() const { return TlsOffsetReloc; }
unsigned getPltZeroEntrySize() const { return PltZeroEntrySize; }
unsigned getPltEntrySize() const { return PltEntrySize; }
bool supportsLazyRelocations() const { return LazyRelocations; }
unsigned getGotHeaderEntriesNum() const { return GotHeaderEntriesNum; }
unsigned getGotPltHeaderEntriesNum() const { return GotPltHeaderEntriesNum; }
virtual unsigned getDynReloc(unsigned Type) const { return Type; }
virtual bool isTlsDynReloc(unsigned Type, const SymbolBody &S) const {
return false;
}
virtual unsigned getTlsGotReloc(unsigned Type = -1) const {
return TlsGotReloc;
}
virtual void writeGotHeaderEntries(uint8_t *Buf) const;
virtual void writeGotPltHeaderEntries(uint8_t *Buf) const;
virtual void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const = 0;
virtual void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
uint64_t PltEntryAddr) const = 0;
virtual void writePltEntry(uint8_t *Buf, uint64_t GotAddr,
uint64_t GotEntryAddr, uint64_t PltEntryAddr,
int32_t Index, unsigned RelOff) const = 0;
virtual bool isTlsInitialExecRel(uint32_t Type) const;
virtual bool isTlsLocalDynamicRel(uint32_t Type) const;
virtual bool isTlsGlobalDynamicRel(uint32_t Type) const;
virtual uint32_t getDynRel(uint32_t Type) const { return Type; }
virtual void writeGotPltHeader(uint8_t *Buf) const {}
virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {};
virtual uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const;
// Returns true if a relocation is relative to the place being relocated,
// such as relocations used for PC-relative instructions. Such relocations
// need not be fixed up if an image is loaded to a different address than
// the link-time address. So we don't have to emit a relocation for the
// dynamic linker if isRelRelative returns true.
virtual bool isRelRelative(uint32_t Type) const;
// If lazy binding is supported, the first entry of the PLT has code
// to call the dynamic linker to resolve PLT entries the first time
// they are called. This function writes that code.
virtual void writePltHeader(uint8_t *Buf) const {}
virtual bool isSizeReloc(uint32_t Type) const;
virtual bool relocNeedsDynRelative(unsigned Type) const { return false; }
virtual bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const = 0;
virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t P, uint64_t SA, uint64_t ZA = 0,
uint8_t *PairedLoc = nullptr) const = 0;
virtual bool isGotRelative(uint32_t Type) const;
virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
virtual bool needsCopyRel(uint32_t Type, const SymbolBody &S) const;
virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
uint32_t Type, uint64_t P, uint64_t SA,
const SymbolBody &S) const;
virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {}
// Returns true if a relocation only uses the low bits of a value such that
// all those bits are in the same page. For example, if the relocation
// only uses the low 12 bits in a system with 4k pages. If this is true, the
// bits will always have the same value at runtime and we don't have to emit
// a dynamic relocation.
virtual bool usesOnlyLowPageBits(uint32_t Type) const;
// Decide whether a Thunk is needed for the relocation from File
// targeting S. Returns one of:
// Expr if there is no Thunk required
// R_THUNK_ABS if thunk is required and expression is absolute
// R_THUNK_PC if thunk is required and expression is pc rel
// R_THUNK_PLT_PC if thunk is required to PLT entry and expression is pc rel
virtual RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType,
const InputFile &File,
const SymbolBody &S) const;
virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const = 0;
virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0;
virtual ~TargetInfo();
protected:
unsigned TlsGdRelaxSkip = 1;
unsigned PageSize = 4096;
// On freebsd x86_64 the first page cannot be mmaped.
@ -88,34 +69,44 @@ class TargetInfo {
// Given that, the smallest value that can be used in here is 0x10000.
// If using 2MB pages, the smallest page aligned address that works is
// 0x200000, but it looks like every OS uses 4k pages for executables.
uint64_t VAStart = 0x10000;
uint64_t DefaultImageBase = 0x10000;
unsigned CopyReloc;
unsigned PCRelReloc;
unsigned GotReloc;
unsigned PltReloc;
unsigned RelativeReloc;
unsigned IRelativeReloc;
unsigned TlsGotReloc = 0;
unsigned TlsLocalDynamicReloc = 0;
unsigned TlsGlobalDynamicReloc = 0;
unsigned TlsModuleIndexReloc;
unsigned TlsOffsetReloc;
unsigned PltEntrySize = 8;
unsigned PltZeroEntrySize = 0;
unsigned GotHeaderEntriesNum = 0;
uint32_t CopyRel;
uint32_t GotRel;
uint32_t PltRel;
uint32_t RelativeRel;
uint32_t IRelativeRel;
uint32_t TlsDescRel;
uint32_t TlsGotRel;
uint32_t TlsModuleIndexRel;
uint32_t TlsOffsetRel;
unsigned GotEntrySize;
unsigned GotPltEntrySize;
unsigned PltEntrySize;
unsigned PltHeaderSize;
// At least on x86_64 positions 1 and 2 are used by the first plt entry
// to support lazy loading.
unsigned GotPltHeaderEntriesNum = 3;
bool LazyRelocations = false;
// Set to 0 for variant 2
unsigned TcbSize = 0;
virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
RelExpr Expr) const;
virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
virtual void relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
virtual void relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
virtual void relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
virtual void relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const;
};
StringRef getRelName(uint32_t Type);
uint64_t getPPC64TocBase();
template <class ELFT>
typename llvm::object::ELFFile<ELFT>::uintX_t getMipsGpAddr();
const unsigned MipsGPOffset = 0x7ff0;
template <class ELFT> bool isGnuIFunc(const SymbolBody &S);
extern std::unique_ptr<TargetInfo> Target;
extern TargetInfo *Target;
TargetInfo *createTarget();
}
}

268
ELF/Thunks.cpp Normal file
View File

@ -0,0 +1,268 @@
//===- Thunks.cpp --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This file contains Thunk subclasses.
//
// A thunk is a small piece of code written after an input section
// which is used to jump between "incompatible" functions
// such as MIPS PIC and non-PIC or ARM non-Thumb and Thumb functions.
//
// If a jump target is too far and its address doesn't fit to a
// short jump instruction, we need to create a thunk too, but we
// haven't supported it yet.
//
// i386 and x86-64 don't need thunks.
//
//===---------------------------------------------------------------------===//
#include "Thunks.h"
#include "Error.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "Target.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
namespace lld {
namespace elf {
namespace {
// Specific ARM Thunk implementations. The naming convention is:
// Source State, TargetState, Target Requirement, ABS or PI, Range
// ARM-state source, Thumb-state target, absolute addressing.
// The emitted sequence (see writeTo) is 12 bytes long.
template <class ELFT>
class ARMToThumbV7ABSLongThunk final : public Thunk<ELFT> {
public:
  ARMToThumbV7ABSLongThunk(const SymbolBody &Callee,
                           const InputSection<ELFT> &Host)
      : Thunk<ELFT>(Callee, Host) {}

  void writeTo(uint8_t *Buf) const override;
  uint32_t size() const override { return 12; }
};
// ARM-state source, Thumb-state target, position-independent addressing.
// The emitted sequence (see writeTo) is 16 bytes long.
template <class ELFT>
class ARMToThumbV7PILongThunk final : public Thunk<ELFT> {
public:
  ARMToThumbV7PILongThunk(const SymbolBody &Callee,
                          const InputSection<ELFT> &Host)
      : Thunk<ELFT>(Callee, Host) {}

  void writeTo(uint8_t *Buf) const override;
  uint32_t size() const override { return 16; }
};
// Thumb-state source, ARM-state target, absolute addressing.
// The emitted sequence (see writeTo) is 10 bytes long.
template <class ELFT>
class ThumbToARMV7ABSLongThunk final : public Thunk<ELFT> {
public:
  ThumbToARMV7ABSLongThunk(const SymbolBody &Callee,
                           const InputSection<ELFT> &Host)
      : Thunk<ELFT>(Callee, Host) {}

  void writeTo(uint8_t *Buf) const override;
  uint32_t size() const override { return 10; }
};
// Thumb-state source, ARM-state target, position-independent addressing.
// The emitted sequence (see writeTo) is 12 bytes long.
template <class ELFT>
class ThumbToARMV7PILongThunk final : public Thunk<ELFT> {
public:
  ThumbToARMV7PILongThunk(const SymbolBody &Callee,
                          const InputSection<ELFT> &Host)
      : Thunk<ELFT>(Callee, Host) {}

  void writeTo(uint8_t *Buf) const override;
  uint32_t size() const override { return 12; }
};
// MIPS LA25 thunk
// The emitted sequence (see writeTo) is four 32-bit words, i.e. 16 bytes.
template <class ELFT> class MipsThunk final : public Thunk<ELFT> {
public:
  MipsThunk(const SymbolBody &Callee, const InputSection<ELFT> &Host)
      : Thunk<ELFT>(Callee, Host) {}

  void writeTo(uint8_t *Buf) const override;
  uint32_t size() const override { return 16; }
};
} // anonymous namespace
// ARM Target Thunks
// Address an ARM/Thumb thunk must transfer control to: the PLT entry of S
// when S is in the PLT, otherwise the address of S itself.
template <class ELFT> static uint64_t getARMThunkDestVA(const SymbolBody &S) {
  if (S.isInPlt())
    return S.getPltVA<ELFT>();
  return S.getVA<ELFT>();
}
// Writes the 12-byte ARM sequence to Buf: movw/movt load the absolute
// destination address into ip, then "bx ip" branches to it.
template <class ELFT>
void ARMToThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  // Instruction template; the movw/movt immediates are patched in below.
  const uint8_t Insns[] = {
      0x00, 0xc0, 0x00, 0xe3, // movw ip,:lower16:S
      0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S
      0x1c, 0xff, 0x2f, 0xe1, // bx   ip
  };
  const uint64_t DestVA = getARMThunkDestVA<ELFT>(this->Destination);
  memcpy(Buf, Insns, sizeof(Insns));

  uint8_t *const MovwLoc = Buf;
  uint8_t *const MovtLoc = Buf + 4;
  Target->relocateOne(MovwLoc, R_ARM_MOVW_ABS_NC, DestVA);
  Target->relocateOne(MovtLoc, R_ARM_MOVT_ABS, DestVA);
}
// Writes the 10-byte Thumb sequence to Buf: movw/movt load the absolute
// destination address into ip, then "bx ip" branches to it.
template <class ELFT>
void ThumbToARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  // Instruction template; the movw/movt immediates are patched in below.
  const uint8_t Insns[] = {
      0x40, 0xf2, 0x00, 0x0c, // movw ip, :lower16:S
      0xc0, 0xf2, 0x00, 0x0c, // movt ip, :upper16:S
      0x60, 0x47,             // bx   ip
  };
  const uint64_t DestVA = getARMThunkDestVA<ELFT>(this->Destination);
  memcpy(Buf, Insns, sizeof(Insns));

  uint8_t *const MovwLoc = Buf;
  uint8_t *const MovtLoc = Buf + 4;
  Target->relocateOne(MovwLoc, R_ARM_THM_MOVW_ABS_NC, DestVA);
  Target->relocateOne(MovtLoc, R_ARM_THM_MOVT_ABS, DestVA);
}
// Writes the 16-byte position-independent ARM sequence to Buf: movw/movt
// build a PC-relative displacement in ip, "add ip, ip, pc" turns it into the
// destination address, and "bx r12" branches there.
template <class ELFT>
void ARMToThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const uint8_t Data[] = {
      0xf0, 0xcf, 0x0f, 0xe3, // P:  movw ip,:lower16:S - (P + (L1-P) + 8)
      0x00, 0xc0, 0x40, 0xe3, //     movt ip,:upper16:S - (P + (L1-P) + 8)
      0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc
      0x1c, 0xff, 0x2f, 0xe1, //     bx r12
  };
  uint64_t S = getARMThunkDestVA<ELFT>(this->Destination);
  uint64_t P = this->getVA();
  // The add at L1 (= P + 8) reads pc as L1 + 8 in ARM state, so ip must hold
  // S - (P + 16). relocateOne() is handed the value to encode (the absolute
  // thunks pass S itself), not an addend relative to the patched location.
  // movw and movt encode the low and high halves of that one constant, so
  // both must receive the same displacement. Giving movt a value 4 larger
  // produces a wrong upper half whenever that difference carries across
  // bit 16.
  const uint64_t Disp = S - P - 16;
  memcpy(Buf, Data, sizeof(Data));
  Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, Disp);
  Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, Disp);
}
// Writes the 12-byte position-independent Thumb sequence to Buf: movw/movt
// build a PC-relative displacement in ip, "add r12, pc" turns it into the
// destination address, and "bx r12" branches there.
template <class ELFT>
void ThumbToARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const uint8_t Data[] = {
      0x4f, 0xf6, 0xf4, 0x7c, // P:  movw ip,:lower16:S - (P + (L1-P) + 4)
      0xc0, 0xf2, 0x00, 0x0c, //     movt ip,:upper16:S - (P + (L1-P) + 4)
      0xfc, 0x44,             // L1: add  r12, pc
      0x60, 0x47,             //     bx   r12
  };
  uint64_t S = getARMThunkDestVA<ELFT>(this->Destination);
  uint64_t P = this->getVA();
  // The add at L1 (= P + 8) reads pc as L1 + 4 in Thumb state, so ip must
  // hold S - (P + 12). relocateOne() is handed the value to encode (the
  // absolute thunks pass S itself), not an addend relative to the patched
  // location. movw and movt encode the low and high halves of that one
  // constant, so both must receive the same displacement. Giving movt a
  // value 4 larger produces a wrong upper half whenever that difference
  // carries across bit 16.
  const uint64_t Disp = S - P - 12;
  memcpy(Buf, Data, sizeof(Data));
  Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, Disp);
  Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, Disp);
}
// Emits the MIPS LA25 thunk used to call a PIC function from non-PIC code:
// four 32-bit words that load the callee address into $25 (lui/addiu) and
// jump to the callee.
template <class ELFT> void MipsThunk<ELFT>::writeTo(uint8_t *Buf) const {
  const endianness Endian = ELFT::TargetEndianness;
  const uint64_t FuncVA = this->Destination.template getVA<ELFT>();

  uint8_t *const LuiLoc = Buf;
  uint8_t *const JumpLoc = Buf + 4;
  uint8_t *const AddiuLoc = Buf + 8;
  uint8_t *const NopLoc = Buf + 12;

  write32<Endian>(LuiLoc, 0x3c190000); // lui   $25, %hi(func)
  // NOTE(review): (FuncVA >> 2) is not masked to the 26-bit index field of
  // the j instruction; presumably destinations are expected to lie below
  // 256 MiB -- confirm.
  write32<Endian>(JumpLoc, 0x08000000 | (FuncVA >> 2)); // j     func
  write32<Endian>(AddiuLoc, 0x27390000); // addiu $25, $25, %lo(func)
  write32<Endian>(NopLoc, 0x00000000);   // nop

  // Patch the %hi/%lo immediates of the lui and addiu words.
  Target->relocateOne(LuiLoc, R_MIPS_HI16, FuncVA);
  Target->relocateOne(AddiuLoc, R_MIPS_LO16, FuncVA);
}
// Binds the thunk to its destination symbol and owning section. The thunk's
// offset is the owner's thunk start offset plus the size of the thunks the
// owner already holds.
template <class ELFT>
Thunk<ELFT>::Thunk(const SymbolBody &Dest, const InputSection<ELFT> &Sec)
    : Destination(Dest), Owner(Sec),
      Offset(Sec.getThunkOff() + Sec.getThunksSize()) {}
// Virtual address of this thunk: the owner's output-section address, plus the
// owner's offset within that output section, plus this thunk's Offset.
template <class ELFT> typename ELFT::uint Thunk<ELFT>::getVA() const {
  const auto OwnerVA = Owner.OutSec->getVA() + Owner.OutSecOff;
  return OwnerVA + Offset;
}
// Out-of-line definition of the virtual destructor; it has nothing to release.
template <class ELFT> Thunk<ELFT>::~Thunk() = default;
// Allocates the interworking Thunk that matches relocation type Reloc.
// ARM branch relocations get an ARM-to-Thumb thunk and Thumb branch
// relocations get a Thumb-to-ARM thunk; the position-independent variant is
// chosen when Config->Pic is set. The Thunk is allocated from the allocator
// of IS's file. Any other relocation type is a fatal error.
template <class ELFT>
static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S,
                                   InputSection<ELFT> &IS) {
  BumpPtrAllocator &Arena = IS.getFile()->Alloc;

  // Branches issued from ARM state.
  if (Reloc == R_ARM_PC24 || Reloc == R_ARM_PLT32 || Reloc == R_ARM_JUMP24) {
    if (!Config->Pic)
      return new (Arena) ARMToThumbV7ABSLongThunk<ELFT>(S, IS);
    return new (Arena) ARMToThumbV7PILongThunk<ELFT>(S, IS);
  }

  // Branches issued from Thumb state.
  if (Reloc == R_ARM_THM_JUMP19 || Reloc == R_ARM_THM_JUMP24) {
    if (!Config->Pic)
      return new (Arena) ThumbToARMV7ABSLongThunk<ELFT>(S, IS);
    return new (Arena) ThumbToARMV7PILongThunk<ELFT>(S, IS);
  }

  fatal("unrecognized relocation type");
}
template <class ELFT>
static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection<ELFT> &IS) {
// Only one Thunk supported per symbol.
if (S.hasThunk<ELFT>())
return;
// ARM Thunks are added to the same InputSection as the relocation. This
// isn't strictly necessary but it makes it more likely that a limited range
// branch can reach the Thunk, and it makes Thunks to the PLT section easier
Thunk<ELFT> *T = createThunkArm(Reloc, S, IS);
IS.addThunk(T);
if (auto *Sym = dyn_cast<DefinedRegular<ELFT>>(&S))
Sym->ThunkData = T;
else if (auto *Sym = dyn_cast<SharedSymbol<ELFT>>(&S))
Sym->ThunkData = T;
else
fatal("symbol not DefinedRegular or Shared");
}
template <class ELFT>
static void addThunkMips(uint32_t RelocType, SymbolBody &S,
InputSection<ELFT> &IS) {
// Only one Thunk supported per symbol.
if (S.hasThunk<ELFT>())
return;
// Mips Thunks are added to the InputSection defining S.
auto *R = cast<DefinedRegular<ELFT>>(&S);
auto *Sec = cast<InputSection<ELFT>>(R->Section);
auto *T = new (IS.getFile()->Alloc) MipsThunk<ELFT>(S, *Sec);
Sec->addThunk(T);
R->ThunkData = T;
}
// Entry point for Thunk creation: forwards to the ARM or MIPS implementation
// according to Config->EMachine. Calling it for any other machine is a
// programming error.
template <class ELFT>
void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &IS) {
  switch (Config->EMachine) {
  case EM_ARM:
    addThunkARM<ELFT>(RelocType, S, IS);
    return;
  case EM_MIPS:
    addThunkMips<ELFT>(RelocType, S, IS);
    return;
  default:
    llvm_unreachable("add Thunk only supported for ARM and Mips");
  }
}
// Explicit instantiations. addThunk and Thunk are declared in Thunks.h but
// defined in this file, so they are instantiated here for each of the four
// ELF flavours (32/64-bit, little/big endian).
template void addThunk<ELF32LE>(uint32_t, SymbolBody &,
InputSection<ELF32LE> &);
template void addThunk<ELF32BE>(uint32_t, SymbolBody &,
InputSection<ELF32BE> &);
template void addThunk<ELF64LE>(uint32_t, SymbolBody &,
InputSection<ELF64LE> &);
template void addThunk<ELF64BE>(uint32_t, SymbolBody &,
InputSection<ELF64BE> &);
template class Thunk<ELF32LE>;
template class Thunk<ELF32BE>;
template class Thunk<ELF64LE>;
template class Thunk<ELF64BE>;
} // namespace elf
} // namespace lld

56
ELF/Thunks.h Normal file
View File

@ -0,0 +1,56 @@
//===- Thunks.h --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_THUNKS_H
#define LLD_ELF_THUNKS_H
#include "Relocations.h"
namespace lld {
namespace elf {
class SymbolBody;
template <class ELFT> class InputSection;
// Class to describe an instance of a Thunk.
// A Thunk is a code-sequence inserted by the linker in between a caller and
// the callee. The relocation to the callee is redirected to the Thunk, which
// after executing transfers control to the callee. Typical uses of Thunks
// include transferring control from non-pi to pi and changing state on
// targets like ARM.
//
// Thunks can be created for DefinedRegular and Shared Symbols. The Thunk
// is stored in a field of the Symbol Destination.
// Thunks to be written to an InputSection are recorded by the InputSection.
template <class ELFT> class Thunk {
// Address-sized unsigned integer type of the ELFT being linked.
typedef typename ELFT::uint uintX_t;
public:
// Destination is the symbol the Thunk transfers control to; Owner is the
// InputSection the Thunk belongs to. Both are held by reference.
Thunk(const SymbolBody &Destination, const InputSection<ELFT> &Owner);
virtual ~Thunk();
// Size in bytes of the code emitted by writeTo(). The base implementation
// reports 0; subclasses override it.
virtual uint32_t size() const { return 0; }
// Writes the Thunk's code to Buf. The base implementation writes nothing;
// subclasses override it.
virtual void writeTo(uint8_t *Buf) const {}
// Virtual address of the Thunk: Owner's output-section address plus Owner's
// offset in that output section plus Offset.
uintX_t getVA() const;
protected:
// Symbol the Thunk transfers control to.
const SymbolBody &Destination;
// InputSection this Thunk was created for; getVA() is computed relative to it.
const InputSection<ELFT> &Owner;
// Set once at construction to Owner.getThunkOff() + Owner.getThunksSize().
uint64_t Offset;
};
// For a Relocation to symbol S from InputSection Src, create a Thunk and
// update the fields of S and the InputSection that the Thunk body will be
// written to. At present there are implementations for ARM and Mips Thunks.
//
// RelocType is the type of the relocation that needs the Thunk; the ARM
// implementation uses it to pick the Thunk variant. If S already has a Thunk
// the call has no effect. The implementation is selected from
// Config->EMachine and supports only EM_ARM and EM_MIPS.
template <class ELFT>
void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &Src);
} // namespace elf
} // namespace lld
#endif

File diff suppressed because it is too large Load Diff

View File

@ -10,14 +10,28 @@
#ifndef LLD_ELF_WRITER_H
#define LLD_ELF_WRITER_H
namespace lld {
namespace elf2 {
#include <memory>
namespace llvm {
class StringRef;
}
namespace lld {
namespace elf {
template <class ELFT> class InputSectionBase;
template <class ELFT> class ObjectFile;
template <class ELFT> class SymbolTable;
template <class ELFT> void writeResult(SymbolTable<ELFT> *Symtab);
template <class ELFT> void markLive(SymbolTable<ELFT> *Symtab);
template <class ELFT> void markLive();
template <class ELFT>
llvm::StringRef getOutputSectionName(InputSectionBase<ELFT> *S);
template <class ELFT>
void reportDiscarded(InputSectionBase<ELFT> *IS,
const std::unique_ptr<elf::ObjectFile<ELFT>> &File);
}
}

View File

@ -4,7 +4,7 @@ lld License
University of Illinois/NCSA
Open Source License
Copyright (c) 2011-2015 by the contributors listed in CREDITS.TXT
Copyright (c) 2011-2016 by the contributors listed in CREDITS.TXT
All rights reserved.
Developed by:

61
docs/AtomLLD.rst Normal file
View File

@ -0,0 +1,61 @@
ATOM-based lld
==============
ATOM-based lld is a new set of modular code for creating linker tools.
Currently it supports Mach-O.
* End-User Features:
* Compatible with existing linker options
* Reads standard Object Files
* Writes standard Executable Files
* Remove clang's reliance on "the system linker"
* Uses the LLVM `"UIUC" BSD-Style license`__.
* Applications:
* Modular design
* Support cross linking
* Easy to add new CPU support
* Can be built as static tool or library
* Design and Implementation:
* Extensive unit tests
* Internal linker model can be dumped/read to textual format
* Additional linking features can be plugged in as "passes"
* OS specific and CPU specific code factored out
Why a new linker?
-----------------
The fact that clang relies on whatever linker tool you happen to have installed
means that clang has been very conservative adopting features which require a
recent linker.
In the same way that the MC layer of LLVM has removed clang's reliance on the
system assembler tool, the lld project will remove clang's reliance on the
system linker tool.
Contents
--------
.. toctree::
:maxdepth: 2
design
getting_started
ReleaseNotes
development
windows_support
open_projects
sphinx_intro
Indices and tables
------------------
* :ref:`genindex`
* :ref:`search`
__ http://llvm.org/docs/DeveloperPolicy.html#license

350
docs/NewLLD.rst Normal file
View File

@ -0,0 +1,350 @@
The ELF and COFF Linkers
========================
We started rewriting the ELF (Unix) and COFF (Windows) linkers in May 2015.
Since then, we have been making steady progress towards providing
drop-in replacements for the system linkers.
Currently, the Windows support is mostly complete and is about 2x faster
than the linker that comes as a part of the Microsoft Visual Studio toolchain.
The ELF support is in progress and is able to link large programs
such as Clang or LLD itself. Unless your program depends on linker scripts,
you can expect it to be linkable with LLD.
It is currently about 1.2x to 2x faster than GNU gold linker.
We aim to make it a drop-in replacement for the GNU linker.
We expect that FreeBSD is going to be the first large system
to adopt LLD as the system linker.
We are working on it in collaboration with the FreeBSD project.
The linkers are notably small; as of June 2016,
the COFF linker is under 7k lines and the ELF linker is about 13k lines,
while gold is 146K lines.
The linkers are designed to be as fast and simple as possible.
Because it is simple, it is easy to extend to support new features.
It already supports several advanced features such as section garbage
collection and identical code folding.
The COFF linker supports i386, x86-64 and ARM. The ELF linker supports
i386, x86-64, x32, MIPS32, MIPS64, PowerPC, AMDGPU, ARM and Aarch64,
although the quality varies depending on platform. By default, LLD
provides support for all targets because the amount of code we have for
each target is so small. We do not even provide a way to disable
targets at compile time.
There are a few key design choices that we made to achieve these goals.
We will describe them in this document.
The ELF Linker as a Library
---------------------------
You can embed LLD in your program by linking against it and calling the linker's
entry point function lld::elf::link.
The current policy is that it is your responsibility to give trustworthy object
files. The function is guaranteed to return as long as you do not pass corrupted
or malicious object files. A corrupted file could cause a fatal error or SEGV.
That being said, you don't need to worry too much about it if you create object
files in the usual way and give them to the linker. It is naturally expected to
work, or otherwise it's a linker's bug.
Design
======
We will describe the design of the linkers in the rest of the document.
Key Concepts
------------
Linkers are fairly large pieces of software.
There are many design choices you have to make to create a complete linker.
This is a list of design choices we've made for ELF and COFF LLD.
We believe that these high-level design choices achieved a right balance
between speed, simplicity and extensibility.
* Implement as native linkers
We implemented the linkers as native linkers for each file format.
The two linkers share the same design but do not share code.
Sharing code makes sense if the benefit is worth its cost.
In our case, ELF and COFF are different enough that we thought the layer to
abstract the differences wouldn't be worth its complexity and run-time cost.
Elimination of the abstract layer has greatly simplified the implementation.
* Speed by design
One of the most important things in achieving high performance is to
do less rather than do it efficiently.
Therefore, the high-level design matters more than local optimizations.
Since we are trying to create a high-performance linker,
it is very important to keep the design as efficient as possible.
Broadly speaking, we do not do anything until we have to do it.
For example, we do not read section contents or relocations
until we need them to continue linking.
When we need to do some costly operation (such as looking up
a hash table for each symbol), we do it only once.
We obtain a handler (which is typically just a pointer to actual data)
on the first operation and use it throughout the process.
* Efficient archive file handling
LLD's handling of archive files (the files with ".a" file extension) is different
from the traditional Unix linkers and similar to Windows linkers.
We'll describe how the traditional Unix linker handles archive files,
what the problem is, and how LLD approached the problem.
The traditional Unix linker maintains a set of undefined symbols during linking.
The linker visits each file in the order as they appeared in the command line
until the set becomes empty. What the linker would do depends on file type.
- If the linker visits an object file, the linker links object files to the result,
and undefined symbols in the object file are added to the set.
- If the linker visits an archive file, it checks for the archive file's symbol table
and extracts all object files that have definitions for any symbols in the set.
This algorithm sometimes leads to a counter-intuitive behavior.
If you give archive files before object files, nothing will happen
because when the linker visits archives, there are no undefined symbols in the set.
As a result, no files are extracted from the first archive file,
and the link is done at that point because the set is empty after it visits one file.
You can fix the problem by reordering the files,
but that cannot fix the issue of mutually-dependent archive files.
Linking mutually-dependent archive files is tricky.
You may specify the same archive file multiple times to
let the linker visit it more than once.
Or, you may use the special command line options, `--start-group` and `--end-group`,
to let the linker loop over the files between the options until
no new symbols are added to the set.
Visiting the same archive files multiple times makes the linker slower.
Here is how LLD approached the problem. Instead of memorizing only undefined symbols,
we program LLD so that it memorizes all symbols.
When it sees an undefined symbol that can be resolved by extracting an object file
from an archive file it previously visited, it immediately extracts the file and links it.
It is doable because LLD does not forget symbols it has seen in archive files.
We believe that the LLD's way is efficient and easy to justify.
The semantics of LLD's archive handling is different from the traditional Unix's.
You can observe it if you carefully craft archive files to exploit it.
However, in reality, we don't know any program that cannot link
with our algorithm so far, so it's not going to cause trouble.
Numbers You Want to Know
------------------------
To give you intuition about what kinds of data the linker is mainly working on,
I'll give you the list of objects and their numbers LLD has to read and process
in order to link a very large executable. In order to link Chrome with debug info,
which is roughly 2 GB in output size, LLD reads
- 17,000 files,
- 1,800,000 sections,
- 6,300,000 symbols, and
- 13,000,000 relocations.
LLD produces the 2 GB executable in 15 seconds.
These numbers vary depending on your program, but in general,
you have a lot of relocations and symbols for each file.
If your program is written in C++, symbol names are likely to be
pretty long because of name mangling.
It is important to not waste time on relocations and symbols.
In the above case, the total amount of symbol strings is 450 MB,
and inserting all of them to a hash table takes 1.5 seconds.
Therefore, if you casually add a hash table lookup for each symbol,
it would slow down the linker by 10%. So, don't do that.
On the other hand, you don't have to pursue efficiency
when handling files.
Important Data Structures
-------------------------
We will describe the key data structures in LLD in this section.
The linker can be understood as the interactions between them.
Once you understand their functions, the code of the linker should look obvious to you.
* SymbolBody
SymbolBody is a class to represent symbols.
They are created for symbols in object files or archive files.
The linker creates linker-defined symbols as well.
There are basically three types of SymbolBodies: Defined, Undefined, or Lazy.
- Defined symbols are for all symbols that are considered as "resolved",
including real defined symbols, COMDAT symbols, common symbols,
absolute symbols, linker-created symbols, etc.
- Undefined symbols represent undefined symbols, which need to be replaced by
Defined symbols by the resolver until the link is complete.
- Lazy symbols represent symbols we found in archive file headers
which can turn into Defined if we read archive members.
* Symbol
A Symbol is a container for a SymbolBody. There's only one Symbol for each
unique symbol name (this uniqueness is guaranteed by the symbol table).
Each global symbol has only one SymbolBody at any one time, which is
the SymbolBody stored within a memory region of the Symbol large enough
to store any SymbolBody.
As the resolver reads symbols from input files, it replaces the Symbol's
SymbolBody with the "best" SymbolBody for its symbol name by constructing
the new SymbolBody in place on top of the existing SymbolBody. For example,
if the resolver is given a defined symbol, and the SymbolBody with its name
is undefined, it will construct a Defined SymbolBody over the Undefined
SymbolBody.
This means that each SymbolBody pointer always points to the best SymbolBody,
and it is possible to get from a SymbolBody to a Symbol, or vice versa,
by adding or subtracting a fixed offset. This memory layout helps reduce
the cache miss rate through high locality and a small number of required
pointer indirections.
* SymbolTable
SymbolTable is basically a hash table from strings to Symbols
with a logic to resolve symbol conflicts. It resolves conflicts by symbol type.
- If we add Defined and Undefined symbols, the symbol table will keep the former.
- If we add Defined and Lazy symbols, it will keep the former.
- If we add Lazy and Undefined, it will keep the former,
but it will also trigger the Lazy symbol to load the archive member
to actually resolve the symbol.
* Chunk (COFF specific)
Chunk represents a chunk of data that will occupy space in an output.
Each regular section becomes a chunk.
Chunks created for common or BSS symbols are not backed by sections.
The linker may create chunks to append additional data to an output as well.
Chunks know about their size, how to copy their data to mmap'ed outputs,
and how to apply relocations to them.
Specifically, section-based chunks know how to read relocation tables
and how to apply them.
* InputSection (ELF specific)
Since we have less synthesized data for ELF, we don't abstract slices of
input files as Chunks for ELF. Instead, we directly use the input section
as an internal data type.
An InputSection knows its size and how to copy itself to
mmap'ed outputs, just like COFF Chunks.
* OutputSection
OutputSection is a container of InputSections (ELF) or Chunks (COFF).
An InputSection or Chunk belongs to at most one OutputSection.
There are mainly three actors in this linker.
* InputFile
InputFile is a superclass of file readers.
We have a different subclass for each input file type,
such as regular object file, archive file, etc.
They are responsible for creating and owning SymbolBodies and
InputSections/Chunks.
* Writer
The writer is responsible for writing file headers and InputSections/Chunks to a file.
It creates OutputSections, puts all InputSections/Chunks into them,
assigns unique, non-overlapping addresses and file offsets to them,
and then writes them down to a file.
* Driver
The linking process is driven by the driver. The driver
- processes command line options,
- creates a symbol table,
- creates an InputFile for each input file and puts all symbols in it into the symbol table,
- checks that there are no remaining undefined symbols,
- creates a writer,
- and passes the symbol table to the writer to write the result to a file.
Link-Time Optimization
----------------------
LTO is implemented by handling LLVM bitcode files as object files.
The linker resolves symbols in bitcode files normally. If all symbols
are successfully resolved, it then runs LLVM passes
with all bitcode files to convert them to one big regular ELF/COFF file.
Finally, the linker replaces bitcode symbols with ELF/COFF symbols,
so that they are linked as if they were in the native format from the beginning.
The details are described in this document.
http://llvm.org/docs/LinkTimeOptimization.html
Glossary
--------
* RVA (COFF)
Short for Relative Virtual Address.
Windows executables or DLLs are not position-independent; they are
linked against a fixed address called an image base. RVAs are
offsets from an image base.
Default image bases are 0x140000000 for executables and 0x180000000
for DLLs. For example, when we are creating an executable, we assume
that the executable will be loaded at address 0x140000000 by the
loader, so we apply relocations accordingly. Result texts and data
will contain raw absolute addresses.
* VA
Short for Virtual Address. For COFF, it is equivalent to RVA + image base.
* Base relocations (COFF)
Relocation information for the loader. If the loader decides to map
an executable or a DLL to a different address than their image
bases, it fixes up binaries using information contained in the base
relocation table. A base relocation table consists of a list of
locations containing addresses. The loader adds a difference between
RVA and actual load address to all locations listed there.
Note that this run-time relocation mechanism is much simpler than ELF.
There's no PLT or GOT. Images are relocated as a whole just
by shifting entire images in memory by some offsets. Although doing
this breaks text sharing, I think this mechanism is not actually bad
on today's computers.
* ICF
Short for Identical COMDAT Folding (COFF) or Identical Code Folding (ELF).
ICF is an optimization to reduce output size by merging read-only sections
by not only their names but by their contents. If two read-only sections
happen to have the same metadata, actual contents and relocations,
they are merged by ICF. It is known as an effective technique,
and it usually reduces C++ program's size by a few percent or more.
Note that this is not an entirely sound optimization. C/C++ require
that different functions have different addresses. If a program depends on
that property, it would fail at runtime.
On Windows, that's not really an issue because MSVC link.exe enables
the optimization by default. As long as your program works
with the linker's default settings, your program should be safe with ICF.
On Unix, your program is generally not guaranteed to be safe with ICF,
although large programs happen to work correctly.
LLD works fine with ICF for example.

35
docs/ReleaseNotes.rst Normal file
View File

@ -0,0 +1,35 @@
======================
LLD 3.9 Release Notes
======================
.. contents::
:local:
.. warning::
These are in-progress notes for the upcoming LLVM 3.9 release.
Introduction
============
This document contains the release notes for the LLD linker, release 3.9.
Here we describe the status of LLD, including major improvements
from the previous release. All LLD releases may be downloaded
from the `LLVM releases web site <http://llvm.org/releases/>`_.
Non-comprehensive list of changes in this release
=================================================
ELF Improvements
----------------
* Initial support for LTO.
COFF Improvements
-----------------
* Item 1.
MachO Improvements
------------------
* Item 1.

View File

@ -11,6 +11,7 @@
# serve to show the default.
import sys, os
from datetime import date
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@ -40,7 +41,7 @@
# General information about the project.
project = u'lld'
copyright = u'2011-2014, LLVM Project'
copyright = u'2011-%d, LLVM Project' % date.today().year
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the

View File

@ -150,37 +150,6 @@ into an Atom graph. For instance, you may want the Reader to only accept
certain architectures. The options class can be instantiated from command
line options, or it can be subclassed and the ivars programmatically set.
ELF Section Groups
~~~~~~~~~~~~~~~~~~
Reference : `ELF Section Groups <http://mentorembedded.github.io/cxx-abi/abi/prop-72-comdat.html>`_
C++ has many situations where the compiler may need to emit code or data,
but may not be able to identify a unique compilation unit where it should be
emitted. The approach chosen by the C++ ABI group to deal with this problem, is
to allow the compiler to emit the required information in multiple compilation
units, in a form which allows the linker to remove all but one copy. This is
essentially the feature called COMDAT in several existing implementations.
The COMDAT sections in ELF are modeled by using '.group' sections in the input
files. Each '.group' section is associated with a signature. The '.group'
section has a list of members that are part of the the '.group' which the linker
selects to appear in the input file(Whichever .group section appeared first
in the link). References to any of the '.group' members can also appear from
outside the '.group'.
In lld the the '.group' sections with COMDAT are identified by contentType(
typeGroupComdat). The '.group' members are identified by using
**kindGroupChild** references.
The point to be noted here is the 'group child' members would need to be emitted
in the output file **iff** the group was selected by the resolver.
This is modeled in lld by removing the 'group child' members from the
definedAtom List.
Any reference to the group-child from **outside the group** is referenced using
a 'undefined' atom.
Resolving
~~~~~~~~~

View File

@ -4,55 +4,12 @@ lld - The LLVM Linker
=====================
lld contains two linkers whose architectures are different from each other.
One is a linker that implements native features directly.
They are in `COFF` or `ELF` directories. Other directories contains the other
implementation that is designed to be a set of modular code for creating
linker tools. This document covers mainly the latter.
For the former, please read README.md in `COFF` directory.
* End-User Features:
.. toctree::
:maxdepth: 1
* Compatible with existing linker options
* Reads standard Object Files (e.g. ELF, Mach-O, PE/COFF)
* Writes standard Executable Files (e.g. ELF, Mach-O, PE)
* Remove clang's reliance on "the system linker"
* Uses the LLVM `"UIUC" BSD-Style license`__.
* Applications:
* Modular design
* Support cross linking
* Easy to add new CPU support
* Can be built as static tool or library
* Design and Implementation:
* Extensive unit tests
* Internal linker model can be dumped/read to textual format
* Additional linking features can be plugged in as "passes"
* OS specific and CPU specific code factored out
Why a new linker?
-----------------
The fact that clang relies on whatever linker tool you happen to have installed
means that clang has been very conservative adopting features which require a
recent linker.
In the same way that the MC layer of LLVM has removed clang's reliance on the
system assembler tool, the lld project will remove clang's reliance on the
system linker tool.
Current Status
--------------
lld can self host on x86-64 FreeBSD and Linux and x86 Windows.
All SingleSource tests in test-suite pass on x86-64 Linux.
All SingleSource and MultiSource tests in the LLVM test-suite
pass on MIPS 32-bit little-endian Linux.
NewLLD
AtomLLD
Source
------
@ -66,24 +23,3 @@ lld is also available via the read-only git mirror::
git clone http://llvm.org/git/lld.git
Put it in llvm's tools/ directory, rerun cmake, then build target lld.
Contents
--------
.. toctree::
:maxdepth: 2
design
getting_started
development
windows_support
open_projects
sphinx_intro
Indices and tables
------------------
* :ref:`genindex`
* :ref:`search`
__ http://llvm.org/docs/DeveloperPolicy.html#license

View File

@ -4,11 +4,6 @@ Open Projects
=============
.. include:: ../include/lld/Core/TODO.txt
.. include:: ../lib/Driver/TODO.rst
.. include:: ../lib/ReaderWriter/ELF/X86_64/TODO.rst
.. include:: ../lib/ReaderWriter/ELF/AArch64/TODO.rst
.. include:: ../lib/ReaderWriter/ELF/ARM/TODO.rst
.. include:: ../tools/lld/TODO.txt
Documentation TODOs
~~~~~~~~~~~~~~~~~~~

View File

@ -1,100 +0,0 @@
//===- lld/Core/Alias.h - Alias atoms -------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provide alias atoms.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_ALIAS_H
#define LLD_CORE_ALIAS_H
#include "lld/Core/LLVM.h"
#include "lld/Core/Simple.h"
#include "llvm/ADT/Optional.h"
#include <string>
namespace lld {
// AliasAtom gives a second name to another atom. It is a zero-size atom that
// carries a LayoutAfter reference to its target, so the alias and the target
// are laid out at the same location in the final result. The target starts
// out as an undefined atom; the Resolver later replaces it with a defined one.
//
// Apart from its name, merge setting and dead-strip kind, the alias keeps no
// attributes of its own: the remaining queries are forwarded to the target,
// with a fixed fallback while no defined target is known yet.
class AliasAtom : public SimpleDefinedAtom {
public:
  AliasAtom(const File &file, StringRef name)
      : SimpleDefinedAtom(file), _name(name) {}

  StringRef name() const override { return _name; }

  // An alias occupies no space and has no content bytes.
  uint64_t size() const override { return 0; }
  ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); }

  Scope scope() const override {
    getTarget();
    if (!_target)
      return scopeLinkageUnit;
    return _target->scope();
  }

  // A stored merge value takes precedence over the target's.
  // NOTE(review): _merge is initialized with mergeNo below, so it looks like
  // it always holds a value and the forwarding branch is never taken --
  // confirm whether that is intended.
  Merge merge() const override {
    if (!_merge.hasValue()) {
      getTarget();
      if (!_target)
        return mergeNo;
      return _target->merge();
    }
    return _merge.getValue();
  }

  void setMerge(Merge val) { _merge = val; }

  ContentType contentType() const override {
    getTarget();
    if (!_target)
      return typeUnknown;
    return _target->contentType();
  }

  Interposable interposable() const override {
    getTarget();
    if (!_target)
      return interposeNo;
    return _target->interposable();
  }

  SectionChoice sectionChoice() const override {
    getTarget();
    if (!_target)
      return sectionBasedOnContent;
    return _target->sectionChoice();
  }

  StringRef customSectionName() const override {
    getTarget();
    if (!_target)
      return StringRef("");
    return _target->customSectionName();
  }

  DeadStripKind deadStrip() const override { return _deadStrip; }
  void setDeadStrip(DeadStripKind val) { _deadStrip = val; }

private:
  // Caches the target in _target. Only the first LayoutAfter reference is
  // examined; if its target is not a DefinedAtom, _target stays null and the
  // lookup is repeated on the next call.
  void getTarget() const {
    if (_target)
      return;
    for (const Reference *ref : *this) {
      const bool isLayoutAfter =
          ref->kindNamespace() == lld::Reference::KindNamespace::all &&
          ref->kindValue() == lld::Reference::kindLayoutAfter;
      if (!isLayoutAfter)
        continue;
      _target = dyn_cast<DefinedAtom>(ref->target());
      return;
    }
  }

  std::string _name;
  mutable const DefinedAtom *_target = nullptr;
  llvm::Optional<Merge> _merge = DefinedAtom::mergeNo;
  DeadStripKind _deadStrip = DefinedAtom::deadStripNormal;
};
} // end namespace lld
#endif

View File

@ -11,7 +11,6 @@
#define LLD_CORE_ARCHIVE_LIBRARY_FILE_H
#include "lld/Core/File.h"
#include "lld/Core/Parallel.h"
#include <set>
namespace lld {
@ -33,23 +32,11 @@ class ArchiveLibraryFile : public File {
/// Check if any member of the archive contains an Atom with the
/// specified name and return the File object for that member, or nullptr.
virtual File *find(StringRef name, bool dataSymbolOnly) = 0;
virtual File *find(StringRef name) = 0;
virtual std::error_code
parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0;
// Parses a member file containing a given symbol, so that when you
// need the file find() can return that immediately. Calling this function
// has no side effect other than pre-instantiating a file. Calling this
// function doesn't affect correctness.
virtual void preload(TaskGroup &group, StringRef symbolName) {}
/// Returns a set of all defined symbols in the archive, i.e. all
/// resolvable symbol using this file.
virtual std::set<StringRef> getDefinedSymbols() {
return std::set<StringRef>();
}
protected:
/// only subclasses of ArchiveLibraryFile can be instantiated
ArchiveLibraryFile(StringRef path) : File(path, kindArchiveLibrary) {}

View File

@ -16,6 +16,9 @@ namespace lld {
class File;
template<typename T>
class OwningAtomPtr;
///
/// The linker has a Graph Theory model of linking. An object file is seen
/// as a set of Atoms with References to other Atoms. Each Atom is a node
@ -24,6 +27,7 @@ class File;
/// undefined symbol (extern declaration).
///
class Atom {
template<typename T> friend class OwningAtomPtr;
public:
/// Whether this atom is defined or a proxy for an undefined symbol
enum Definition {
@ -71,6 +75,55 @@ class Atom {
Definition _definition;
};
/// Class which owns an atom pointer and runs the atom destructor when the
/// owning pointer goes out of scope.
///
/// Only the destructor is invoked; this class never deallocates the atom's
/// storage. The type is move-only: copying is deleted, and moving transfers
/// the pointer and leaves the source empty (null).
template<typename T>
class OwningAtomPtr {
private:
  OwningAtomPtr(const OwningAtomPtr &) = delete;
  void operator=(const OwningAtomPtr&) = delete;

public:
  /// Creates an empty owner.
  OwningAtomPtr() : atom(nullptr) { }

  /// Takes over \p atom; its destructor runs when this owner is destroyed or
  /// move-assigned over, unless release() is called first.
  OwningAtomPtr(T *atom) : atom(atom) { }

  ~OwningAtomPtr() {
    if (atom)
      runDestructor(atom);
  }

  /// Runs the destructor of \p atom through the Atom base class.
  void runDestructor(Atom *atom) {
    atom->~Atom();
  }

  OwningAtomPtr(OwningAtomPtr &&ptr) : atom(ptr.atom) {
    ptr.atom = nullptr;
  }

  /// Destroys the currently held atom (if any) and takes over the one held by
  /// \p ptr, leaving \p ptr empty.
  void operator=(OwningAtomPtr&& ptr) {
    // Self-move must be a no-op: without this guard the held atom would be
    // destroyed and the pointer then reset to null.
    if (this == &ptr)
      return;
    if (atom)
      runDestructor(atom);
    atom = ptr.atom;
    ptr.atom = nullptr;
  }

  /// Returns the held pointer (may be null) without giving up ownership.
  T *const &get() const {
    return atom;
  }

  /// Mutable access to the held pointer slot. Overwriting it does not run the
  /// destructor of the atom previously stored there.
  T *&get() {
    return atom;
  }

  /// Gives up ownership: returns the held pointer and leaves this owner
  /// empty, so no destructor is run for it here.
  T *release() {
    auto *v = atom;
    atom = nullptr;
    return v;
  }

private:
  T *atom;
};
} // namespace lld
#endif // LLD_CORE_ATOM_H

View File

@ -11,11 +11,12 @@
#define LLD_CORE_DEFINED_ATOM_H
#include "lld/Core/Atom.h"
#include "lld/Core/Reference.h"
#include "lld/Core/LLVM.h"
#include "llvm/Support/ErrorHandling.h"
namespace lld {
class File;
class Reference;
/// \brief The fundamental unit of linking.
///
@ -105,6 +106,7 @@ class DefinedAtom : public Atom {
enum ContentType {
typeUnknown, // for use with definitionUndefined
typeMachHeader, // atom representing mach_header [Darwin]
typeCode, // executable code
typeResolver, // function which returns address of target
typeBranchIsland, // linker created for large binaries
@ -127,6 +129,7 @@ class DefinedAtom : public Atom {
typeObjC1Class, // ObjC1 class [Darwin]
typeLazyPointer, // pointer through which a stub jumps
typeLazyDylibPointer, // pointer through which a stub jumps [Darwin]
typeNonLazyPointer, // pointer to external symbol
typeCFString, // NS/CFString object [Darwin]
typeGOT, // pointer to external symbol
typeInitializerPtr, // pointer to initializer function
@ -134,6 +137,8 @@ class DefinedAtom : public Atom {
typeCStringPtr, // pointer to UTF8 C string [Darwin]
typeObjCClassPtr, // pointer to ObjC class [Darwin]
typeObjC2CategoryList, // pointers to ObjC category [Darwin]
typeObjCImageInfo, // pointer to ObjC class [Darwin]
typeObjCMethodList, // pointer to ObjC method list [Darwin]
typeDTraceDOF, // runtime data for Dtrace [Darwin]
typeInterposingTuples, // tuples of interposing info for dyld [Darwin]
typeTempLTO, // temporary atom for bitcode reader
@ -143,14 +148,7 @@ class DefinedAtom : public Atom {
typeTLVInitialData, // initial data for a TLV [Darwin]
typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin]
typeTLVInitializerPtr, // pointer to thread local initializer [Darwin]
typeMachHeader, // atom representing mach_header [Darwin]
typeThreadZeroFill, // Uninitialized thread local data(TBSS) [ELF]
typeThreadData, // Initialized thread local data(TDATA) [ELF]
typeRONote, // Identifies readonly note sections [ELF]
typeRWNote, // Identifies readwrite note sections [ELF]
typeNoAlloc, // Identifies non allocatable sections [ELF]
typeGroupComdat, // Identifies a section group [ELF, COFF]
typeGnuLinkOnce, // Identifies a gnu.linkonce section [ELF]
typeDSOHandle, // atom representing DSO handle [Darwin]
typeSectCreate, // Created via the -sectcreate option [Darwin]
};
@ -218,11 +216,6 @@ class DefinedAtom : public Atom {
///
/// This is used by the linker to order the layout of Atoms so that the
/// resulting image is stable and reproducible.
///
/// Note that this should not be confused with ordinals of exported symbols in
/// Windows DLLs. In Windows terminology, ordinals are symbols' export table
/// indices (small integers) which can be used instead of symbol names to
/// refer items in a DLL.
virtual uint64_t ordinal() const = 0;
/// \brief the number of bytes of space this atom's content will occupy in the
@ -307,8 +300,12 @@ class DefinedAtom : public Atom {
return _atom.derefIterator(_it);
}
bool operator==(const reference_iterator &other) const {
return _it == other._it;
}
bool operator!=(const reference_iterator &other) const {
return _it != other._it;
return !(*this == other);
}
reference_iterator &operator++() {
@ -326,6 +323,14 @@ class DefinedAtom : public Atom {
/// \brief Returns an iterator to the end of this Atom's References.
virtual reference_iterator end() const = 0;
/// Adds a reference to this atom.
virtual void addReference(Reference::KindNamespace ns,
Reference::KindArch arch,
Reference::KindValue kindValue, uint64_t off,
const Atom *target, Reference::Addend a) {
llvm_unreachable("Subclass does not permit adding references");
}
static bool classof(const Atom *a) {
return a->definition() == definitionRegular;
}
@ -338,16 +343,15 @@ class DefinedAtom : public Atom {
ContentType atomContentType = contentType();
return !(atomContentType == DefinedAtom::typeZeroFill ||
atomContentType == DefinedAtom::typeZeroFillFast ||
atomContentType == DefinedAtom::typeTLVInitialZeroFill ||
atomContentType == DefinedAtom::typeThreadZeroFill);
atomContentType == DefinedAtom::typeTLVInitialZeroFill);
}
/// Utility function to check if the atom belongs to a group section
/// that represents section groups or .gnu.linkonce sections.
bool isGroupParent() const {
/// Utility function to check if relocations in this atom to other defined
/// atoms can be implicitly generated, and so we don't need to explicitly
/// emit those relocations.
bool relocsToDefinedCanBeImplicit() const {
ContentType atomContentType = contentType();
return (atomContentType == DefinedAtom::typeGroupComdat ||
atomContentType == DefinedAtom::typeGnuLinkOnce);
return atomContentType == typeCFI;
}
// Returns true if lhs should be placed before rhs in the final output.
@ -359,6 +363,8 @@ class DefinedAtom : public Atom {
// constructor.
DefinedAtom() : Atom(definitionRegular) { }
~DefinedAtom() override = default;
/// \brief Returns a pointer to the Reference object that the abstract
/// iterator "points" to.
virtual const Reference *derefIterator(const void *iter) const = 0;

View File

@ -15,6 +15,8 @@
#define LLD_CORE_ERROR_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include <system_error>
namespace lld {
@ -30,39 +32,37 @@ inline std::error_code make_error_code(YamlReaderError e) {
return std::error_code(static_cast<int>(e), YamlReaderCategory());
}
const std::error_category &LinkerScriptReaderCategory();
/// Error values that make_error_code() pairs with
/// LinkerScriptReaderCategory() to build a std::error_code.
/// success is pinned to 0; the remaining enumerators follow consecutively.
enum class LinkerScriptReaderError {
success = 0,
parse_error,
unknown_symbol_in_expr,
unrecognized_function_in_expr,
unknown_phdr_ids,
extra_program_phdr,
misplaced_program_phdr,
program_phdr_wrong_phdrs,
};
/// Wraps a LinkerScriptReaderError in a std::error_code whose category is
/// LinkerScriptReaderCategory() and whose value is the enumerator's integer.
inline std::error_code make_error_code(LinkerScriptReaderError e) {
  const int rawValue = static_cast<int>(e);
  return std::error_code(rawValue, LinkerScriptReaderCategory());
}
/// Creates an error_code object that has associated with it an arbitrary
/// error message. The value() of the error_code will always be non-zero
/// but its value is meaningless. The message() will be (a copy of) the
/// supplied error string.
/// Note: Once ErrorOr<> is updated to work with errors other than error_code,
/// this can be updated to return some other kind of error.
std::error_code make_dynamic_error_code(const char *msg);
std::error_code make_dynamic_error_code(StringRef msg);
std::error_code make_dynamic_error_code(const Twine &msg);
/// Generic error.
///
/// For errors that don't require their own specific sub-error (most errors)
/// this class can be used to describe the error via a string message.
///
/// The message is stored by value in Msg and exposed through getMessage().
class GenericError : public llvm::ErrorInfo<GenericError> {
public:
// NOTE(review): presumably the per-class identity object expected by
// llvm::ErrorInfo -- confirm against llvm/Support/Error.h.
static char ID;
/// Constructs the error from \p Msg. Only declared here; the definition is
/// not part of this header.
GenericError(Twine Msg);
/// Returns the stored message text.
const std::string &getMessage() const { return Msg; }
/// Writes this error to \p OS. Only declared here.
void log(llvm::raw_ostream &OS) const override;
/// Converts to a std::error_code by passing the stored message to
/// make_dynamic_error_code().
std::error_code convertToErrorCode() const override {
return make_dynamic_error_code(getMessage());
}
private:
// Message text returned by getMessage().
std::string Msg;
};
} // end namespace lld
namespace std {
template <> struct is_error_code_enum<lld::YamlReaderError> : std::true_type {};
template <>
struct is_error_code_enum<lld::LinkerScriptReaderError> : std::true_type {};
}
#endif

View File

@ -14,8 +14,8 @@
#include "lld/Core/DefinedAtom.h"
#include "lld/Core/SharedLibraryAtom.h"
#include "lld/Core/UndefinedAtom.h"
#include "lld/Core/range.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <functional>
@ -45,9 +45,18 @@ class File {
/// \brief Kinds of files that are supported.
enum Kind {
kindObject, ///< object file (.o)
kindSharedLibrary, ///< shared library (.so)
kindArchiveLibrary ///< archive (.a)
kindErrorObject, ///< a error object file (.o)
kindNormalizedObject, ///< a normalized file (.o)
kindMachObject, ///< a MachO object file (.o)
kindCEntryObject, ///< a file for CEntries
kindHeaderObject, ///< a file for file headers
kindEntryObject, ///< a file for the entry
kindUndefinedSymsObject, ///< a file for undefined symbols
kindStubHelperObject, ///< a file for stub helpers
kindResolverMergedObject, ///< the resolver merged file.
kindSectCreateObject, ///< a sect create object file (.o)
kindSharedLibrary, ///< shared library (.so)
kindArchiveLibrary ///< archive (.a)
};
/// \brief Returns file kind. Need for dyn_cast<> on File objects.
@ -97,17 +106,69 @@ class File {
}
/// The type of atom mutable container.
template <typename T> using AtomVector = std::vector<const T *>;
template <typename T> using AtomVector = std::vector<OwningAtomPtr<T>>;
/// The range type for the atoms. It's backed by a std::vector, but hides
/// its member functions so that you can only call begin or end.
/// The range type for the atoms.
template <typename T> class AtomRange {
public:
AtomRange(AtomVector<T> v) : _v(v) {}
typename AtomVector<T>::const_iterator begin() const { return _v.begin(); }
typename AtomVector<T>::const_iterator end() const { return _v.end(); }
typename AtomVector<T>::iterator begin() { return _v.begin(); }
typename AtomVector<T>::iterator end() { return _v.end(); }
AtomRange(AtomVector<T> &v) : _v(v) {}
AtomRange(const AtomVector<T> &v) : _v(const_cast<AtomVector<T> &>(v)) {}
typedef std::pointer_to_unary_function<const OwningAtomPtr<T>&,
const T*> ConstDerefFn;
typedef std::pointer_to_unary_function<OwningAtomPtr<T>&, T*> DerefFn;
typedef llvm::mapped_iterator<typename AtomVector<T>::const_iterator,
ConstDerefFn> ConstItTy;
typedef llvm::mapped_iterator<typename AtomVector<T>::iterator,
DerefFn> ItTy;
static const T* DerefConst(const OwningAtomPtr<T> &p) {
return p.get();
}
static T* Deref(OwningAtomPtr<T> &p) {
return p.get();
}
ConstItTy begin() const {
return ConstItTy(_v.begin(), ConstDerefFn(DerefConst));
}
ConstItTy end() const {
return ConstItTy(_v.end(), ConstDerefFn(DerefConst));
}
ItTy begin() {
return ItTy(_v.begin(), DerefFn(Deref));
}
ItTy end() {
return ItTy(_v.end(), DerefFn(Deref));
}
llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() {
return llvm::make_range(_v.begin(), _v.end());
}
llvm::iterator_range<typename AtomVector<T>::iterator> owning_ptrs() const {
return llvm::make_range(_v.begin(), _v.end());
}
bool empty() const {
return _v.empty();
}
size_t size() const {
return _v.size();
}
const OwningAtomPtr<T> &operator[](size_t idx) const {
return _v[idx];
}
OwningAtomPtr<T> &operator[](size_t idx) {
return _v[idx];
}
private:
AtomVector<T> &_v;
@ -115,19 +176,25 @@ class File {
/// \brief Must be implemented to return the AtomVector object for
/// all DefinedAtoms in this File.
virtual const AtomVector<DefinedAtom> &defined() const = 0;
virtual const AtomRange<DefinedAtom> defined() const = 0;
/// \brief Must be implemented to return the AtomVector object for
/// all UndefinedAtoms in this File.
virtual const AtomVector<UndefinedAtom> &undefined() const = 0;
virtual const AtomRange<UndefinedAtom> undefined() const = 0;
/// \brief Must be implemented to return the AtomVector object for
/// all SharedLibraryAtoms in this File.
virtual const AtomVector<SharedLibraryAtom> &sharedLibrary() const = 0;
virtual const AtomRange<SharedLibraryAtom> sharedLibrary() const = 0;
/// \brief Must be implemented to return the AtomVector object for
/// all AbsoluteAtoms in this File.
virtual const AtomVector<AbsoluteAtom> &absolute() const = 0;
virtual const AtomRange<AbsoluteAtom> absolute() const = 0;
/// Drop all of the atoms owned by this file. This will result in all of
/// the atoms running their destructors.
/// This is required because atoms may be allocated on a BumpPtrAllocator
/// of a different file. We need to destruct all atoms before any files.
virtual void clearAtoms() = 0;
/// \brief If a file is parsed using a different method than doParse(),
/// one must use this method to set the last error status, so that
@ -137,14 +204,6 @@ class File {
std::error_code parse();
// This function is called just before the core linker tries to use
// a file. Currently the PECOFF reader uses this to trigger the
// driver to parse .drectve section (which contains command line options).
// If you want to do something having side effects, don't do that in
// doParse() because a file could be pre-loaded speculatively.
// Use this hook instead.
virtual void beforeLink() {}
// Usually each file owns a std::unique_ptr<MemoryBuffer>.
// However, there's one special case. If a file is an archive file,
// the archive file and its children all shares the same memory buffer.
@ -190,23 +249,26 @@ class File {
class ErrorFile : public File {
public:
ErrorFile(StringRef path, std::error_code ec)
: File(path, kindObject), _ec(ec) {}
: File(path, kindErrorObject), _ec(ec) {}
std::error_code doParse() override { return _ec; }
const AtomVector<DefinedAtom> &defined() const override {
const AtomRange<DefinedAtom> defined() const override {
llvm_unreachable("internal error");
}
const AtomVector<UndefinedAtom> &undefined() const override {
const AtomRange<UndefinedAtom> undefined() const override {
llvm_unreachable("internal error");
}
const AtomVector<SharedLibraryAtom> &sharedLibrary() const override {
const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
llvm_unreachable("internal error");
}
const AtomVector<AbsoluteAtom> &absolute() const override {
const AtomRange<AbsoluteAtom> absolute() const override {
llvm_unreachable("internal error");
}
void clearAtoms() override {
}
private:
std::error_code _ec;
};

View File

@ -23,6 +23,7 @@
namespace llvm {
// ADT's.
class Error;
class StringRef;
class Twine;
class MemoryBuffer;
@ -38,6 +39,9 @@ namespace llvm {
template<typename T>
class ErrorOr;
template<typename T>
class Expected;
class raw_ostream;
// TODO: DenseMap, ...
}
@ -51,6 +55,7 @@ namespace lld {
using llvm::cast_or_null;
// ADT's.
using llvm::Error;
using llvm::StringRef;
using llvm::Twine;
using llvm::MemoryBuffer;
@ -61,6 +66,7 @@ namespace lld {
using llvm::SmallVectorImpl;
using llvm::SaveAndRestore;
using llvm::ErrorOr;
using llvm::Expected;
using llvm::raw_ostream;
} // end namespace lld.

View File

@ -13,9 +13,7 @@
#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "lld/Core/Node.h"
#include "lld/Core/Parallel.h"
#include "lld/Core/Reference.h"
#include "lld/Core/range.h"
#include "lld/Core/Reader.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
@ -33,16 +31,9 @@ class SharedLibraryFile;
///
/// The base class LinkingContext contains the options needed by core linking.
/// Subclasses of LinkingContext have additional options needed by specific
/// Writers. For example, ELFLinkingContext has methods that supplies
/// options to the ELF Writer and ELF Passes.
/// Writers.
class LinkingContext {
public:
/// \brief The types of output file that the linker creates.
enum class OutputFileType : uint8_t {
Default, // The default output type for this target
YAML, // The output type is set to YAML
};
virtual ~LinkingContext();
/// \name Methods needed by core linking
@ -78,28 +69,6 @@ class LinkingContext {
_deadStripRoots.push_back(symbolName);
}
/// Archive files (aka static libraries) are normally lazily loaded. That is,
/// object files within an archive are only loaded and linked in, if the
/// object file contains a DefinedAtom which will replace an existing
/// UndefinedAtom. If this method returns true, core linking will also look
/// for archive members to replace existing tentative definitions in addition
/// to replacing undefines. Note: a "tentative definition" (also called a
/// "common" symbols) is a C (but not C++) concept. They are modeled in lld
/// as a DefinedAtom with merge() of mergeAsTentative.
bool searchArchivesToOverrideTentativeDefinitions() const {
return _searchArchivesToOverrideTentativeDefinitions;
}
/// Normally core linking will turn a tentative definition into a real
/// definition if not replaced by a real DefinedAtom from some object file.
/// If this method returns true, core linking will search all supplied
/// dynamic shared libraries for symbol names that match remaining tentative
/// definitions. If any are found, the corresponding tentative definition
/// atom is replaced with SharedLibraryAtom.
bool searchSharedLibrariesToOverrideTentativeDefinitions() const {
return _searchSharedLibrariesToOverrideTentativeDefinitions;
}
/// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
/// SharedLibraryAtom for the link to be successful. This method controls
/// whether core linking prints out a list of remaining UndefinedAtoms.
@ -114,35 +83,6 @@ class LinkingContext {
/// whether core linking considers remaining undefines to be an error.
bool allowRemainingUndefines() const { return _allowRemainingUndefines; }
/// In the lld model, a SharedLibraryAtom is a proxy atom for something
/// that will be found in a dynamic shared library when the program runs.
/// A SharedLibraryAtom optionally contains the name of the shared library
/// in which to find the symbol name at runtime. Core linking may merge
/// two SharedLibraryAtom with the same name. If this method returns true,
/// when merging core linking will also verify that they both have the same
/// loadName() and if not print a warning.
///
/// \todo This should be a method core linking calls so that drivers can
/// format the warning as needed.
bool warnIfCoalesableAtomsHaveDifferentLoadName() const {
return _warnIfCoalesableAtomsHaveDifferentLoadName;
}
/// In C/C++ you can mark a function's prototype with
/// __attribute__((weak_import)) or __attribute__((weak)) to say the function
/// may not be available at runtime and/or build time and in which case its
/// address will evaluate to NULL. In lld this is modeled using the
/// UndefinedAtom::canBeNull() method. During core linking, UndefinedAtom
/// with the same name are automatically merged. If this method returns
/// true, core link also verifies that the canBeNull() value for merged
/// UndefinedAtoms are the same and warns if not.
///
/// \todo This should be a method core linking calls so that drivers can
/// format the warning as needed.
bool warnIfCoalesableAtomsHaveDifferentCanBeNull() const {
return _warnIfCoalesableAtomsHaveDifferentCanBeNull;
}
/// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a
/// SharedLibraryAtom for the link to be successful. This method controls
/// whether core linking considers remaining undefines from the shared library
@ -176,20 +116,7 @@ class LinkingContext {
}
void setDeadStripping(bool enable) { _deadStrip = enable; }
void setAllowDuplicates(bool enable) { _allowDuplicates = enable; }
void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; }
void setSearchArchivesToOverrideTentativeDefinitions(bool search) {
_searchArchivesToOverrideTentativeDefinitions = search;
}
void setSearchSharedLibrariesToOverrideTentativeDefinitions(bool search) {
_searchSharedLibrariesToOverrideTentativeDefinitions = search;
}
void setWarnIfCoalesableAtomsHaveDifferentCanBeNull(bool warn) {
_warnIfCoalesableAtomsHaveDifferentCanBeNull = warn;
}
void setWarnIfCoalesableAtomsHaveDifferentLoadName(bool warn) {
_warnIfCoalesableAtomsHaveDifferentLoadName = warn;
}
void setPrintRemainingUndefines(bool print) {
_printRemainingUndefines = print;
}
@ -199,27 +126,11 @@ class LinkingContext {
void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; }
void setLogInputFiles(bool log) { _logInputFiles = log; }
// Returns true if multiple definitions should not be treated as a
// fatal error.
bool getAllowDuplicates() const { return _allowDuplicates; }
void appendLLVMOption(const char *opt) { _llvmOptions.push_back(opt); }
void addAlias(StringRef from, StringRef to) { _aliasSymbols[from] = to; }
const std::map<std::string, std::string> &getAliases() const {
return _aliasSymbols;
}
std::vector<std::unique_ptr<Node>> &getNodes() { return _nodes; }
const std::vector<std::unique_ptr<Node>> &getNodes() const { return _nodes; }
/// Notify the LinkingContext when the symbol table found a name collision.
/// The useNew parameter specifies which the symbol table plans to keep,
/// but that can be changed by the LinkingContext. This is also an
/// opportunity for flavor specific processing.
virtual void notifySymbolTableCoalesce(const Atom *existingAtom,
const Atom *newAtom, bool &useNew) {}
/// This method adds undefined symbols specified by the -u option to
/// the list of undefined symbols known to the linker. This option essentially
/// forces an undefined symbol to be created. You may also need to call
@ -242,7 +153,7 @@ class LinkingContext {
/// Return the list of undefined symbols that are specified in the
/// linker command line, using the -u option.
range<const StringRef *> initialUndefinedSymbols() const {
ArrayRef<StringRef> initialUndefinedSymbols() const {
return _initialUndefinedSymbols;
}
@ -255,9 +166,7 @@ class LinkingContext {
bool validate(raw_ostream &diagnostics);
/// Formats symbol name for use in error messages.
virtual std::string demangle(StringRef symbolName) const {
return symbolName;
}
virtual std::string demangle(StringRef symbolName) const = 0;
/// @}
/// \name Methods used by Driver::link()
@ -269,19 +178,6 @@ class LinkingContext {
/// the linker to write to an in-memory buffer.
StringRef outputPath() const { return _outputPath; }
/// Select the kind of output file the linker creates from its textual name.
/// Only "yaml" (matched via StringRef::equals_lower) is recognized. Returns
/// true when the name was recognized and stored; otherwise returns false and
/// leaves the current output file type unchanged.
bool setOutputFileType(StringRef outputFileType) {
  // Reject anything that is not the single supported spelling up front.
  if (!outputFileType.equals_lower("yaml"))
    return false;
  _outputFileType = OutputFileType::YAML;
  return true;
}
/// Returns the output file type that the linker needs to create.
OutputFileType outputFileType() const { return _outputFileType; }
/// Accessor for Register object embedded in LinkingContext.
const Registry &registry() const { return _registry; }
Registry &registry() { return _registry; }
@ -289,25 +185,30 @@ class LinkingContext {
/// This method is called by core linking to give the Writer a chance
/// to add file format specific "files" to set of files to be linked. This is
/// how file format specific atoms can be added to the link.
virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &);
virtual void createImplicitFiles(std::vector<std::unique_ptr<File>> &) = 0;
/// This method is called by core linking to build the list of Passes to be
/// run on the merged/linked graph of all input files.
virtual void addPasses(PassManager &pm);
virtual void addPasses(PassManager &pm) = 0;
/// Calls through to the writeFile() method on the specified Writer.
///
/// \param linkedFile This is the merged/linked graph of all input file Atoms.
virtual std::error_code writeFile(const File &linkedFile) const;
virtual llvm::Error writeFile(const File &linkedFile) const;
/// Return the next ordinal and Increment it.
virtual uint64_t getNextOrdinalAndIncrement() const { return _nextOrdinal++; }
// This function is called just before the Resolver kicks in.
// Derived classes may use it to change the list of input files.
virtual void finalizeInputFiles() {}
virtual void finalizeInputFiles() = 0;
TaskGroup &getTaskGroup() { return _taskGroup; }
/// Callback invoked for each file the Resolver decides we are going to load.
/// This can be used to update context state based on the file, and emit
/// errors for any differences between the context state and a loaded file.
/// For example, we can error if we try to load a file which is a different
/// arch from that being linked.
virtual llvm::Error handleLoadedFile(File &file) = 0;
/// @}
protected:
@ -324,36 +225,25 @@ class LinkingContext {
virtual std::unique_ptr<File> createUndefinedSymbolFile() const;
std::unique_ptr<File> createUndefinedSymbolFile(StringRef filename) const;
/// Method to create an internal file for alias symbols
std::unique_ptr<File> createAliasSymbolFile() const;
StringRef _outputPath;
StringRef _entrySymbolName;
bool _deadStrip;
bool _allowDuplicates;
bool _globalsAreDeadStripRoots;
bool _searchArchivesToOverrideTentativeDefinitions;
bool _searchSharedLibrariesToOverrideTentativeDefinitions;
bool _warnIfCoalesableAtomsHaveDifferentCanBeNull;
bool _warnIfCoalesableAtomsHaveDifferentLoadName;
bool _printRemainingUndefines;
bool _allowRemainingUndefines;
bool _logInputFiles;
bool _allowShlibUndefines;
OutputFileType _outputFileType;
bool _deadStrip = false;
bool _globalsAreDeadStripRoots = false;
bool _printRemainingUndefines = true;
bool _allowRemainingUndefines = false;
bool _logInputFiles = false;
bool _allowShlibUndefines = false;
std::vector<StringRef> _deadStripRoots;
std::map<std::string, std::string> _aliasSymbols;
std::vector<const char *> _llvmOptions;
StringRefVector _initialUndefinedSymbols;
std::vector<std::unique_ptr<Node>> _nodes;
mutable llvm::BumpPtrAllocator _allocator;
mutable uint64_t _nextOrdinal;
mutable uint64_t _nextOrdinal = 0;
Registry _registry;
private:
/// Validate the subclass bits. Only called by validate.
virtual bool validateImpl(raw_ostream &diagnostics) = 0;
TaskGroup _taskGroup;
};
} // end namespace lld

View File

@ -57,7 +57,7 @@ class GroupEnd : public Node {
class FileNode : public Node {
public:
explicit FileNode(std::unique_ptr<File> f)
: Node(Node::Kind::File), _file(std::move(f)), _asNeeded(false) {}
: Node(Node::Kind::File), _file(std::move(f)) {}
static bool classof(const Node *a) {
return a->kind() == Node::Kind::File;
@ -65,12 +65,8 @@ class FileNode : public Node {
File *getFile() { return _file.get(); }
void setAsNeeded(bool val) { _asNeeded = val; }
bool asNeeded() const { return _asNeeded; }
protected:
std::unique_ptr<File> _file;
bool _asNeeded;
};
} // namespace lld

View File

@ -12,7 +12,6 @@
#include "lld/Core/Instrumentation.h"
#include "lld/Core/LLVM.h"
#include "lld/Core/range.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/thread.h"
@ -63,41 +62,6 @@ class Latch {
}
};
/// \brief A minimal single-assignment future.
///
/// std::future and std::promise in old libstdc++ have a threading bug; there
/// is a small chance that a call of future::get throws an exception in the
/// normal use case. We want to use our own future implementation until we
/// drop support of old versions of libstdc++.
/// https://gcc.gnu.org/ml/gcc-patches/2014-05/msg01389.html
///
/// set() must be called at most once; get() blocks until it has been called.
template<typename T> class Future {
public:
  Future() : _hasValue(false) {}

  /// Stores \p val and wakes every thread blocked in get().
  void set(T &&val) {
    {
      std::unique_lock<std::mutex> lock(_mutex);
      // Checked under the lock so the read cannot race with another set().
      assert(!_hasValue);
      // A named rvalue reference is an lvalue, so a plain assignment would
      // copy. Cast back to an rvalue (exactly what std::move does) so the
      // value is moved and move-only types are supported.
      _val = static_cast<T &&>(val);
      _hasValue = true;
    }
    // Notify after the lock is released so woken waiters can take it at once.
    _cond.notify_all();
  }

  /// Blocks until set() has been called, then returns the stored value.
  T &get() {
    std::unique_lock<std::mutex> lock(_mutex);
    // The predicate overload returns immediately when the value is already
    // present, so no separate pre-check is needed.
    _cond.wait(lock, [&] { return _hasValue; });
    return _val;
  }

private:
  T _val;
  bool _hasValue;
  std::mutex _mutex;
  std::condition_variable _cond;
};
// Classes in this namespace are implementation details of this header.
namespace internal {

View File

@ -13,7 +13,7 @@
#include "lld/Core/Atom.h"
#include "lld/Core/File.h"
#include "lld/Core/Reference.h"
#include "lld/Core/range.h"
#include "llvm/Support/Error.h"
#include <vector>
namespace lld {
@ -34,7 +34,7 @@ class Pass {
virtual ~Pass() { }
/// Do the actual work of the Pass.
virtual std::error_code perform(SimpleFile &mergedFile) = 0;
virtual llvm::Error perform(SimpleFile &mergedFile) = 0;
protected:
// Only subclasses can be instantiated.

View File

@ -12,6 +12,7 @@
#include "lld/Core/LLVM.h"
#include "lld/Core/Pass.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <vector>
@ -31,11 +32,11 @@ class PassManager {
_passes.push_back(std::move(pass));
}
std::error_code runOnFile(SimpleFile &file) {
llvm::Error runOnFile(SimpleFile &file) {
for (std::unique_ptr<Pass> &pass : _passes)
if (std::error_code EC = pass->perform(file))
if (llvm::Error EC = pass->perform(file))
return EC;
return std::error_code();
return llvm::Error();
}
private:

View File

@ -27,17 +27,14 @@ class IO;
}
namespace lld {
class ELFLinkingContext;
class File;
class LinkingContext;
class PECOFFLinkingContext;
class MachOLinkingContext;
/// \brief An abstract class for reading object files, library files, and
/// executable files.
///
/// Each file format (e.g. ELF, mach-o, PECOFF, etc) have a concrete
/// subclass of Reader.
/// Each file format (e.g. mach-o, etc) has a concrete subclass of Reader.
class Reader {
public:
virtual ~Reader() {}
@ -114,11 +111,7 @@ class Registry {
// as parameters to the addSupport*() method.
void addSupportArchives(bool logLoading);
void addSupportYamlFiles();
void addSupportCOFFObjects(PECOFFLinkingContext &);
void addSupportCOFFImportLibraries(PECOFFLinkingContext &);
void addSupportMachOObjects(MachOLinkingContext &);
void addSupportELFObjects(ELFLinkingContext &);
void addSupportELFDynamicSharedObjects(ELFLinkingContext &);
/// To convert between kind values and names, the registry walks the list
/// of registered kind tables. Each table is a zero terminated array of

View File

@ -25,15 +25,13 @@ class Atom;
/// the Atom, then the function Atom will have a Reference of: offsetInAtom=40,
/// kind=callsite, target=malloc, addend=0.
///
/// Besides supporting traditional "relocations", References are also used
/// grouping atoms (group comdat), forcing layout (one atom must follow
/// another), marking data-in-code (jump tables or ARM constants), etc.
/// Besides supporting traditional "relocations", references are also used for
/// forcing layout (one atom must follow another), marking data-in-code
/// (jump tables or ARM constants), etc.
///
/// The "kind" of a reference is a tuple of <namespace, arch, value>. This
/// enables us to re-use existing relocation types defined for various
/// file formats and architectures. For instance, in ELF the relocation type 10
/// means R_X86_64_32 for x86_64, and R_386_GOTPC for i386. For PE/COFF
/// relocation 10 means IMAGE_REL_AMD64_SECTION.
/// file formats and architectures.
///
/// References and atoms form a directed graph. The dead-stripping pass
/// traverses them starting from dead-strip root atoms to garbage collect
@ -47,16 +45,14 @@ class Reference {
enum class KindNamespace {
all = 0,
testing = 1,
ELF = 2,
COFF = 3,
mach_o = 4,
mach_o = 2,
};
KindNamespace kindNamespace() const { return (KindNamespace)_kindNamespace; }
void setKindNamespace(KindNamespace ns) { _kindNamespace = (uint8_t)ns; }
// Which architecture the kind value is for.
enum class KindArch { all, AArch64, AMDGPU, ARM, Hexagon, Mips, x86, x86_64 };
enum class KindArch { all, AArch64, ARM, x86, x86_64};
KindArch kindArch() const { return (KindArch)_kindArch; }
void setKindArch(KindArch a) { _kindArch = (uint8_t)a; }
@ -76,8 +72,6 @@ class Reference {
// kindLayoutAfter is treated as a bidirected edge by the dead-stripping
// pass.
kindLayoutAfter = 1,
// kindGroupChild is treated as a bidirected edge too.
kindGroupChild,
kindAssociate,
};

View File

@ -17,6 +17,7 @@
#include "lld/Core/SymbolTable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/ErrorOr.h"
#include <set>
#include <unordered_map>
#include <unordered_set>
@ -31,25 +32,23 @@ class LinkingContext;
/// and producing a merged graph.
class Resolver {
public:
Resolver(LinkingContext &ctx)
: _ctx(ctx), _symbolTable(ctx), _result(new MergedFile()),
_fileIndex(0) {}
Resolver(LinkingContext &ctx) : _ctx(ctx), _result(new MergedFile()) {}
// InputFiles::Handler methods
void doDefinedAtom(const DefinedAtom&);
bool doUndefinedAtom(const UndefinedAtom &);
void doSharedLibraryAtom(const SharedLibraryAtom &);
void doAbsoluteAtom(const AbsoluteAtom &);
void doDefinedAtom(OwningAtomPtr<DefinedAtom> atom);
bool doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom);
void doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom);
void doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom);
// Handle files, this adds atoms from the current file that's
// being processed by the resolver
bool handleFile(File &);
llvm::Expected<bool> handleFile(File &);
// Handle an archive library file.
bool handleArchiveFile(File &);
llvm::Expected<bool> handleArchiveFile(File &);
// Handle a shared library file.
void handleSharedLibrary(File &);
llvm::Error handleSharedLibrary(File &);
/// @brief do work of merging and resolving and return list
bool resolve();
@ -57,37 +56,30 @@ class Resolver {
std::unique_ptr<SimpleFile> resultFile() { return std::move(_result); }
private:
typedef std::function<void(StringRef, bool)> UndefCallback;
typedef std::function<llvm::Expected<bool>(StringRef)> UndefCallback;
bool undefinesAdded(int begin, int end);
File *getFile(int &index);
/// \brief Add section group/.gnu.linkonce if it does not exist previously.
void maybeAddSectionGroupOrGnuLinkOnce(const DefinedAtom &atom);
/// \brief The main function that iterates over the files to resolve
void updatePreloadArchiveMap();
bool resolveUndefines();
void updateReferences();
void deadStripOptimize();
bool checkUndefines();
void removeCoalescedAwayAtoms();
void checkDylibSymbolCollisions();
void forEachUndefines(File &file, bool searchForOverrides, UndefCallback callback);
llvm::Expected<bool> forEachUndefines(File &file, UndefCallback callback);
void markLive(const Atom *atom);
void addAtoms(const std::vector<const DefinedAtom *>&);
void maybePreloadArchiveMember(StringRef sym);
class MergedFile : public SimpleFile {
public:
MergedFile() : SimpleFile("<linker-internal>") {}
void addAtoms(std::vector<const Atom*>& atoms);
MergedFile() : SimpleFile("<linker-internal>", kindResolverMergedObject) {}
void addAtoms(llvm::MutableArrayRef<OwningAtomPtr<Atom>> atoms);
};
LinkingContext &_ctx;
SymbolTable _symbolTable;
std::vector<const Atom *> _atoms;
std::vector<OwningAtomPtr<Atom>> _atoms;
std::set<const Atom *> _deadStripRoots;
llvm::DenseSet<const Atom *> _liveAtoms;
llvm::DenseSet<const Atom *> _deadAtoms;
@ -97,11 +89,6 @@ class Resolver {
// --start-group and --end-group
std::vector<File *> _files;
std::map<File *, bool> _newUndefinesAdded;
size_t _fileIndex;
// Preloading
llvm::StringMap<ArchiveLibraryFile *> _archiveMap;
llvm::DenseSet<ArchiveLibraryFile *> _archiveSeen;
// List of undefined symbols.
std::vector<StringRef> _undefines;

View File

@ -1,29 +0,0 @@
//===- lld/Core/STDExtra.h - Helpers for the stdlib -----------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_STD_EXTRA_H
#define LLD_CORE_STD_EXTRA_H

// std::unique_ptr is used below; include its header so this file can be
// included on its own.
#include <memory>

namespace lld {

/// \brief Deleter for smart pointers that only calls the destructor. Memory is
/// managed elsewhere. A common use of this is for things allocated with a
/// BumpPtrAllocator.
template <class T>
struct destruct_delete {
  /// Runs the destructor of \p ptr in place; the storage is not released.
  /// The operator is const because the deleter carries no state.
  void operator ()(T *ptr) const {
    ptr->~T();
  }
};

/// \brief std::unique_ptr that destroys its pointee without deallocating it.
template <class T>
using unique_bump_ptr = std::unique_ptr<T, destruct_delete<T>>;

} // end namespace lld

#endif

View File

@ -25,9 +25,7 @@ class SharedLibraryAtom : public Atom {
};
/// Returns shared library name used to load it at runtime.
/// On linux that is the DT_NEEDED name.
/// On Darwin it is the LC_DYLIB_LOAD dylib name.
/// On Windows it is the DLL name that to be referred from .idata section.
virtual StringRef loadName() const = 0;
/// Returns if shared library symbol can be missing at runtime and if
@ -46,6 +44,8 @@ class SharedLibraryAtom : public Atom {
protected:
SharedLibraryAtom() : Atom(definitionSharedLibrary) {}
~SharedLibraryAtom() override = default;
};
} // namespace lld

View File

@ -27,29 +27,34 @@ class SharedLibraryFile : public File {
/// Check if the shared library exports a symbol with the specified name.
/// If so, return a SharedLibraryAtom which represents that exported
/// symbol. Otherwise return nullptr.
virtual const SharedLibraryAtom *exports(StringRef name,
bool dataSymbolOnly) const = 0;
virtual OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const = 0;
// Returns DSO name. It's the soname (ELF), the install name (MachO) or
// the import name (Windows).
// Returns the install name.
virtual StringRef getDSOName() const = 0;
const AtomVector<DefinedAtom> &defined() const override {
const AtomRange<DefinedAtom> defined() const override {
return _definedAtoms;
}
const AtomVector<UndefinedAtom> &undefined() const override {
const AtomRange<UndefinedAtom> undefined() const override {
return _undefinedAtoms;
}
const AtomVector<SharedLibraryAtom> &sharedLibrary() const override {
const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
return _sharedLibraryAtoms;
}
const AtomVector<AbsoluteAtom> &absolute() const override {
const AtomRange<AbsoluteAtom> absolute() const override {
return _absoluteAtoms;
}
/// Empties the defined, undefined, shared-library and absolute atom
/// containers held by this file, in that order.
void clearAtoms() override {
_definedAtoms.clear();
_undefinedAtoms.clear();
_sharedLibraryAtoms.clear();
_absoluteAtoms.clear();
}
protected:
/// only subclasses of SharedLibraryFile can be instantiated
explicit SharedLibraryFile(StringRef path) : File(path, kindSharedLibrary) {}

View File

@ -15,36 +15,60 @@
#ifndef LLD_CORE_SIMPLE_H
#define LLD_CORE_SIMPLE_H
#include "lld/Core/AbsoluteAtom.h"
#include "lld/Core/Atom.h"
#include "lld/Core/DefinedAtom.h"
#include "lld/Core/File.h"
#include "lld/Core/ArchiveLibraryFile.h"
#include "lld/Core/LinkingContext.h"
#include "lld/Core/Reference.h"
#include "lld/Core/SharedLibraryAtom.h"
#include "lld/Core/UndefinedAtom.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include <atomic>
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
namespace lld {
class SimpleFile : public File {
public:
SimpleFile(StringRef path) : File(path, kindObject) {}
SimpleFile(StringRef path, File::Kind kind)
: File(path, kind) {}
void addAtom(const DefinedAtom &a) { _defined.push_back(&a); }
void addAtom(const UndefinedAtom &a) { _undefined.push_back(&a); }
void addAtom(const SharedLibraryAtom &a) { _shared.push_back(&a); }
void addAtom(const AbsoluteAtom &a) { _absolute.push_back(&a); }
/// Explicitly empties all four atom containers on destruction.
/// NOTE(review): presumably done so the atoms are released before the rest
/// of the file is torn down -- confirm against the base class.
~SimpleFile() override {
_defined.clear();
_undefined.clear();
_shared.clear();
_absolute.clear();
}
void addAtom(DefinedAtom &a) {
_defined.push_back(OwningAtomPtr<DefinedAtom>(&a));
}
void addAtom(UndefinedAtom &a) {
_undefined.push_back(OwningAtomPtr<UndefinedAtom>(&a));
}
void addAtom(SharedLibraryAtom &a) {
_shared.push_back(OwningAtomPtr<SharedLibraryAtom>(&a));
}
void addAtom(AbsoluteAtom &a) {
_absolute.push_back(OwningAtomPtr<AbsoluteAtom>(&a));
}
void addAtom(const Atom &atom) {
if (auto *p = dyn_cast<DefinedAtom>(&atom)) {
_defined.push_back(p);
addAtom(const_cast<DefinedAtom &>(*p));
} else if (auto *p = dyn_cast<UndefinedAtom>(&atom)) {
_undefined.push_back(p);
addAtom(const_cast<UndefinedAtom &>(*p));
} else if (auto *p = dyn_cast<SharedLibraryAtom>(&atom)) {
_shared.push_back(p);
addAtom(const_cast<SharedLibraryAtom &>(*p));
} else if (auto *p = dyn_cast<AbsoluteAtom>(&atom)) {
_absolute.push_back(p);
addAtom(const_cast<AbsoluteAtom &>(*p));
} else {
llvm_unreachable("atom has unknown definition kind");
}
@ -52,26 +76,33 @@ class SimpleFile : public File {
void removeDefinedAtomsIf(std::function<bool(const DefinedAtom *)> pred) {
auto &atoms = _defined;
auto newEnd = std::remove_if(atoms.begin(), atoms.end(), pred);
auto newEnd = std::remove_if(atoms.begin(), atoms.end(),
[&pred](OwningAtomPtr<DefinedAtom> &p) {
return pred(p.get());
});
atoms.erase(newEnd, atoms.end());
}
const AtomVector<DefinedAtom> &defined() const override { return _defined; }
const AtomRange<DefinedAtom> defined() const override { return _defined; }
const AtomVector<UndefinedAtom> &undefined() const override {
const AtomRange<UndefinedAtom> undefined() const override {
return _undefined;
}
const AtomVector<SharedLibraryAtom> &sharedLibrary() const override {
const AtomRange<SharedLibraryAtom> sharedLibrary() const override {
return _shared;
}
const AtomVector<AbsoluteAtom> &absolute() const override {
const AtomRange<AbsoluteAtom> absolute() const override {
return _absolute;
}
typedef range<std::vector<const DefinedAtom *>::iterator> DefinedAtomRange;
DefinedAtomRange definedAtoms() { return make_range(_defined); }
/// Empties the defined, undefined, shared-library and absolute atom
/// containers held by this file, in that order.
void clearAtoms() override {
_defined.clear();
_undefined.clear();
_shared.clear();
_absolute.clear();
}
private:
AtomVector<DefinedAtom> _defined;
@ -80,48 +111,6 @@ class SimpleFile : public File {
AtomVector<AbsoluteAtom> _absolute;
};
/// \brief Archive library file usable as a virtual container for symbols
/// that are added dynamically in response to calls to find().
///
/// As written, the class exposes four atom containers that it never fills,
/// find() always returns nullptr and parseAllMembers() reports success
/// without adding any file.
class SimpleArchiveLibraryFile : public ArchiveLibraryFile {
public:
  SimpleArchiveLibraryFile(StringRef filename)
      : ArchiveLibraryFile(filename) {}

  const AtomVector<DefinedAtom> &defined() const override {
    return _definedAtoms;
  }

  const AtomVector<UndefinedAtom> &undefined() const override {
    return _undefinedAtoms;
  }

  const AtomVector<SharedLibraryAtom> &sharedLibrary() const override {
    return _sharedLibraryAtoms;
  }

  const AtomVector<AbsoluteAtom> &absolute() const override {
    return _absoluteAtoms;
  }

  /// Hook for descendants: do some checks here and return dynamically
  /// generated files with atoms. The base implementation finds nothing.
  File *find(StringRef sym, bool dataSymbolOnly) override {
    return nullptr;
  }

  /// Leaves \p result untouched and returns a default-constructed (success)
  /// error code.
  std::error_code
  parseAllMembers(std::vector<std::unique_ptr<File>> &result) override {
    return {};
  }

private:
  AtomVector<DefinedAtom> _definedAtoms;
  AtomVector<UndefinedAtom> _undefinedAtoms;
  AtomVector<SharedLibraryAtom> _sharedLibraryAtoms;
  AtomVector<AbsoluteAtom> _absoluteAtoms;
};
class SimpleReference : public Reference {
public:
SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch,
@ -159,12 +148,13 @@ class SimpleReference : public Reference {
SimpleReference *_prev;
};
}
} // end namespace lld
// ilist will lazily create a sentinel (so end() can return a node past the
// end of the list). We need this trait so that the sentinel is allocated
// via the BumpPtrAllocator.
namespace llvm {
template<>
struct ilist_sentinel_traits<lld::SimpleReference> {
@ -200,7 +190,8 @@ struct ilist_sentinel_traits<lld::SimpleReference> {
private:
mutable llvm::BumpPtrAllocator *_allocator;
};
}
} // end namespace llvm
namespace lld {
@ -211,6 +202,10 @@ class SimpleDefinedAtom : public DefinedAtom {
_references.setAllocator(&f.allocator());
}
/// Detaches every node from the reference list via
/// clearAndLeakNodesUnsafely() instead of a regular clear.
/// NOTE(review): presumably because the reference nodes are placement-new'd
/// from the owning file's allocator and must not be deleted individually --
/// confirm.
~SimpleDefinedAtom() override {
_references.clearAndLeakNodesUnsafely();
}
const File &file() const override { return _file; }
StringRef name() const override { return StringRef(); }
@ -256,9 +251,10 @@ class SimpleDefinedAtom : public DefinedAtom {
it = reinterpret_cast<const void*>(next);
}
void addReference(Reference::KindNamespace ns, Reference::KindArch arch,
void addReference(Reference::KindNamespace ns,
Reference::KindArch arch,
Reference::KindValue kindValue, uint64_t off,
const Atom *target, Reference::Addend a) {
const Atom *target, Reference::Addend a) override {
assert(target && "trying to create reference to nothing");
auto node = new (_file.allocator())
SimpleReference(ns, arch, kindValue, off, target, a);
@ -290,6 +286,7 @@ class SimpleDefinedAtom : public DefinedAtom {
_references.push_back(node);
}
}
void setOrdinal(uint64_t ord) { _ordinal = ord; }
private:
@ -306,6 +303,8 @@ class SimpleUndefinedAtom : public UndefinedAtom {
assert(!name.empty() && "UndefinedAtoms must have a name");
}
~SimpleUndefinedAtom() override = default;
/// file - returns the File that produced/owns this Atom
const File &file() const override { return _file; }
@ -320,23 +319,6 @@ class SimpleUndefinedAtom : public UndefinedAtom {
StringRef _name;
};
/// Concrete AbsoluteAtom that stores the file, name, scope and value it was
/// constructed with and hands them back unchanged from its accessors.
class SimpleAbsoluteAtom : public AbsoluteAtom {
public:
  SimpleAbsoluteAtom(const File &owner, StringRef atomName, Scope atomScope,
                     uint64_t atomValue)
      : _file(owner), _name(atomName), _scope(atomScope), _value(atomValue) {}

  /// The file supplied at construction.
  const File &file() const override {
    return _file;
  }

  /// The name supplied at construction.
  StringRef name() const override {
    return _name;
  }

  /// The value supplied at construction.
  uint64_t value() const override {
    return _value;
  }

  /// The scope supplied at construction.
  Scope scope() const override {
    return _scope;
  }

private:
  const File &_file;
  StringRef _name;
  Scope _scope;
  uint64_t _value;
};
} // end namespace lld
#endif
#endif // LLD_CORE_SIMPLE_H

View File

@ -34,8 +34,6 @@ class UndefinedAtom;
/// if an atom has been coalesced away.
class SymbolTable {
public:
explicit SymbolTable(LinkingContext &);
/// @brief add atom to symbol table
bool add(const DefinedAtom &);
@ -70,13 +68,6 @@ class SymbolTable {
/// @brief if atom has been coalesced away, return true
bool isCoalescedAway(const Atom *);
/// @brief Find a group atom.
const Atom *findGroup(StringRef name);
/// @brief Adds a group atom and returns true/false depending on whether it
/// previously existed.
bool addGroup(const DefinedAtom &da);
private:
typedef llvm::DenseMap<const Atom *, const Atom *> AtomToAtom;
@ -105,10 +96,8 @@ class SymbolTable {
bool addByName(const Atom &);
bool addByContent(const DefinedAtom &);
LinkingContext &_ctx;
AtomToAtom _replacedAtoms;
NameToAtom _nameTable;
NameToAtom _groupTable;
AtomContentSet _contentTable;
};

View File

@ -57,16 +57,10 @@ class UndefinedAtom : public Atom {
/// Type query for pointers that are already UndefinedAtom: always true.
static bool classof(const UndefinedAtom *) {
  return true;
}
/// Returns the synonym of this undefined symbol as an undefined atom, if it
/// has one. This is mainly used in COFF, where an unresolved external symbol
/// can carry at most one optional name (sym2) besides its regular name
/// (sym1). When a definition of sym1 exists, sym1 is resolved normally.
/// Otherwise every reference to sym1 refers to sym2 instead, and sym2 must
/// then be resolved or the link fails.
///
/// This base implementation reports no synonym (nullptr).
virtual const UndefinedAtom *fallback() const {
  return nullptr;
}
protected:
/// Initializes the Atom base with the definitionUndefined kind.
UndefinedAtom()
    : Atom(definitionUndefined) {}
~UndefinedAtom() override = default;
};
} // namespace lld

View File

@ -11,25 +11,24 @@
#define LLD_CORE_WRITER_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <vector>
namespace lld {
class ELFLinkingContext;
class File;
class LinkingContext;
class MachOLinkingContext;
class PECOFFLinkingContext;
/// \brief The Writer is an abstract class for writing object files, shared
/// library files, and executable files. Each file format (e.g. ELF, mach-o,
/// PECOFF, etc) have a concrete subclass of Writer.
/// library files, and executable files. Each file format (e.g. mach-o, etc)
/// has a concrete subclass of Writer.
class Writer {
public:
virtual ~Writer();
/// \brief Write a file from the supplied File object
virtual std::error_code writeFile(const File &linkedFile, StringRef path) = 0;
virtual llvm::Error writeFile(const File &linkedFile, StringRef path) = 0;
/// \brief This method is called by Core Linking to give the Writer a chance
/// to add file format specific "files" to set of files to be linked. This is
@ -41,9 +40,7 @@ class Writer {
Writer();
};
std::unique_ptr<Writer> createWriterELF(const ELFLinkingContext &);
std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &);
std::unique_ptr<Writer> createWriterPECOFF(const PECOFFLinkingContext &);
std::unique_ptr<Writer> createWriterYAML(const LinkingContext &);
} // end namespace lld

View File

@ -1,738 +0,0 @@
//===-- lld/Core/range.h - Iterator ranges ----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Iterator range type based on c++1y range proposal.
///
/// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3350.html
///
//===----------------------------------------------------------------------===//
#ifndef LLD_CORE_RANGE_H
#define LLD_CORE_RANGE_H
#include "llvm/Support/Compiler.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
#include <iterator>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
namespace lld {
// Nothing in this namespace is part of the exported interface.
namespace detail {
using std::begin;
using std::end;
/// Used as the result type of undefined functions: begin_result and
/// end_result report this type when no matching begin()/end() call exists.
struct undefined {};

/// Exposes, as \c type, the result type of an unqualified \c begin() call on
/// a value of type \c R, or \c undefined when no such call is well-formed.
template <typename R> class begin_result {
  // Viable only when begin(t) compiles; otherwise removed by SFINAE.
  template <typename T> static auto check(T &&t) -> decltype(begin(t));
  // Catch-all used when the overload above is not viable.
  static undefined check(...);

public:
  typedef decltype(check(std::declval<R>())) type;
};

/// Exposes, as \c type, the result type of an unqualified \c end() call on a
/// value of type \c R, or \c undefined when no such call is well-formed.
template <typename R> class end_result {
  // Viable only when end(t) compiles; otherwise removed by SFINAE.
  template <typename T> static auto check(T &&t) -> decltype(end(t));
  // Catch-all used when the overload above is not viable.
  static undefined check(...);

public:
  typedef decltype(check(std::declval<R>())) type;
};

// Things that begin and end work on, in compatible ways, are
// ranges. [stmt.ranged]
//
// Comparing the two probe results alone is not enough: a type on which
// neither begin() nor end() works yields `undefined` for both, which would
// compare equal. Such types are therefore rejected explicitly.
template <typename R>
struct is_range
    : std::integral_constant<
          bool,
          !std::is_same<typename begin_result<R>::type, undefined>::value &&
              std::is_same<typename begin_result<R>::type,
                           typename end_result<R>::type>::value> {};
// Trait that is true for range types known to keep their elements in
// contiguous storage. It works purely through the specializations listed
// here, so it cannot detect \c range<>s or iterators. Adding a
// \c contiguous_iterator_tag would fix that.
template <typename Range>
struct is_contiguous_range : std::false_type {};

// References and const qualification are stripped before the lookup.
template <typename Range>
struct is_contiguous_range<Range &> : is_contiguous_range<Range> {};
template <typename Range>
struct is_contiguous_range<Range &&> : is_contiguous_range<Range> {};
template <typename Range>
struct is_contiguous_range<const Range> : is_contiguous_range<Range> {};

// Built-in arrays (with and without const elements).
template <typename Elem, size_t Count>
struct is_contiguous_range<Elem[Count]> : std::true_type {};
template <typename Elem, size_t Count>
struct is_contiguous_range<const Elem[Count]> : std::true_type {};

// Standard containers listed as contiguous.
template <typename Elem, size_t Count>
struct is_contiguous_range<std::array<Elem, Count> > : std::true_type {};
template <typename Char, typename Traits, typename Alloc>
struct is_contiguous_range<std::basic_string<Char, Traits, Alloc> >
    : std::true_type {};
template <typename Elem, typename Alloc>
struct is_contiguous_range<std::vector<Elem, Alloc> > : std::true_type {};
// Strips const/volatile from every level of a (possibly multi-level)
// pointer type, not only from the outermost level.
//
// Base case: nothing left to strip.
template <typename T> struct remove_all_cv_ptr {
  using type = T;
};
// Pointer: clean the pointee, then re-apply the pointer.
template <typename Pointee> struct remove_all_cv_ptr<Pointee *> {
  using type = typename remove_all_cv_ptr<Pointee>::type *;
};
// Qualified type: drop the qualifier(s) and recurse.
template <typename Bare> struct remove_all_cv_ptr<const Bare> {
  using type = typename remove_all_cv_ptr<Bare>::type;
};
template <typename Bare> struct remove_all_cv_ptr<volatile Bare> {
  using type = typename remove_all_cv_ptr<Bare>::type;
};
template <typename Bare> struct remove_all_cv_ptr<const volatile Bare> {
  using type = typename remove_all_cv_ptr<Bare>::type;
};
// True only for pointer-to-pointer conversions that are implicitly allowed
// and leave the pointee type unchanged once every cv-qualifier has been
// removed. Any other pair of types yields false.
template <typename Source, typename Target>
struct conversion_preserves_array_indexing : std::false_type {};

template <typename SourceElem, typename TargetElem>
struct conversion_preserves_array_indexing<SourceElem *, TargetElem *>
    : std::integral_constant<
          bool,
          std::is_convertible<SourceElem *, TargetElem *>::value &&
              std::is_same<
                  typename remove_all_cv_ptr<SourceElem>::type,
                  typename remove_all_cv_ptr<TargetElem>::type>::value> {};
// Calls begin() unqualified so that argument-dependent lookup can select the
// overload. The declared return type is computed from begin(t) applied to
// the (lvalue) parameter, while the body forwards the argument.
// NOTE(review): for an rvalue argument those two calls may pick different
// overloads -- confirm that callers only pass lvalues.
template <typename T>
LLVM_CONSTEXPR auto adl_begin(T &&t) -> decltype(begin(t)) {
return begin(std::forward<T>(t));
}
// Calls end() unqualified so that argument-dependent lookup can select the
// overload. The declared return type is computed from end(t) applied to the
// (lvalue) parameter, while the body forwards the argument.
// NOTE(review): for an rvalue argument those two calls may pick different
// overloads -- confirm that callers only pass lvalues.
template <typename T> LLVM_CONSTEXPR auto adl_end(T &&t) -> decltype(end(t)) {
return end(std::forward<T>(t));
}
} // end namespace detail
/// A \c std::range<Iterator> represents a half-open iterator range
/// built from two iterators, \c 'begin', and \c 'end'. If \c end is
/// not reachable from \c begin, the behavior is undefined.
///
/// The mutability of elements of the range is controlled by the
/// Iterator argument. Instantiate
/// <code>range<<var>Foo</var>::iterator></code> or
/// <code>range<<var>T</var>*></code>, or call
/// <code>make_range(<var>non_const_container</var>)</code>, and you
/// get a mutable range. Instantiate
/// <code>range<<var>Foo</var>::const_iterator></code> or
/// <code>range<const <var>T</var>*></code>, or call
/// <code>make_range(<var>const_container</var>)</code>, and you get a
/// constant range.
///
/// \todo Inherit from std::pair<Iterator, Iterator>?
///
/// \todo This interface contains some functions that could be
/// provided as free algorithms rather than member functions, and all
/// of the <code>pop_*()</code> functions could be replaced by \c
/// slice() at the cost of some extra iterator copies. This makes
/// them more awkward to use, but makes it easier for users to write
/// their own types that follow the same interface. On the other hand,
/// a \c range_facade could be provided to help users write new
/// ranges, and it could provide the members. Such functions are
/// marked with a note in their documentation. (Of course, all of
/// these member functions could be provided as free functions using
/// the iterator access methods, but one goal here is to allow people
/// to program without touching iterators at all.)
template <typename Iterator> class range {
Iterator begin_, end_;
public:
/// \name types
/// @{
/// The iterator category of \c Iterator.
/// \todo Consider defining range categories. If they don't add
/// anything over the corresponding iterator categories, then
/// they're probably not worth defining.
using iterator_category =
    typename std::iterator_traits<Iterator>::iterator_category;
/// The type of elements of the range. Not cv-qualified.
using value_type = typename std::iterator_traits<Iterator>::value_type;
/// The type of the size of the range and offsets within the range.
using difference_type =
    typename std::iterator_traits<Iterator>::difference_type;
/// The return type of element access methods: \c front(), \c back(), etc.
using reference = typename std::iterator_traits<Iterator>::reference;
/// The pointer type reported by \c std::iterator_traits for \c Iterator.
using pointer = typename std::iterator_traits<Iterator>::pointer;
/// @}
/// \name constructors
/// @{
/// Creates a range whose two iterators are value-initialized (the
/// <code>begin_()</code> / <code>end_()</code> initializers below), so
/// pointer iterators start out null. For most \c Iterator types, this
/// will be an invalid range.
range() : begin_(), end_() {}
/// Builds the range [begin, end) by copying both iterators into the object.
/// \pre \c end is reachable from \c begin.
/// \post <code>this->begin() == begin && this->end() == end</code>
LLVM_CONSTEXPR range(Iterator begin, Iterator end)
: begin_(begin), end_(end) {}
/// Converting constructor from a range-like object \c r: stores the results
/// of the ADL-selected \c begin(r) and \c end(r).
///
/// \par Participates in overload resolution if:
/// - \c Iterator is not a pointer type,
/// - \c begin(r) and \c end(r) return the same type, and
/// - that type is convertible to \c Iterator.
///
/// \todo std::begin and std::end are overloaded between T& and
/// const T&, which means that if a container has only a non-const
/// begin or end method, then it's ill-formed to pass an rvalue to
/// the free function. To avoid that problem, we don't use
/// std::forward<> here, so begin() and end() are always called with
/// an lvalue. Another option would be to insist that rvalue
/// arguments to range() must have const begin() and end() methods.
template <typename R> LLVM_CONSTEXPR range(
R &&r,
typename std::enable_if<
!std::is_pointer<Iterator>::value &&
detail::is_range<R>::value &&
std::is_convertible<typename detail::begin_result<R>::type,
Iterator>::value>::type* = 0)
: begin_(detail::adl_begin(r)), end_(detail::adl_end(r)) {}
/// This constructor creates a \c range<T*> from any range with
/// contiguous iterators. Because dereferencing a past-the-end
/// iterator can be undefined behavior, empty ranges get initialized
/// with \c nullptr rather than \c &*begin().
///
/// \par Participates in overload resolution if:
/// - \c Iterator is a pointer type \c T*,
/// - \c begin(r) and \c end(r) return the same type,
/// - elements \c i of that type satisfy the invariant
/// <code>&*(i + N) == (&*i) + N</code>, and
/// - The result of <code>&*begin()</code> is convertible to \c T*
/// using only qualification conversions [conv.qual] (since
/// pointer conversions stop the pointer from pointing to an
/// array element).
///
/// \todo The <code>&*(i + N) == (&*i) + N</code> invariant is
/// currently impossible to check for user-defined types. We need a
/// \c contiguous_iterator_tag to let users assert it.
template <typename R> LLVM_CONSTEXPR range(
R &&r,
typename std::enable_if<
std::is_pointer<Iterator>::value &&
detail::is_contiguous_range<R>::value
// MSVC returns false for this in this context, but not if we lift it out of the
// constructor.
#ifndef _MSC_VER
&& detail::conversion_preserves_array_indexing<
decltype(&*detail::adl_begin(r)), Iterator>::value
#endif
>::type* = 0)
: begin_((detail::adl_begin(r) == detail::adl_end(r) &&
!std::is_pointer<decltype(detail::adl_begin(r))>::value)
// For non-pointers, &*begin(r) is only defined behavior
// if there's an element there. Otherwise, use nullptr
// since the user can't dereference it anyway. This _is_
// detectable.
? nullptr : &*detail::adl_begin(r)),
// end_ is computed from begin_ plus the element count; this relies on
// begin_ being declared, and therefore initialized, before end_.
end_(begin_ + (detail::adl_end(r) - detail::adl_begin(r))) {}
/// @}
/// \name iterator access
/// @{
/// \returns a copy of the stored iterator that marks the start of the range.
LLVM_CONSTEXPR Iterator begin() const { return begin_; }
/// \returns a copy of the stored iterator that marks the (exclusive) end of
/// the range.
LLVM_CONSTEXPR Iterator end() const { return end_; }
/// @}
/// \name element access
/// @{
/// Dereferences \c begin().
///
/// \par Complexity:
/// O(1)
/// \pre \c !empty()
/// \returns a reference to the element at the front of the range.
LLVM_CONSTEXPR reference front() const { return *begin(); }
/// \par Ill-formed unless:
/// \c iterator_category is convertible to \c
/// std::bidirectional_iterator_tag.
///
/// \par Complexity:
/// O(1) (Involves copying and decrementing an iterator, so not
/// quite as cheap as \c front())
///
/// \pre \c !empty()
/// \returns a reference to the element at the back of the range, i.e. the
/// one just before \c end().
LLVM_CONSTEXPR reference back() const {
static_assert(
std::is_convertible<iterator_category,
std::bidirectional_iterator_tag>::value,
"Can only retrieve the last element of a bidirectional range.");
// Unqualified call after the using-declaration: std::prev is always a
// candidate and argument-dependent lookup may add others.
using std::prev;
return *prev(end());
}
/// This method is drawn from scripting language indexing. It
/// indexes forward from the beginning of the range if the argument
/// is non-negative, or backwards from the end of the range if the
/// argument is negative.
///
/// \par Ill-formed unless:
/// \c iterator_category is convertible to \c
/// std::random_access_iterator_tag.
///
/// \par Complexity:
/// O(1)
///
/// \pre <code>abs(index) < size() || index == -size()</code>
///
/// \returns if <code>index >= 0</code>, a reference to the
/// <code>index</code>'th element in the range. Otherwise, a
/// reference to the <code>size()+index</code>'th element.
LLVM_CONSTEXPR reference operator[](difference_type index) const {
static_assert(std::is_convertible<iterator_category,
std::random_access_iterator_tag>::value,
"Can only index into a random-access range.");
// Less readable construction for constexpr support.
// Negative indices are applied to end(), the others to begin().
return index < 0 ? end()[index]
: begin()[index];
}
/// @}
/// \name size
/// @{
/// Compares \c begin() with \c end().
///
/// \par Complexity:
/// O(1)
/// \returns \c true if the range contains no elements.
LLVM_CONSTEXPR bool empty() const { return begin() == end(); }
/// \par Ill-formed unless:
/// \c iterator_category is convertible to
/// \c std::forward_iterator_tag.
///
/// \par Complexity:
/// O(1) if \c iterator_category is convertible to \c
/// std::random_access_iterator_tag. O(<code>size()</code>)
/// otherwise.
///
/// \returns the number of times \c pop_front() can be called before
/// \c empty() becomes true.
LLVM_CONSTEXPR difference_type size() const {
static_assert(std::is_convertible<iterator_category,
std::forward_iterator_tag>::value,
"Calling size on an input range would destroy the range.");
// Tag dispatch: the dispatch_size overload is chosen by the iterator
// category (subtraction for random access, std::distance otherwise).
return dispatch_size(iterator_category());
}
/// @}
/// \name traversal from the beginning of the range
/// @{
/// Drops the first element by incrementing the stored begin iterator once.
/// \pre \c !empty()
void pop_front() {
  ++begin_;
}
/// Advances the beginning of the range by \c n elements.
///
/// \par Complexity:
/// O(1) if \c iterator_category is convertible to \c
/// std::random_access_iterator_tag, O(<code>n</code>) otherwise.
///
/// \pre <code>n >= 0</code>, and there must be at least \c n
/// elements in the range.
void pop_front(difference_type n) {
  // A bare advance() call is found only through argument-dependent lookup,
  // which does not reach namespace std when Iterator is a raw pointer to a
  // non-std type (e.g. range<int *>). The using-declaration makes
  // std::advance a candidate in every case while still allowing ADL.
  using std::advance;
  advance(begin_, n);
}
/// Moves the beginning of the range forward by up to \c n elements and stops
/// early once the range is empty. A negative \c n leaves the range as is.
///
/// \par Complexity:
/// O(1) if \c iterator_category is convertible to \c
/// std::random_access_iterator_tag, O(<code>min(n,
/// <var>#-elements-in-range</var>)</code>) otherwise.
///
/// \note Could be provided as a free function with little-to-no
/// loss in efficiency.
void pop_front_upto(difference_type n) {
  // Negative requests are clamped to zero steps.
  const difference_type steps = std::max<difference_type>(0, n);
  advance_upto(begin_, steps, end_, iterator_category());
}
/// @}
/// \name traversal from the end of the range
/// @{
/// Drops the last element by decrementing the stored end iterator once.
///
/// \par Ill-formed unless:
/// \c iterator_category is convertible to
/// \c std::bidirectional_iterator_tag.
///
/// \par Complexity:
/// O(1)
///
/// \pre \c !empty()
void pop_back() {
  static_assert(
      std::is_convertible<iterator_category,
                          std::bidirectional_iterator_tag>::value,
      "Can only access the end of a bidirectional range.");
  --end_;
}
/// Moves the end of the range earlier by \c n elements.
///
/// \par Ill-formed unless:
/// \c iterator_category is convertible to
/// \c std::bidirectional_iterator_tag.
///
/// \par Complexity:
/// O(1) if \c iterator_category is convertible to \c
/// std::random_access_iterator_tag, O(<code>n</code>) otherwise.
///
/// \pre <code>n >= 0</code>, and there must be at least \c n
/// elements in the range.
void pop_back(difference_type n) {
  static_assert(std::is_convertible<iterator_category,
                                    std::bidirectional_iterator_tag>::value,
                "Can only access the end of a bidirectional range.");
  // A bare advance() call is found only through argument-dependent lookup,
  // which does not reach namespace std when Iterator is a raw pointer to a
  // non-std type (e.g. range<int *>). The using-declaration makes
  // std::advance a candidate in every case while still allowing ADL.
  using std::advance;
  advance(end_, -n);
}
/// Pulls the end of the range back by <code>min(n, size())</code> elements.
/// A negative \c n leaves the range as is.
///
/// \par Ill-formed unless:
/// \c iterator_category is convertible to
/// \c std::bidirectional_iterator_tag.
///
/// \par Complexity:
/// O(1) if \c iterator_category is convertible to \c
/// std::random_access_iterator_tag, O(<code>min(n,
/// <var>#-elements-in-range</var>)</code>) otherwise.
///
/// \note Could be provided as a free function with little-to-no
/// loss in efficiency.
void pop_back_upto(difference_type n) {
  static_assert(
      std::is_convertible<iterator_category,
                          std::bidirectional_iterator_tag>::value,
      "Can only access the end of a bidirectional range.");
  // Clamp negative requests to zero, then walk backwards (negative count)
  // from end_ without passing begin_.
  const difference_type steps = -std::max<difference_type>(0, n);
  advance_upto(end_, steps, begin_, iterator_category());
}
/// @}
/// \name creating derived ranges
/// @{
/// Divides the range into two pieces at \c index, where a positive
/// \c index represents an offset from the beginning of the range
/// and a negative \c index represents an offset from the end.
/// <code>range[index]</code> is the first element in the second
/// piece. If <code>index >= size()</code>, the second piece
/// will be empty. If <code>index < -size()</code>, the first
/// piece will be empty.
///
/// \par Ill-formed unless:
/// \c iterator_category is convertible to
/// \c std::forward_iterator_tag.
///
/// \par Complexity:
/// - If \c iterator_category is convertible to \c
/// std::random_access_iterator_tag: O(1)
/// - Otherwise, if \c iterator_category is convertible to \c
/// std::bidirectional_iterator_tag, \c abs(index) iterator increments
/// or decrements
/// - Otherwise, if <code>index >= 0</code>, \c index iterator
/// increments
/// - Otherwise, <code>size() + (size() + index)</code>
/// iterator increments.
///
/// \returns a pair of adjacent ranges.
///
/// \post
/// - <code>result.first.size() == min(index, this->size())</code>
/// - <code>result.first.end() == result.second.begin()</code>
/// - <code>result.first.size() + result.second.size()</code> <code>==
/// this->size()</code>
///
/// \todo split() could take an arbitrary number of indices and
/// return an <code>N+1</code>-element \c tuple<>. This is tricky to
/// implement with negative indices in the optimal number of
/// increments or decrements for a bidirectional iterator, but it
/// should be possible. Do we want it?
std::pair<range, range> split(difference_type index) const {
  static_assert(
      std::is_convertible<iterator_category,
                          std::forward_iterator_tag>::value,
      "Calling split on a non-forward range would return a useless "
      "first result.");
  if (index >= 0) {
    range second = *this;
    second.pop_front_upto(index);
    // Qualified on purpose: with a pointer Iterator (e.g. range<int *>)
    // argument-dependent lookup does not reach namespace std, so a bare
    // make_pair would not be found.
    return std::make_pair(range(begin(), second.begin()), second);
  }
  // Negative index: the cheapest strategy depends on the iterator category.
  return dispatch_split_neg(index, iterator_category());
}
/// \returns A sub-range from \c start to \c stop (not including \c
/// stop, as usual). \c start and \c stop are interpreted as for
/// <code>operator[]</code>, with negative values offsetting from
/// the end of the range. Omitting the \c stop argument makes the
/// sub-range continue to the end of the original range. Positive
/// arguments saturate to the end of the range, and negative
/// arguments saturate to the beginning. If \c stop is before \c
/// start, returns an empty range beginning and ending at \c start.
///
/// \par Ill-formed unless:
/// \c iterator_category is convertible to
/// \c std::forward_iterator_tag.
///
/// \par Complexity:
/// - If \c iterator_category is convertible to \c
/// std::random_access_iterator_tag: O(1)
/// - Otherwise, if \c iterator_category is convertible to \c
/// std::bidirectional_iterator_tag, at most <code>min(abs(start),
/// size()) + min(abs(stop), size())</code> iterator
/// increments or decrements
/// - Otherwise, if <code>start >= 0 && stop >= 0</code>,
/// <code>max(start, stop)</code> iterator increments
/// - Otherwise, <code>size() + max(start', stop')</code>
/// iterator increments, where \c start' and \c stop' are the
/// offsets of the elements \c start and \c stop refer to.
///
/// \note \c slice(start) should be implemented with a different
/// overload, rather than defaulting \c stop to
/// <code>numeric_limits<difference_type>::max()</code>, because
/// using a default would force non-random-access ranges to use an
/// O(<code>size()</code>) algorithm to compute the end rather
/// than the O(1) they're capable of.
range slice(difference_type start, difference_type stop) const {
  static_assert(
      std::is_convertible<iterator_category,
                          std::forward_iterator_tag>::value,
      "Calling slice on a non-forward range would destroy the original "
      "range.");
  // The dispatch_slice overload is selected by the iterator category.
  return dispatch_slice(start, stop, iterator_category());
}
/// \returns The sub-range that starts at \c start (interpreted as for
/// split(): negative values count from the end) and runs to the end of this
/// range. This is the second piece produced by <code>split(start)</code>.
range slice(difference_type start) const {
  static_assert(
      std::is_convertible<iterator_category,
                          std::forward_iterator_tag>::value,
      "Calling slice on a non-forward range would destroy the original "
      "range.");
  return split(start).second;
}
/// @}
private:
// advance_upto: a candidate for <algorithm>, used here as a private helper.
//
// Every overload returns how many of the requested steps were NOT applied
// because 'limit' was reached first (0 when all steps were taken).
//
// Input/forward version: can only move forwards, so a negative request is
// answered with 0 and leaves 'it' alone.
static difference_type advance_upto(Iterator &it, difference_type n,
                                    Iterator limit, std::input_iterator_tag) {
  if (n < 0)
    return 0;
  for (; it != limit && n > 0; --n)
    ++it;
  return n;
}
// Bidirectional version: a negative count steps backwards, a positive count
// steps forwards. Both directions stop early at 'limit'. Returns the part of
// the count that was left over.
static difference_type advance_upto(Iterator &it, difference_type n,
                                    Iterator limit,
                                    std::bidirectional_iterator_tag) {
  // Backward walk; only entered for a negative request.
  for (; it != limit && n < 0; ++n)
    --it;
  // Forward walk; only entered for a positive request (after the loop above
  // n can no longer be positive unless it started that way).
  for (; it != limit && n > 0; --n)
    ++it;
  return n;
}
// Random-access version: jumps directly instead of stepping. If 'limit' is
// strictly farther away than the request, 'it' moves by n and 0 is returned.
// Otherwise 'it' lands on 'limit' and the unapplied remainder is returned.
static difference_type advance_upto(Iterator &it, difference_type n,
                                    Iterator limit,
                                    std::random_access_iterator_tag) {
  // Make the std::abs overload set the candidate: a bare abs() can resolve
  // to the C ::abs(int) and silently truncate a wider difference_type.
  using std::abs;
  const difference_type distance = limit - it;
  // The request must point towards 'limit', never away from it.
  if (distance < 0)
    assert(n <= 0);
  else if (distance > 0)
    assert(n >= 0);
  if (abs(distance) > abs(n)) {
    it += n;
    return 0;
  }
  it = limit;
  return n - distance;
}
// Dispatch functions.
// size() for forward (non-random-access) iterators: counts the elements
// with std::distance.
difference_type dispatch_size(std::forward_iterator_tag) const {
return std::distance(begin(), end());
}
// size() for random-access iterators: a single iterator subtraction.
LLVM_CONSTEXPR difference_type dispatch_size(
std::random_access_iterator_tag) const {
return end() - begin();
}
// split() with a negative index on forward-only iterators: the index is
// converted to an offset from the front (clamped at 0) and handed back to
// split().
std::pair<range, range> dispatch_split_neg(difference_type index,
                                           std::forward_iterator_tag) const {
  assert(index < 0);
  const difference_type from_front = this->size() + index;
  return split(std::max<difference_type>(0, from_front));
}
// split() with a negative index on bidirectional iterators: trims up to
// -index elements off the back of a copy. What was trimmed becomes the
// second piece.
std::pair<range, range> dispatch_split_neg(
    difference_type index, std::bidirectional_iterator_tag) const {
  assert(index < 0);
  range first = *this;
  first.pop_back_upto(-index);
  // Qualified on purpose: with a pointer Iterator (e.g. range<int *>)
  // argument-dependent lookup does not reach namespace std, so a bare
  // make_pair would not be found.
  return std::make_pair(first, range(first.end(), end()));
}
// slice() for forward-only iterators. Negative offsets are first rewritten
// as offsets from the front (which needs size()), then both ends are reached
// by walking forwards from begin().
range dispatch_slice(difference_type start, difference_type stop,
                     std::forward_iterator_tag) const {
  const bool has_negative_offset = start < 0 || stop < 0;
  if (has_negative_offset) {
    // size() is only computed when it is actually needed.
    const difference_type length = this->size();
    if (start < 0)
      start = std::max<difference_type>(0, length + start);
    if (stop < 0)
      stop = length + stop; // May still be negative; clamped just below.
  }
  stop = std::max<difference_type>(start, stop);

  Iterator slice_begin = begin();
  advance_upto(slice_begin, start, end(), iterator_category());

  Iterator slice_end = slice_begin;
  advance_upto(slice_end, stop - start, end(), iterator_category());

  return range(slice_begin, slice_end);
}
// slice() for bidirectional iterators. Each end is located by walking from
// whichever side of the range its offset is relative to: negative offsets
// walk backwards from end(), non-negative ones walk forwards.
range dispatch_slice(const difference_type start, const difference_type stop,
std::bidirectional_iterator_tag) const {
Iterator first;
if (start < 0) {
first = end();
advance_upto(first, start, begin(), iterator_category());
} else {
first = begin();
advance_upto(first, start, end(), iterator_category());
}
Iterator last;
if (stop < 0) {
// Walk back from end(), but never past 'first', so the result cannot be
// an inverted range.
last = end();
advance_upto(last, stop, first, iterator_category());
} else {
if (start >= 0) {
// Both offsets count from the front: continue from 'first'.
last = first;
if (stop > start)
advance_upto(last, stop - start, end(), iterator_category());
} else {
// Complicated: 'start' walked from the end of the sequence,
// but 'stop' needs to walk from the beginning.
Iterator dummy = begin();
// Walk up to 'stop' increments from begin(), stopping when we
// get to 'first', and capturing the remaining number of
// increments.
difference_type increments_past_start =
advance_upto(dummy, stop, first, iterator_category());
if (increments_past_start == 0) {
// If this is 0, then stop was before start.
last = first;
} else {
// Otherwise, count that many spaces beyond first.
last = first;
advance_upto(last, increments_past_start, end(), iterator_category());
}
}
}
return range(first, last);
}
// slice() for random-access iterators: pure index arithmetic. Negative
// offsets are rebased onto the front, 'start' is clamped into [0, size] and
// 'stop' into [start, size].
range dispatch_slice(difference_type start, difference_type stop,
                     std::random_access_iterator_tag) const {
  const difference_type length = this->size();

  // Rebase a negative start, then clamp: lower bound first, upper bound
  // second.
  if (start < 0)
    start += length;
  start = start < 0 ? 0 : start;
  start = start > length ? length : start;

  // Same for stop, with 'start' as the lower bound.
  if (stop < 0)
    stop += length;
  stop = stop < start ? start : stop;
  stop = stop > length ? length : stop;

  return range(begin() + start, begin() + stop);
}
};
/// \name deducing constructor wrappers
/// \relates std::range
/// \xmlonly <nonmember/> \endxmlonly
///
/// These functions do the same thing as the constructor with the same
/// signature. They just allow users to avoid writing the iterator
/// type.
/// @{
/// \todo I'd like to define a \c make_range taking a single iterator
/// argument representing the beginning of a range that ends with a
/// default-constructed \c Iterator. This would help with using
/// iterators like \c istream_iterator. However, using just \c
/// make_range() could be confusing and lead to people writing
/// incorrect ranges of more common iterators. Is there a better name?
/// Builds a \c range<Iterator> from the iterator pair, deducing \c Iterator
/// from the arguments.
template <typename Iterator>
LLVM_CONSTEXPR range<Iterator> make_range(Iterator begin, Iterator end) {
return range<Iterator>(begin, end);
}
/// Builds a range over \c r whose iterator type is whatever the ADL-selected
/// \c begin(r) returns.
///
/// \par Participates in overload resolution if:
/// \c begin(r) and \c end(r) return the same type.
template <typename Range> LLVM_CONSTEXPR auto make_range(
Range &&r,
typename std::enable_if<detail::is_range<Range>::value>::type* = 0)
-> range<decltype(detail::adl_begin(r))> {
return range<decltype(detail::adl_begin(r))>(r);
}
/// Builds a pointer-based range over \c r; the iterator type is the type of
/// <code>&*begin(r)</code>.
///
/// \par Participates in overload resolution if:
/// - \c begin(r) and \c end(r) return the same type,
/// - that type satisfies the invariant that <code>&*(i + N) ==
/// (&*i) + N</code>, and
/// - \c &*begin(r) has a pointer type.
template <typename Range> LLVM_CONSTEXPR auto make_ptr_range(
Range &&r,
typename std::enable_if<
detail::is_contiguous_range<Range>::value &&
std::is_pointer<decltype(&*detail::adl_begin(r))>::value>::type* = 0)
-> range<decltype(&*detail::adl_begin(r))> {
return range<decltype(&*detail::adl_begin(r))>(r);
}
/// @}
} // end namespace lld
#endif

View File

@ -6,145 +6,27 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// Interface for Drivers which convert command line arguments into
/// LinkingContext objects, then perform the link.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_DRIVER_DRIVER_H
#define LLD_DRIVER_DRIVER_H
#include "lld/Core/LLVM.h"
#include "lld/Core/Node.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <set>
#include <vector>
namespace lld {
class LinkingContext;
class CoreLinkingContext;
class MachOLinkingContext;
class PECOFFLinkingContext;
class ELFLinkingContext;
typedef std::vector<std::unique_ptr<File>> FileVector;
FileVector makeErrorFile(StringRef path, std::error_code ec);
FileVector parseMemberFiles(std::unique_ptr<File> File);
FileVector loadFile(LinkingContext &ctx, StringRef path, bool wholeArchive);
/// Base class for all Drivers. It offers only static helpers to derived
/// drivers and cannot be instantiated (the constructor is deleted).
class Driver {
protected:
  /// Performs the link using the options held in \p context. \p diag
  /// defaults to llvm::errs().
  static bool link(LinkingContext &context, raw_ostream &diag = llvm::errs());

  /// Parses the LLVM options from the context.
  static void parseLLVMOptions(const LinkingContext &context);

private:
  Driver() = delete;
};
/// Driver for the "universal" lld tool, which can mimic any linker command
/// line parsing once it has worked out which command line flavor to use.
/// Not instantiable (the constructor is deleted).
class UniversalDriver : public Driver {
public:
  /// Determines the flavor and passes control to the Driver for that flavor.
  /// \p diag defaults to llvm::errs().
  static bool link(llvm::MutableArrayRef<const char *> args,
                   raw_ostream &diag = llvm::errs());

private:
  UniversalDriver() = delete;
};
/// Driver for gnu/binutils 'ld' command line options. Not instantiable (the
/// constructor is deleted).
class GnuLdDriver : public Driver {
public:
  /// Parses the command line arguments the same way gnu/binutils ld does and
  /// performs the link.
  /// \returns true iff an error occurred.
  static bool linkELF(llvm::ArrayRef<const char *> args,
                      raw_ostream &diag = llvm::errs());

  /// Fills in the options struct from gnu/binutils style ld command line
  /// options.
  /// \returns true iff there was an error.
  static bool parse(llvm::ArrayRef<const char *> args,
                    std::unique_ptr<ELFLinkingContext> &context,
                    raw_ostream &diag = llvm::errs());

  /// Parses the given memory buffer as a linker script and evaluates it.
  /// Public function for testing.
  static std::error_code evalLinkerScript(ELFLinkingContext &ctx,
                                          std::unique_ptr<MemoryBuffer> mb,
                                          raw_ostream &diag, bool nostdlib);

  /// A factory method to create an instance of ELFLinkingContext.
  static std::unique_ptr<ELFLinkingContext>
  createELFLinkingContext(llvm::Triple triple);

private:
  static llvm::Triple getDefaultTarget(const char *progName);
  static bool applyEmulation(llvm::Triple &triple,
                             llvm::opt::InputArgList &args,
                             raw_ostream &diag);
  static void addPlatformSearchDirs(ELFLinkingContext &ctx,
                                    llvm::Triple &triple,
                                    llvm::Triple &baseTriple);

  GnuLdDriver() = delete;
};
/// Driver for darwin/ld64 'ld' command line options. Not instantiable (the
/// constructor is deleted).
class DarwinLdDriver : public Driver {
public:
  /// Parses the command line arguments the same way darwin's ld does and
  /// performs the link.
  /// \returns true iff there was an error.
  static bool linkMachO(llvm::ArrayRef<const char *> args,
                        raw_ostream &diag = llvm::errs());

  /// Updates the LinkingContext object from darwin style ld command line
  /// options.
  /// \returns true iff there was an error.
  static bool parse(llvm::ArrayRef<const char *> args,
                    MachOLinkingContext &info,
                    raw_ostream &diag = llvm::errs());

private:
  DarwinLdDriver() = delete;
};
/// Driver for Windows 'link.exe' command line options
namespace coff {
void link(llvm::ArrayRef<const char *> args);
bool link(llvm::ArrayRef<const char *> Args);
}
namespace elf2 {
void link(llvm::ArrayRef<const char *> args);
namespace elf {
bool link(llvm::ArrayRef<const char *> Args,
llvm::raw_ostream &Diag = llvm::errs());
}
/// Driver for lld unit tests. Not instantiable (the constructor is deleted).
class CoreDriver : public Driver {
public:
  /// Parses the command line arguments the same way lld-core does and
  /// performs the link.
  /// \returns true iff there was an error.
  static bool link(llvm::ArrayRef<const char *> args,
                   raw_ostream &diag = llvm::errs());

  /// Fills in the options struct from lld-core command line options.
  /// \returns true iff there was an error.
  static bool parse(llvm::ArrayRef<const char *> args,
                    CoreLinkingContext &info,
                    raw_ostream &diag = llvm::errs());

private:
  CoreDriver() = delete;
};
} // end namespace lld
namespace mach_o {
bool link(llvm::ArrayRef<const char *> Args,
llvm::raw_ostream &Diag = llvm::errs());
}
}
#endif

View File

@ -1,39 +0,0 @@
//===- include/lld/ReaderWriter/AtomLayout.h ------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_READER_WRITER_ATOM_LAYOUT_H
#define LLD_READER_WRITER_ATOM_LAYOUT_H
namespace lld {
class Atom;
/// AtomLayouts are used by a writer to manage the physical positions of
/// atoms. Each AtomLayout carries two positions: the file offset and the
/// address the atom has once loaded into memory.
///
/// Building AtomLayouts is usually a multi-pass process. When an atom is
/// appended to a section, the section's starting address is not yet known,
/// so the values stored at first are offsets from the beginning of the
/// section. Once the starting addresses of all sections are fixed, the
/// AtomLayouts are revisited and the section starting addresses are added
/// to those offsets.
struct AtomLayout {
  /// Creates an empty layout: no atom, file offset 0, virtual address 0.
  AtomLayout() : AtomLayout(nullptr, 0, 0) {}

  /// Creates a layout for atom \p a at file offset \p fileOff and virtual
  /// address \p virAddr.
  AtomLayout(const Atom *a, uint64_t fileOff, uint64_t virAddr)
      : _atom(a), _fileOffset(fileOff), _virtualAddr(virAddr) {}

  const Atom *_atom;
  uint64_t _fileOffset;
  uint64_t _virtualAddr;
};
}
#endif

Some files were not shown because too many files have changed in this diff Show More