Vendor import of llvm RELEASE_360/rc2 tag r227651 (effectively, 3.6.0 RC2):
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc2@227651

@@ -102,6 +102,282 @@ enable handling of case (3).
.. _discussions: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-May/073235.html

Metadata is not a Value
-----------------------

Metadata nodes (``!{...}``) and strings (``!"..."``) are no longer values.
They have no use-lists, no type, cannot RAUW, and cannot be function-local.

Bridges between Value and Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

LLVM intrinsics can reference metadata using the ``metadata`` type, and
metadata nodes can reference constant values.

Function-local metadata is limited to direct arguments to LLVM intrinsics.

Metadata is typeless
^^^^^^^^^^^^^^^^^^^^

The following old IR:

.. code-block:: llvm

    @global = global i32 0

    define void @foo(i32 %v) {
    entry:
      call void @llvm.md(metadata !{i32 %v})
      call void @llvm.md(metadata !{i32* @global})
      call void @llvm.md(metadata !0)
      call void @llvm.md(metadata !{metadata !"string"})
      call void @llvm.md(metadata !{metadata !{metadata !1, metadata !"string"}})
      ret void, !bar !1, !baz !2
    }

    declare void @llvm.md(metadata)

    !0 = metadata !{metadata !1, metadata !2, metadata !3, metadata !"some string"}
    !1 = metadata !{metadata !2, null, metadata !"other", i32* @global, i32 7}
    !2 = metadata !{}

should now be written as:

.. code-block:: llvm

    @global = global i32 0

    define void @foo(i32 %v) {
    entry:
      call void @llvm.md(metadata i32 %v) ; The only legal place for function-local
                                          ; metadata.
      call void @llvm.md(metadata i32* @global)
      call void @llvm.md(metadata !0)
      call void @llvm.md(metadata !{!"string"})
      call void @llvm.md(metadata !{!{!1, !"string"}})
      ret void, !bar !1, !baz !2
    }

    declare void @llvm.md(metadata)

    !0 = !{!1, !2, !3, !"some string"}
    !1 = !{!2, null, !"other", i32* @global, i32 7}
    !2 = !{}

Distinct metadata nodes
^^^^^^^^^^^^^^^^^^^^^^^

Metadata nodes can opt out of uniquing using the keyword ``distinct``.
Distinct nodes are still owned by the context, but are stored in a side table
and not uniqued.

In LLVM 3.5, metadata nodes would drop uniquing if an operand changed to
``null`` during optimizations. This is no longer true. However, if an operand
change causes a uniquing collision, they become ``distinct``. Unlike LLVM 3.5,
where serializing to assembly or bitcode would re-unique the nodes, they now
remain ``distinct``.

The following IR:

.. code-block:: llvm

    !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}

    !0 = !{}
    !1 = !{}
    !2 = distinct !{}
    !3 = distinct !{}
    !4 = !{!0}
    !5 = distinct !{!0}
    !6 = !{!4, !{}, !5}
    !7 = !{!{!0}, !0, !5}
    !8 = distinct !{!{!0}, !0, !5}

is equivalent to the following:

.. code-block:: llvm

    !named = !{!0, !0, !1, !2, !3, !4, !5, !5, !6}

    !0 = !{}
    !1 = distinct !{}
    !2 = distinct !{}
    !3 = !{!0}
    !4 = distinct !{!0}
    !5 = !{!3, !0, !4}
    !6 = distinct !{!3, !0, !4}

Constructing cyclic graphs
^^^^^^^^^^^^^^^^^^^^^^^^^^

During graph construction, if a metadata node transitively references a forward
declaration, the node itself is considered "unresolved" until the forward
declaration resolves. An unresolved node can RAUW itself to support uniquing.
Nodes automatically resolve once all their operands have resolved.

However, cyclic graphs prevent the nodes from resolving. An API client that
constructs a cyclic graph must call ``resolveCycles()`` to resolve nodes in the
cycle.

To spare self-references that burden, self-referencing nodes are implicitly
``distinct``. So the following IR:

.. code-block:: llvm

    !named = !{!0, !1, !2, !3, !4}

    !0 = !{!0}
    !1 = !{!1}
    !2 = !{!2, !1}
    !3 = !{!2, !1}
    !4 = !{!2, !1}

is equivalent to:

.. code-block:: llvm

    !named = !{!0, !1, !2, !3, !3}

    !0 = distinct !{!0}
    !1 = distinct !{!1}
    !2 = distinct !{!2, !1}
    !3 = !{!2, !1}

MDLocation (aka DebugLoc aka DILocation)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

There's a new first-class metadata construct called ``MDLocation`` (to be
followed in subsequent releases by others). It's used for the locations
referenced by ``!dbg`` metadata attachments.

For example, if an old ``!dbg`` attachment looked like this:

.. code-block:: llvm

    define i32 @foo(i32 %a, i32 %b) {
    entry:
      %add = add i32 %a, %b, !dbg !0
      ret i32 %add, !dbg !1
    }

    !0 = metadata !{i32 10, i32 3, metadata !2, metadata !1}
    !1 = metadata !{i32 20, i32 7, metadata !3}
    !2 = metadata !{...}
    !3 = metadata !{...}

the new attachment looks like this:

.. code-block:: llvm

    define i32 @foo(i32 %a, i32 %b) {
    entry:
      %add = add i32 %a, %b, !dbg !0
      ret i32 %add, !dbg !1
    }

    !0 = !MDLocation(line: 10, column: 3, scope: !2, inlinedAt: !1)
    !1 = !MDLocation(line: 20, column: 7, scope: !3)
    !2 = !{...}
    !3 = !{...}

The fields are named, can be reordered, and have sane defaults if left out
(although ``scope:`` is required).

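As an illustrative sketch (assuming the defaults are ``line: 0``, ``column: 0``,
and no ``inlinedAt:``, and using a hypothetical scope node ``!2``):

.. code-block:: llvm

    ; Fields reordered; this describes the same location as the usual order.
    !0 = !MDLocation(scope: !2, column: 3, line: 10)

    ; Only the required scope: is given; line and column fall back to their
    ; defaults, and there is no inlinedAt.
    !1 = !MDLocation(scope: !2)
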
Alias syntax change
-------------------

The syntax for aliases is now closer to what is used for global variables:

.. code-block:: llvm

    @a = weak global ...
    @b = weak alias ...

The order of the ``alias`` keyword and the linkage was swapped before.

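As a minimal sketch of the swap (assuming a hypothetical alias ``@b`` of the
global ``@g``; the rest of the alias syntax is unchanged), LLVM 3.5 accepted
the first spelling, while 3.6 expects the second:

.. code-block:: llvm

    @g = global i32 0

    ; Old order (3.5): the alias keyword came before the linkage.
    ;   @b = alias weak i32* @g

    ; New order (3.6): the linkage comes first, as for global variables.
    @b = weak alias i32* @g
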
The old JIT has been removed
----------------------------

All users should transition to MCJIT.

object::Binary doesn't own the file buffer
-------------------------------------------

It is now just a wrapper, which simplifies using object::Binary with other
users of the underlying file.

IR in object files is now supported
-----------------------------------

Regular object files can contain IR in a section named ``.llvmbc``.

The gold plugin has been rewritten
----------------------------------

It is now implemented directly on top of ``lib/Linker`` instead of ``lib/LTO``.
The API of ``lib/LTO`` is sufficiently different from gold's view of the
linking process that some cases could not be conveniently implemented.

The new implementation is also lazier and has a ``save-temps`` option.

Change in the representation of lazy loaded funcs
-------------------------------------------------

Lazy loaded functions are now represented in a way that ``isDeclaration``
returns the correct answer even before reading the body.

The opt option -std-compile-opts was removed
--------------------------------------------

It was effectively an alias of ``-O3``.

Python 2.7 is now required
--------------------------

This was done to simplify compatibility with Python 3.

The leak detector has been removed
----------------------------------

In practice, tools like ASan and Valgrind were finding far more bugs than
the old leak detector, so it was removed.

New comdat syntax
-----------------

The syntax of comdats was changed to:

.. code-block:: llvm

    $c = comdat any
    @g = global i32 0, comdat($c)
    @c = global i32 0, comdat

The version without the parentheses is syntactic sugar for a comdat with
the same name as the global.

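For example (a small sketch reusing the names above), the shorthand on ``@c``
behaves as if the comdat had been named explicitly:

.. code-block:: llvm

    $c = comdat any
    @c = global i32 0, comdat       ; equivalent to: @c = global i32 0, comdat($c)
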
Diagnostic infrastructure used by lib/Linker and lib/Bitcode
------------------------------------------------------------

These libraries now use the diagnostic handler to print errors and warnings.
This provides better error messages and simpler error handling.

The PreserveSource linker mode was removed
------------------------------------------

It was fairly broken, so it was removed.

Changes to the ARM Backend
--------------------------

@@ -237,8 +513,35 @@ An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
projects that have already been updated to work with LLVM 3.6.

* A project

Portable Computing Language (pocl)
----------------------------------

In addition to producing an easily portable open source OpenCL
implementation, another major goal of `pocl <http://portablecl.org/>`_
is improving performance portability of OpenCL programs with
compiler optimizations, reducing the need for target-dependent manual
optimizations. An important part of pocl is a set of LLVM passes used to
statically parallelize multiple work-items with the kernel compiler, even in
the presence of work-group barriers. This enables static parallelization of
the fine-grained static concurrency in the work groups in multiple ways.

TTA-based Co-design Environment (TCE)
-------------------------------------

`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
exposed-datapath processors based on the Transport Triggered
Architecture (TTA).

The toolset provides a complete co-design flow from C/C++
programs down to synthesizable VHDL/Verilog and parallel program binaries.
Processor customization points include the register files, function units,
supported operations, and the interconnection network.

TCE uses Clang and LLVM for C/C++/OpenCL C language support, target-independent
optimizations, and also for parts of code generation. It generates
new LLVM-based code generators "on the fly" for the designed processors and
loads them into the compiler backend as runtime libraries to avoid
per-target recompilation of larger parts of the compiler chain.

Additional Information
======================

@@ -195,6 +195,9 @@ class PMTopLevelManager {
  /// then return NULL.
  Pass *findAnalysisPass(AnalysisID AID);

  /// Retrieve the PassInfo for an analysis.
  const PassInfo *findAnalysisPassInfo(AnalysisID AID) const;

  /// Find analysis usage information for the pass P.
  AnalysisUsage *findAnalysisUsage(Pass *P);

@@ -251,6 +254,12 @@ class PMTopLevelManager {
  SmallVector<ImmutablePass *, 16> ImmutablePasses;

  DenseMap<Pass *, AnalysisUsage *> AnUsageMap;

  /// Collection of PassInfo objects found via analysis IDs and in this top
  /// level manager. This is used to memoize queries to the pass registry.
  /// FIXME: This is an egregious hack because querying the pass registry is
  /// either slow or racy.
  mutable DenseMap<AnalysisID, const PassInfo *> AnalysisPassInfos;
};

@@ -61,6 +61,12 @@ class MCAsmBackend {
  /// markers. If not, data region directives will be ignored.
  bool hasDataInCodeSupport() const { return HasDataInCodeSupport; }

  /// doesSectionRequireSymbols - Check whether the given section requires that
  /// all symbols (even temporaries) have symbol table entries.
  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
    return false;
  }

  /// @name Target Fixup Interfaces
  /// @{

@@ -11,7 +11,6 @@
#define LLVM_MC_MCASSEMBLER_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
@@ -882,8 +881,6 @@ class MCAssembler {

  iplist<MCSymbolData> Symbols;

  DenseSet<const MCSymbol *> LocalsUsedInReloc;

  /// The map of sections to their associated assembler backend data.
  //
  // FIXME: Avoid this indirection?
@@ -983,9 +980,6 @@ class MCAssembler {
                              MCFragment &F, const MCFixup &Fixup);

public:
  void addLocalUsedInReloc(const MCSymbol &Sym);
  bool isLocalUsedInReloc(const MCSymbol &Sym) const;

  /// Compute the effective fragment size assuming it is laid out at the given
  /// \p SectionAddress and \p FragmentOffset.
  uint64_t computeFragmentSize(const MCAsmLayout &Layout,

@@ -68,10 +68,12 @@ class MCMachObjectTargetWriter {
  /// @name API
  /// @{

  virtual void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
  virtual void RecordRelocation(MachObjectWriter *Writer,
                                const MCAssembler &Asm,
                                const MCAsmLayout &Layout,
                                const MCFragment *Fragment,
                                const MCFixup &Fixup, MCValue Target,
                                const MCFixup &Fixup,
                                MCValue Target,
                                uint64_t &FixedValue) = 0;

  /// @}
@@ -95,14 +97,8 @@ class MachObjectWriter : public MCObjectWriter {
  /// @name Relocation Data
  /// @{

  struct RelAndSymbol {
    const MCSymbolData *Sym;
    MachO::any_relocation_info MRE;
    RelAndSymbol(const MCSymbolData *Sym, const MachO::any_relocation_info &MRE)
        : Sym(Sym), MRE(MRE) {}
  };

  llvm::DenseMap<const MCSectionData *, std::vector<RelAndSymbol>> Relocations;
  llvm::DenseMap<const MCSectionData*,
                 std::vector<MachO::any_relocation_info> > Relocations;
  llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;

  /// @}
@@ -217,15 +213,9 @@ class MachObjectWriter : public MCObjectWriter {
  //  - Input errors, where something cannot be correctly encoded. 'as' allows
  //    these through in many cases.

  // Add a relocation to be output in the object file. At the time this is
  // called, the symbol indexes are not known, so if the relocation refers
  // to a symbol it should be passed as \p RelSymbol so that it can be updated
  // afterwards. If the relocation doesn't refer to a symbol, nullptr should be
  // used.
  void addRelocation(const MCSymbolData *RelSymbol, const MCSectionData *SD,
  void addRelocation(const MCSectionData *SD,
                     MachO::any_relocation_info &MRE) {
    RelAndSymbol P(RelSymbol, MRE);
    Relocations[SD].push_back(P);
    Relocations[SD].push_back(MRE);
  }

  void RecordScatteredRelocation(const MCAssembler &Asm,
@@ -241,7 +231,7 @@ class MachObjectWriter : public MCObjectWriter {
                                 const MCFixup &Fixup, MCValue Target,
                                 uint64_t &FixedValue);

  void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                        const MCFragment *Fragment, const MCFixup &Fixup,
                        MCValue Target, bool &IsPCRel,
                        uint64_t &FixedValue) override;

@@ -76,10 +76,12 @@ class MCObjectWriter {
  /// post layout binding. The implementation is responsible for storing
  /// information about the relocation so that it can be emitted during
  /// WriteObject().
  virtual void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
  virtual void RecordRelocation(const MCAssembler &Asm,
                                const MCAsmLayout &Layout,
                                const MCFragment *Fragment,
                                const MCFixup &Fixup, MCValue Target,
                                bool &IsPCRel, uint64_t &FixedValue) = 0;
                                bool &IsPCRel,
                                uint64_t &FixedValue) = 0;

  /// \brief Check whether the difference (A - B) between two symbol
  /// references is fully resolved.

@@ -17,6 +17,7 @@

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Target/TargetLibraryInfo.h"

namespace llvm {
class Value;
@@ -53,8 +54,10 @@ class FortifiedLibCallSimplifier {
  Value *optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B);
  Value *optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B);
  Value *optimizeMemSetChk(CallInst *CI, IRBuilder<> &B);
  Value *optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B);
  Value *optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B);

  // Str/Stp cpy are similar enough to be handled in the same functions.
  Value *optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc::Func Func);
  Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc::Func Func);

  /// \brief Checks whether the call \p CI to a fortified libcall is foldable
  /// to the non-fortified version.

@@ -3154,8 +3154,9 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
  if (LHS == RHS)
    return getConstant(LHS->getType(), 0);

  // X - Y --> X + -Y
  return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
  // X - Y --> X + -Y.
  // X -(nsw || nuw) Y --> X + -Y.
  return getAddExpr(LHS, getNegativeSCEV(RHS));
}

/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -3461,12 +3462,10 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
          if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
            Flags = setFlags(Flags, SCEV::FlagNUW);
        }
      } else if (const SubOperator *OBO =
                     dyn_cast<SubOperator>(BEValueV)) {
        if (OBO->hasNoUnsignedWrap())
          Flags = setFlags(Flags, SCEV::FlagNUW);
        if (OBO->hasNoSignedWrap())
          Flags = setFlags(Flags, SCEV::FlagNSW);

        // We cannot transfer nuw and nsw flags from subtraction
        // operations -- sub nuw X, Y is not the same as add nuw X, -Y
        // for instance.
      }

      const SCEV *StartVal = getSCEV(StartValueV);

@@ -626,10 +626,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {

  DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());

  // If this type is not derived from any type or the type is a declaration then
  // take conservative approach.
  if (!BaseType.isValid() || BaseType.isForwardDecl())
    return Ty.getSizeInBits();
  assert(BaseType.isValid());

  // If this is a derived type, go ahead and get the base type, unless it's a
  // reference then it's just the size of the field. Pointer types have no need
@@ -1473,7 +1470,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
    uint64_t FieldSize = getBaseTypeSize(DD, DT);
    uint64_t OffsetInBytes;

    if (Size != FieldSize) {
    if (FieldSize && Size != FieldSize) {
      // Handle bitfield, assume bytes are 8 bits.
      addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
      addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);

@@ -11,11 +11,19 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
using namespace llvm;

Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
@@ -43,15 +51,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
  // because CodeGen overloads that to mean preserving the MachineBasicBlock
  // CFG in addition to the LLVM IR CFG.
  AU.addPreserved<AliasAnalysis>();
  AU.addPreserved("scalar-evolution");
  AU.addPreserved("iv-users");
  AU.addPreserved("memdep");
  AU.addPreserved("live-values");
  AU.addPreserved("domtree");
  AU.addPreserved("domfrontier");
  AU.addPreserved("loops");
  AU.addPreserved("lda");
  AU.addPreserved("stack-protector");
  AU.addPreserved<DominanceFrontier>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<IVUsers>();
  AU.addPreserved<LoopInfo>();
  AU.addPreserved<MemoryDependenceAnalysis>();
  AU.addPreserved<ScalarEvolution>();
  AU.addPreserved<StackProtector>();

  FunctionPass::getAnalysisUsage(AU);
}

@@ -563,9 +563,23 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
  return nullptr;
}

void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
  MDNode *N =
      MD ? cast<MDNode>(unwrap<MetadataAsValue>(MD)->getMetadata()) : nullptr;
// MetadataAsValue uses a canonical format which strips the actual MDNode for
// MDNode with just a single constant value, storing just a ConstantAsMetadata.
// This undoes this canonicalization, reconstructing the MDNode.
static MDNode *extractMDNode(MetadataAsValue *MAV) {
  Metadata *MD = MAV->getMetadata();
  assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
         "Expected a metadata node or a canonicalized constant");

  if (MDNode *N = dyn_cast<MDNode>(MD))
    return N;

  return MDNode::get(MAV->getContext(), MD);
}

void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) {
  MDNode *N = Val ? extractMDNode(unwrap<MetadataAsValue>(Val)) : nullptr;

  unwrap<Instruction>(Inst)->setMetadata(KindID, N);
}

@@ -795,7 +809,7 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
    return;
  if (!Val)
    return;
  N->addOperand(cast<MDNode>(unwrap<MetadataAsValue>(Val)->getMetadata()));
  N->addOperand(extractMDNode(unwrap<MetadataAsValue>(Val)));
}

/*--.. Operations on scalar constants ......................................--*/

@@ -600,8 +600,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
  // If P is an analysis pass and it is available then do not
  // generate the analysis again. Stale analysis info should not be
  // available at this point.
  const PassInfo *PI =
      PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
  const PassInfo *PI = findAnalysisPassInfo(P->getPassID());
  if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
    delete P;
    return;
@@ -619,7 +618,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {

      Pass *AnalysisPass = findAnalysisPass(*I);
      if (!AnalysisPass) {
        const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
        const PassInfo *PI = findAnalysisPassInfo(*I);

        if (!PI) {
          // Pass P is not in the global PassRegistry
@@ -716,8 +715,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
      return *I;

    // If Pass not found then check the interfaces implemented by Immutable Pass
    const PassInfo *PassInf =
        PassRegistry::getPassRegistry()->getPassInfo(PI);
    const PassInfo *PassInf = findAnalysisPassInfo(PI);
    assert(PassInf && "Expected all immutable passes to be initialized");
    const std::vector<const PassInfo*> &ImmPI =
        PassInf->getInterfacesImplemented();
@@ -731,6 +729,17 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
  return nullptr;
}

const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const {
  const PassInfo *&PI = AnalysisPassInfos[AID];
  if (!PI)
    PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
  else
    assert(PI == PassRegistry::getPassRegistry()->getPassInfo(AID) &&
           "The pass info pointer changed for an analysis ID!");

  return PI;
}

// Print passes managed by this top level manager.
void PMTopLevelManager::dumpPasses() const {

@@ -759,8 +768,7 @@ void PMTopLevelManager::dumpArguments() const {
  dbgs() << "Pass Arguments: ";
  for (SmallVectorImpl<ImmutablePass *>::const_iterator I =
       ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
    if (const PassInfo *PI =
            PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
    if (const PassInfo *PI = findAnalysisPassInfo((*I)->getPassID())) {
      assert(PI && "Expected all immutable passes to be initialized");
      if (!PI->isAnalysisGroup())
        dbgs() << " -" << PI->getPassArgument();
@@ -824,7 +832,7 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) {

  // This pass is the current implementation of all of the interfaces it
  // implements as well.
  const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
  const PassInfo *PInf = TPM->findAnalysisPassInfo(PI);
  if (!PInf) return;
  const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
  for (unsigned i = 0, e = II.size(); i != e; ++i)
@@ -957,7 +965,7 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
  }

  AnalysisID PI = P->getPassID();
  if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
  if (const PassInfo *PInf = TPM->findAnalysisPassInfo(PI)) {
    // Remove the pass itself (if it is not already removed).
    AvailableAnalysis.erase(PI);

@@ -1037,7 +1045,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
    for (SmallVectorImpl<AnalysisID>::iterator
           I = ReqAnalysisNotAvailable.begin(),
           E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
      const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
      const PassInfo *PI = TPM->findAnalysisPassInfo(*I);
      Pass *AnalysisPass = PI->createPass();
      this->addLowerLevelRequiredPass(P, AnalysisPass);
    }
@@ -1142,7 +1150,7 @@ void PMDataManager::dumpPassArguments() const {
      PMD->dumpPassArguments();
    else
      if (const PassInfo *PI =
            PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
            TPM->findAnalysisPassInfo((*I)->getPassID()))
        if (!PI->isAnalysisGroup())
          dbgs() << " -" << PI->getPassArgument();
  }
@@ -1218,7 +1226,7 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
  dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
  for (unsigned i = 0; i != Set.size(); ++i) {
    if (i) dbgs() << ',';
    const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
    const PassInfo *PInf = TPM->findAnalysisPassInfo(Set[i]);
    if (!PInf) {
      // Some preserved passes, such as AliasAnalysis, may not be initialized by
      // all drivers.
@@ -1658,8 +1666,8 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {

    OnTheFlyManagers[P] = FPP;
  }
  const PassInfo * RequiredPassPI =
      PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());
  const PassInfo *RequiredPassPI =
      TPM->findAnalysisPassInfo(RequiredPass->getPassID());

  Pass *FoundPass = nullptr;
  if (RequiredPassPI && RequiredPassPI->isAnalysis()) {

@@ -219,7 +219,7 @@ class ELFObjectWriter : public MCObjectWriter {
                          const MCSymbolData *SD, uint64_t C,
                          unsigned Type) const;

  void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                        const MCFragment *Fragment, const MCFixup &Fixup,
                        MCValue Target, bool &IsPCRel,
                        uint64_t &FixedValue) override;
@@ -789,11 +789,13 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
  return nullptr;
}

void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                       const MCAsmLayout &Layout,
                                       const MCFragment *Fragment,
                                       const MCFixup &Fixup, MCValue Target,
                                       bool &IsPCRel, uint64_t &FixedValue) {
                                       const MCFixup &Fixup,
                                       MCValue Target,
                                       bool &IsPCRel,
                                       uint64_t &FixedValue) {
  const MCSectionData *FixupSection = Fragment->getParent();
  uint64_t C = Target.getConstant();
  uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();

@@ -27,7 +27,22 @@ bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
  // contain.
  // Sections holding 2 byte strings require symbols in order to be atomized.
  // There is no dedicated section for 4 byte strings.
  if (SMO.getType() == MachO::S_CSTRING_LITERALS)
  if (SMO.getKind().isMergeable1ByteCString())
    return false;

  if (SMO.getSegmentName() == "__TEXT" &&
      SMO.getSectionName() == "__objc_classname" &&
      SMO.getType() == MachO::S_CSTRING_LITERALS)
    return false;

  if (SMO.getSegmentName() == "__TEXT" &&
      SMO.getSectionName() == "__objc_methname" &&
      SMO.getType() == MachO::S_CSTRING_LITERALS)
    return false;

  if (SMO.getSegmentName() == "__TEXT" &&
      SMO.getSectionName() == "__objc_methtype" &&
      SMO.getType() == MachO::S_CSTRING_LITERALS)
    return false;

  if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring")

@@ -425,16 +425,6 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
  return true;
}

void MCAssembler::addLocalUsedInReloc(const MCSymbol &Sym) {
  assert(Sym.isTemporary());
  LocalsUsedInReloc.insert(&Sym);
}

bool MCAssembler::isLocalUsedInReloc(const MCSymbol &Sym) const {
  assert(Sym.isTemporary());
  return LocalsUsedInReloc.count(&Sym);
}

bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
  // Non-temporary labels should always be visible to the linker.
  if (!Symbol.isTemporary())
@@ -444,10 +434,8 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
  if (!Symbol.isInSection())
    return false;

  if (isLocalUsedInReloc(Symbol))
    return true;

  return false;
  // Otherwise, check if the section requires symbols even for temporary labels.
  return getBackend().doesSectionRequireSymbols(Symbol.getSection());
}

const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {

@@ -448,11 +448,14 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand(
  assert(OS.tell() - Start == Size);
}

void MachObjectWriter::RecordRelocation(MCAssembler &Asm,
void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                        const MCAsmLayout &Layout,
                                        const MCFragment *Fragment,
                                        const MCFixup &Fixup, MCValue Target,
                                        bool &IsPCRel, uint64_t &FixedValue) {
                                        const MCFixup &Fixup,
                                        MCValue Target,
                                        bool &IsPCRel,
                                        uint64_t &FixedValue) {
  TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
                                       Target, FixedValue);
}
@@ -613,22 +616,6 @@ void MachObjectWriter::ComputeSymbolTable(
    ExternalSymbolData[i].SymbolData->setIndex(Index++);
  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
    UndefinedSymbolData[i].SymbolData->setIndex(Index++);

  for (const MCSectionData &SD : Asm) {
    std::vector<RelAndSymbol> &Relocs = Relocations[&SD];
    for (RelAndSymbol &Rel : Relocs) {
      if (!Rel.Sym)
        continue;

      // Set the Index and the IsExtern bit.
      unsigned Index = Rel.Sym->getIndex();
      assert(isInt<24>(Index));
      if (IsLittleEndian)
        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (-1 << 24)) | Index | (1 << 27);
      else
        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
    }
  }
}

void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
@@ -675,6 +662,10 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
  // Mark symbol difference expressions in variables (from .set or = directives)
  // as absolute.
  markAbsoluteVariableSymbols(Asm, Layout);

  // Compute symbol table information and bind symbol indices.
  ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
                     UndefinedSymbolData);
}

bool MachObjectWriter::
@@ -758,10 +749,6 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,

void MachObjectWriter::WriteObject(MCAssembler &Asm,
                                   const MCAsmLayout &Layout) {
  // Compute symbol table information and bind symbol indices.
  ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
                     UndefinedSymbolData);

  unsigned NumSections = Asm.size();
  const MCAssembler::VersionMinInfoType &VersionInfo =
      Layout.getAssembler().getVersionMinInfo();
@@ -852,7 +839,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
  uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
  for (MCAssembler::const_iterator it = Asm.begin(),
         ie = Asm.end(); it != ie; ++it) {
    std::vector<RelAndSymbol> &Relocs = Relocations[it];
    std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
    unsigned NumRelocs = Relocs.size();
    uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
    WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
@@ -946,10 +933,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
         ie = Asm.end(); it != ie; ++it) {
    // Write the section relocation entries, in reverse order to match 'as'
    // (approximately, the exact algorithm is more complicated than this).
    std::vector<RelAndSymbol> &Relocs = Relocations[it];
    std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
    for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
      Write32(Relocs[e - i - 1].MRE.r_word0);
      Write32(Relocs[e - i - 1].MRE.r_word1);
      Write32(Relocs[e - i - 1].r_word0);
      Write32(Relocs[e - i - 1].r_word1);
    }
  }

@@ -175,7 +175,7 @@ class WinCOFFObjectWriter : public MCObjectWriter {
                const MCFragment &FB, bool InSet,
                bool IsPCRel) const override;

  void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                        const MCFragment *Fragment, const MCFixup &Fixup,
                        MCValue Target, bool &IsPCRel,
                        uint64_t &FixedValue) override;
@@ -661,9 +661,13 @@ bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
                                                InSet, IsPCRel);
}

void WinCOFFObjectWriter::RecordRelocation(
    MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
    const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                           const MCAsmLayout &Layout,
                                           const MCFragment *Fragment,
                                           const MCFixup &Fixup,
                                           MCValue Target,
                                           bool &IsPCRel,
                                           uint64_t &FixedValue) {
  assert(Target.getSymA() && "Relocation must reference a symbol!");

  const MCSymbol &Symbol = Target.getSymA()->getSymbol();

@@ -246,13 +246,21 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {

  if (ArchName.startswith("armv")) {
    offset = 3;
    arch = Triple::arm;
    if (ArchName.endswith("eb")) {
      arch = Triple::armeb;
      ArchName = ArchName.substr(0, ArchName.size() - 2);
    } else
      arch = Triple::arm;
  } else if (ArchName.startswith("armebv")) {
    offset = 5;
    arch = Triple::armeb;
  } else if (ArchName.startswith("thumbv")) {
    offset = 5;
    arch = Triple::thumb;
    if (ArchName.endswith("eb")) {
      arch = Triple::thumbeb;
      ArchName = ArchName.substr(0, ArchName.size() - 2);
    } else
      arch = Triple::thumb;
  } else if (ArchName.startswith("thumbebv")) {
    offset = 7;
    arch = Triple::thumbeb;
@@ -271,6 +279,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
}

static Triple::ArchType parseArch(StringRef ArchName) {
  Triple::ArchType ARMArch(parseARMArch(ArchName));

  return StringSwitch<Triple::ArchType>(ArchName)
    .Cases("i386", "i486", "i586", "i686", Triple::x86)
    // FIXME: Do we need to support these?
@@ -280,9 +290,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
    .Cases("powerpc64", "ppu", Triple::ppc64)
    .Case("powerpc64le", Triple::ppc64le)
    .Case("xscale", Triple::arm)
    .StartsWith("arm", parseARMArch(ArchName))
    .StartsWith("thumb", parseARMArch(ArchName))
    .StartsWith("aarch64", parseARMArch(ArchName))
    .Case("xscaleeb", Triple::armeb)
    .StartsWith("arm", ARMArch)
    .StartsWith("thumb", ARMArch)
    .StartsWith("aarch64", ARMArch)
    .Case("msp430", Triple::msp430)
    .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
    .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -379,6 +390,9 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}

static Triple::SubArchType parseSubArch(StringRef SubArchName) {
  if (SubArchName.endswith("eb"))
    SubArchName = SubArchName.substr(0, SubArchName.size() - 2);

  return StringSwitch<Triple::SubArchType>(SubArchName)
    .EndsWith("v8", Triple::ARMSubArch_v8)
    .EndsWith("v8a", Triple::ARMSubArch_v8)
@@ -1022,6 +1036,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
      offset = 5;
    if (offset != StringRef::npos && MArch.substr(offset, 2) == "eb")
      offset += 2;
    if (MArch.endswith("eb"))
      MArch = MArch.substr(0, MArch.size() - 2);
    if (offset != StringRef::npos)
      result = llvm::StringSwitch<const char *>(MArch.substr(offset))
        .Cases("v2", "v2a", "arm2")

@@ -204,6 +204,44 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;

//===----------------------------------------------------------------------===//
// ARM64 Calling Convention for GHC
//===----------------------------------------------------------------------===//

// This calling convention is specific to the Glasgow Haskell Compiler.
// The only documentation is the GHC source code, specifically the C header
// file:
//
// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
//
// which defines the registers for the Spineless Tagless G-Machine (STG) that
// GHC uses to implement lazy evaluation. The generic STG machine has a set of
// registers which are mapped to an appropriate set of architecture-specific
// registers for each CPU architecture.
//
// The STG Machine is documented here:
//
// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
//
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
// register mapping".

def CC_AArch64_GHC : CallingConv<[
  // Handle all vector types as either f64 or v2f64.
  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,

  CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
  CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
  CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,

  // Promote i8/i16/i32 arguments to i64.
  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
  CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
]>;

// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
@@ -249,3 +287,4 @@ def CSR_AArch64_AllRegs
                                 (sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
                                 (sequence "Q%u", 0, 31))>;

def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;

@@ -302,6 +302,8 @@ static unsigned getImplicitScaleFactor(MVT VT) {
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

@@ -215,6 +215,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");
@@ -451,6 +456,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;

@@ -1990,6 +1990,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
    llvm_unreachable("Unsupported calling convention.");
  case CallingConv::WebKit_JS:
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  case CallingConv::C:
  case CallingConv::Fast:
    if (!Subtarget->isTargetDarwin())

@@ -33,6 +33,10 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"

static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
           cl::desc("Reserve X18, making it unavailable as GPR"));

AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
                                         const AArch64Subtarget *sti)
    : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
@@ -40,6 +44,10 @@ AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction()->getCallingConv() == CallingConv::GHC)
    // GHC set of callee saved regs is empty as all those regs are
    // used for passing STG regs around
    return CSR_AArch64_NoRegs_SaveList;
  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
    return CSR_AArch64_AllRegs_SaveList;
  else
@@ -48,6 +56,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {

const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
  if (CC == CallingConv::GHC)
    // This is academic because all GHC calls are (supposed to be) tail calls
    return CSR_AArch64_NoRegs_RegMask;
  if (CC == CallingConv::AnyReg)
    return CSR_AArch64_AllRegs_RegMask;
  else
@@ -63,7 +74,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
}

const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
  // This should return a register mask that is the same as that returned by
  // getCallPreservedMask but that additionally preserves the register used for
  // the first i64 argument (which must also be the register used to return a
@@ -71,6 +82,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
  //
  // In case that the calling convention does not use the same register for
  // both, the function should return NULL (does not currently apply)
  assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
  return CSR_AArch64_AAPCS_ThisReturn_RegMask;
}

@@ -90,7 +102,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    Reserved.set(AArch64::W29);
  }

  if (STI->isTargetDarwin()) {
  if (STI->isTargetDarwin() || ReserveX18) {
    Reserved.set(AArch64::X18); // Platform register
    Reserved.set(AArch64::W18);
  }
@@ -117,7 +129,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
    return true;
  case AArch64::X18:
  case AArch64::W18:
    return STI->isTargetDarwin();
    return STI->isTargetDarwin() || ReserveX18;
  case AArch64::FP:
  case AArch64::W29:
    return TFI->hasFP(MF) || STI->isTargetDarwin();
@@ -379,7 +391,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
  case AArch64::GPR64commonRegClassID:
    return 32 - 1                                      // XZR/SP
           - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
           - STI->isTargetDarwin() // X18 reserved as platform register
           - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
           - hasBasePointer(MF); // X19
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:

@@ -317,6 +317,42 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
                                         MachO::CPU_SUBTYPE_ARM64_ALL);
  }

  bool doesSectionRequireSymbols(const MCSection &Section) const override {
    // Any section for which the linker breaks things into atoms needs to
    // preserve symbols, including assembler local symbols, to identify
    // those atoms. These sections are:
    // Sections of type:
    //
    //    S_CSTRING_LITERALS  (e.g. __cstring)
    //    S_LITERAL_POINTERS  (e.g. objc selector pointers)
    //    S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
    //
    // Sections named:
    //
    //    __TEXT,__eh_frame
    //    __TEXT,__ustring
    //    __DATA,__cfstring
    //    __DATA,__objc_classrefs
    //    __DATA,__objc_catlist
    //
    // FIXME: It would be better if the compiler used actual linker local
    // symbols for each of these sections rather than preserving what
    // are ostensibly assembler local symbols.
    const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
    return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
            SMO.getType() == MachO::S_4BYTE_LITERALS ||
            SMO.getType() == MachO::S_8BYTE_LITERALS ||
            SMO.getType() == MachO::S_16BYTE_LITERALS ||
            SMO.getType() == MachO::S_LITERAL_POINTERS ||
            (SMO.getSegmentName() == "__TEXT" &&
             (SMO.getSectionName() == "__eh_frame" ||
              SMO.getSectionName() == "__ustring")) ||
            (SMO.getSegmentName() == "__DATA" &&
             (SMO.getSectionName() == "__cfstring" ||
              SMO.getSectionName() == "__objc_classrefs" ||
              SMO.getSectionName() == "__objc_catlist")));
  }

  /// \brief Generate the compact unwind encoding from the CFI directives.
  uint32_t generateCompactUnwindEncoding(
      ArrayRef<MCCFIInstruction> Instrs) const override {

@@ -10,7 +10,6 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -34,7 +33,7 @@ class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
      : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
                                 /*UseAggressiveSymbolFolding=*/true) {}

  void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
  void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
                        const MCAsmLayout &Layout, const MCFragment *Fragment,
                        const MCFixup &Fixup, MCValue Target,
                        uint64_t &FixedValue) override;
@@ -113,25 +112,8 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
  }
}

static bool canUseLocalRelocation(const MCSectionMachO &Section,
                                  const MCSymbol &Symbol, unsigned Log2Size) {
  // Debug info sections can use local relocations.
  if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
    return true;

  // Otherwise, only pointer sized relocations are supported.
  if (Log2Size != 3)
    return false;

  // But only if they don't point to a cstring.
  if (!Symbol.isInSection())
    return true;
  const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
  return RefSec.getType() != MachO::S_CSTRING_LITERALS;
}

void AArch64MachObjectWriter::RecordRelocation(
    MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
    MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
    const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
    uint64_t &FixedValue) {
  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -141,9 +123,9 @@ void AArch64MachObjectWriter::RecordRelocation(
  unsigned Log2Size = 0;
  int64_t Value = 0;
  unsigned Index = 0;
  unsigned IsExtern = 0;
  unsigned Type = 0;
  unsigned Kind = Fixup.getKind();
  const MCSymbolData *RelSymbol = nullptr;

  FixupOffset += Fixup.getOffset();

@@ -189,8 +171,10 @@ void AArch64MachObjectWriter::RecordRelocation(
    // FIXME: Should this always be extern?
    // SymbolNum of 0 indicates the absolute section.
    Type = MachO::ARM64_RELOC_UNSIGNED;
    Index = 0;

    if (IsPCRel) {
      IsExtern = 1;
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "PC relative absolute relocation!");

@@ -214,12 +198,15 @@ void AArch64MachObjectWriter::RecordRelocation(
        Layout.getSymbolOffset(&B_SD) ==
            Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
      // SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
      Index = A_Base->getIndex();
      IsExtern = 1;
      Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
      IsPCRel = 1;
      MachO::any_relocation_info MRE;
      MRE.r_word0 = FixupOffset;
      MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
      Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
      MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                     (IsExtern << 27) | (Type << 28));
      Writer->addRelocation(Fragment->getParent(), MRE);
      return;
    } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
               Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
@@ -265,30 +252,25 @@ void AArch64MachObjectWriter::RecordRelocation(
                   ? 0
                   : Writer->getSymbolAddress(B_Base, Layout));

    Index = A_Base->getIndex();
    IsExtern = 1;
    Type = MachO::ARM64_RELOC_UNSIGNED;

    MachO::any_relocation_info MRE;
    MRE.r_word0 = FixupOffset;
    MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
    Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                   (IsExtern << 27) | (Type << 28));
    Writer->addRelocation(Fragment->getParent(), MRE);

    RelSymbol = B_Base;
    Index = B_Base->getIndex();
    IsExtern = 1;
    Type = MachO::ARM64_RELOC_SUBTRACTOR;
  } else { // A + constant
    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
    const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
        Fragment->getParent()->getSection());

    bool CanUseLocalRelocation =
        canUseLocalRelocation(Section, *Symbol, Log2Size);
    if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
      const MCSection &Sec = Symbol->getSection();
      if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
        Asm.addLocalUsedInReloc(*Symbol);
    }

    const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
    const MCSymbolData *Base = Asm.getAtom(&SD);
    const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
        Fragment->getParent()->getSection());

    // If the symbol is a variable and we weren't able to get a Base for it
    // (i.e., it's not in the symbol table associated with a section) resolve
@@ -328,13 +310,16 @@ void AArch64MachObjectWriter::RecordRelocation(
    // sections, and for pointer-sized relocations (.quad), we allow section
    // relocations.  It's code sections that run into trouble.
    if (Base) {
      RelSymbol = Base;
      Index = Base->getIndex();
      IsExtern = 1;

      // Add the local offset, if needed.
      if (Base != &SD)
        Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
    } else if (Symbol->isInSection()) {
      if (!CanUseLocalRelocation)
      // Pointer-sized relocations can use a local relocation. Otherwise,
      // we have to be in a debug info section.
      if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
        Asm.getContext().FatalError(
            Fixup.getLoc(),
            "unsupported relocation of local symbol '" + Symbol->getName() +
@@ -344,6 +329,7 @@ void AArch64MachObjectWriter::RecordRelocation(
      const MCSectionData &SymSD =
          Asm.getSectionData(SD.getSymbol().getSection());
      Index = SymSD.getOrdinal() + 1;
      IsExtern = 0;
      Value += Writer->getSymbolAddress(&SD, Layout);

      if (IsPCRel)
@@ -376,16 +362,16 @@ void AArch64MachObjectWriter::RecordRelocation(

    MachO::any_relocation_info MRE;
    MRE.r_word0 = FixupOffset;
    MRE.r_word1 =
        (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
    Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                   (IsExtern << 27) | (Type << 28));
    Writer->addRelocation(Fragment->getParent(), MRE);

    // Now set up the Addend relocation.
    Type = MachO::ARM64_RELOC_ADDEND;
    Index = Value;
    RelSymbol = nullptr;
    IsPCRel = 0;
    Log2Size = 2;
    IsExtern = 0;

    // Put zero into the instruction itself. The addend is in the relocation.
    Value = 0;
@@ -397,9 +383,9 @@ void AArch64MachObjectWriter::RecordRelocation(
  // struct relocation_info (8 bytes)
  MachO::any_relocation_info MRE;
  MRE.r_word0 = FixupOffset;
  MRE.r_word1 =
      (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
  Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
  MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                 (IsExtern << 27) | (Type << 28));
  Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,

|
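The paired old and new ``MRE.r_word1`` assignments in the AArch64 hunks above are the same Mach-O relocation-word packing before and after the ``addRelocation()`` interface change: the writer now takes the relocated symbol as an argument and derives the ``r_extern`` bit itself (a null symbol means a section-relative relocation), so callers drop the explicit ``(IsExtern << 27)`` term. A minimal sketch of the packed layout, assuming the standard ``relocation_info`` field widths; the helper below is hypothetical, not part of this import:

.. code-block:: c++

    #include <cstdint>

    // Hypothetical stand-alone packer mirroring MachO::any_relocation_info:
    // r_symbolnum in bits 0-23, r_pcrel in bit 24, r_length in bits 25-26,
    // r_extern in bit 27, r_type in bits 28-31.
    static uint32_t packRelocWord1(uint32_t Index, bool IsPCRel,
                                   uint32_t Log2Size, bool IsExtern,
                                   uint32_t Type) {
      return (Index << 0) | (uint32_t(IsPCRel) << 24) | (Log2Size << 25) |
             (uint32_t(IsExtern) << 27) | (Type << 28);
    }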
@ -567,10 +567,21 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// MOV NewBase, Base
// ADDS NewBase, #imm8.
if (Base != NewBase && Offset >= 8) {
const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
.getSubtarget<ARMSubtarget>();
// Need to insert a MOV to the new base first.
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
!Subtarget.hasV6Ops()) {
// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
if (Pred != ARMCC::AL)
return false;
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
.addReg(Base, getKillRegState(BaseKill));
} else
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);

// Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
Base = NewBase;
BaseKill = false;

@ -9191,27 +9191,39 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is duplicated in getARMFPUFeatures() in
// tools/clang/lib/Driver/Tools.cpp
static const struct {
const unsigned Fpu;
const unsigned ID;
const uint64_t Enabled;
const uint64_t Disabled;
} Fpus[] = {
{ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON},
{ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON},
{ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON},
{ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON},
{ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::FP_ARMV8, ARM::FeatureFPARMv8,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::NEON, ARM::FeatureNEON, 0},
{ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0},
{ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON,
ARM::FeatureCrypto},
{ARM::CRYPTO_NEON_FP_ARMV8,
ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0},
{ARM::SOFTVFP, 0, 0},
} FPUs[] = {
{ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON},
{ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
ARM::FeatureNEON},
{ARM::VFPV4, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
ARM::FeatureNEON},
{ARM::VFPV4_D16,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16,
ARM::FeatureNEON},
{ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureD16,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0},
{ARM::NEON_VFPV4,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON,
0},
{ARM::NEON_FP_ARMV8,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureNEON,
ARM::FeatureCrypto},
{ARM::CRYPTO_NEON_FP_ARMV8,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
0},
{ARM::SOFTVFP, 0, 0},
};

/// parseDirectiveFPU
@ -9229,14 +9241,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
return false;
}

for (const auto &Fpu : Fpus) {
if (Fpu.Fpu != ID)
for (const auto &Entry : FPUs) {
if (Entry.ID != ID)
continue;

// Need to toggle features that should be on but are off, and features that
// should be off but are on.
uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) |
(Fpu.Disabled & STI.getFeatureBits());
uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
(Entry.Disabled & STI.getFeatureBits());
setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
break;
}

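The ``Toggle`` expression above selects exactly the feature bits that are in the wrong state for the requested FPU: bits the entry enables that are currently clear, plus bits it disables that are currently set. A small worked example with made-up single-bit masks (the real ``ARM::Feature*`` constants are TableGen-generated):

.. code-block:: c++

    #include <cassert>
    #include <cstdint>

    int main() {
      // Invented one-bit stand-ins for the ARM::Feature* masks.
      const uint64_t FeatureVFP2 = 1 << 0;
      const uint64_t FeatureVFP3 = 1 << 1;
      const uint64_t FeatureNEON = 1 << 2;

      uint64_t Current  = FeatureVFP2 | FeatureNEON; // VFP2 and NEON are on
      uint64_t Enabled  = FeatureVFP2 | FeatureVFP3; // entry wants VFP2+VFP3
      uint64_t Disabled = FeatureNEON;               // ...and forbids NEON

      // Flip wanted-but-off bits plus forbidden-but-on bits.
      uint64_t Toggle = (Enabled & ~Current) | (Disabled & Current);
      assert(Toggle == (FeatureVFP3 | FeatureNEON));
      return 0;
    }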
@ -54,10 +54,10 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}

void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override;
};
}

@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MachO::any_relocation_info MRE;
@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MachO::any_relocation_info MRE;
@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
@ -351,10 +351,11 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
}

void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
MCAssembler &Asm,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
@ -400,8 +401,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
const MCSymbolData *RelSymbol = nullptr;

if (Target.isAbsolute()) { // constant
// FIXME!
@ -421,7 +422,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Check whether we need an external or internal relocation.
if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
FixedValue)) {
RelSymbol = SD;
IsExtern = 1;
Index = SD->getIndex();

// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
@ -445,8 +447,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));

// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
@ -471,10 +476,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(MachO::ARM_RELOC_PAIR << 28));

Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
Writer->addRelocation(Fragment->getParent(), MREPair);
}

Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,

@ -15,6 +15,7 @@
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
@ -42,7 +43,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineFunctionAnalysis>();
AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved("stack-protector");
AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
}
};

@ -497,14 +497,14 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
SDValue JumpTarget = Callee;

// T9 should contain the address of the callee function if
// -reloction-model=pic or it is an indirect call.
// -relocation-model=pic or it is an indirect call.
if (IsPICCall || !GlobalOrExternal) {
unsigned V0Reg = Mips::V0;
if (NeedMips16Helper) {
RegsToPass.push_front(std::make_pair(V0Reg, Callee));
JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG,
JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG,
MipsII::MO_GOT, Chain,
FuncInfo->callPtrInfo(S->getSymbol()));
} else

@ -756,7 +756,7 @@ def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setne f32:$lhs, f32:$rhs),
(NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
@ -776,7 +776,7 @@ def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setne f64:$lhs, f64:$rhs),
(NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;

@ -1613,22 +1613,22 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,

if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, Ty, DAG);
return getAddrGPRel(N, SDLoc(N), Ty, DAG);

// %hi/%lo relocation
return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
}

if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());

if (LargeGOT)
return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16,
MipsII::MO_GOT_LO16, DAG.getEntryNode(),
MachinePointerInfo::getGOT());

return getAddrGlobal(N, Ty, DAG,
return getAddrGlobal(N, SDLoc(N), Ty, DAG,
(Subtarget.isABI_N32() || Subtarget.isABI_N64())
? MipsII::MO_GOT_DISP
: MipsII::MO_GOT16,
@ -1642,9 +1642,9 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,

if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
!Subtarget.isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);

return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
}

@ -1735,9 +1735,9 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const

if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
!Subtarget.isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);

return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
}

@ -1754,12 +1754,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const

if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, Ty, DAG);
return getAddrGPRel(N, SDLoc(N), Ty, DAG);

return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
}

return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
}

@ -2681,15 +2681,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InternalLinkage = Val->hasInternalLinkage();

if (InternalLinkage)
Callee = getAddrLocal(G, Ty, DAG,
Callee = getAddrLocal(G, DL, Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Val));
IsCallReloc = true;
} else {
Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Val));
IsCallReloc = true;
}
@ -2702,15 +2702,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const char *Sym = S->getSymbol();

if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
Callee =
DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG);
else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
IsCallReloc = true;
} else { // N64 || PIC
Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Sym));
IsCallReloc = true;
}

@ -272,9 +272,8 @@ namespace llvm {
//
// (add (load (wrapper $gp, %got(sym)), %lo(sym))
template <class NodeTy>
SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
SDValue getAddrLocal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
bool IsN32OrN64) const {
SDLoc DL(N);
unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(N, Ty, DAG, GOTFlag));
@ -291,11 +290,10 @@ namespace llvm {
// computing a global symbol's address:
//
// (load (wrapper $gp, %got(sym)))
template<class NodeTy>
SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
template <class NodeTy>
SDValue getAddrGlobal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
unsigned Flag, SDValue Chain,
const MachinePointerInfo &PtrInfo) const {
SDLoc DL(N);
SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(N, Ty, DAG, Flag));
return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0);
@ -305,14 +303,13 @@ namespace llvm {
// computing a global symbol's address in large-GOT mode:
//
// (load (wrapper (add %hi(sym), $gp), %lo(sym)))
template<class NodeTy>
SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG,
unsigned HiFlag, unsigned LoFlag,
SDValue Chain,
template <class NodeTy>
SDValue getAddrGlobalLargeGOT(NodeTy *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned HiFlag,
unsigned LoFlag, SDValue Chain,
const MachinePointerInfo &PtrInfo) const {
SDLoc DL(N);
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty,
getTargetNode(N, Ty, DAG, HiFlag));
SDValue Hi =
DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag));
Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(N, Ty, DAG, LoFlag));
@ -324,9 +321,9 @@ namespace llvm {
// computing a symbol's address in non-PIC mode:
//
// (add %hi(sym), %lo(sym))
template<class NodeTy>
SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
SDLoc DL(N);
template <class NodeTy>
SDValue getAddrNonPIC(NodeTy *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG) const {
SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
return DAG.getNode(ISD::ADD, DL, Ty,
@ -338,9 +335,8 @@ namespace llvm {
// computing a symbol's address using gp-relative addressing:
//
// (add $gp, %gp_rel(sym))
template<class NodeTy>
SDValue getAddrGPRel(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
SDLoc DL(N);
template <class NodeTy>
SDValue getAddrGPRel(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG) const {
assert(Ty == MVT::i32);
SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL);
return DAG.getNode(ISD::ADD, DL, Ty,
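All of the Mips hunks above make one mechanical change: each ``getAddr*`` helper used to manufacture its own location with ``SDLoc DL(N);``, and now takes the location as a parameter, so a caller like ``LowerCall`` can pass the location of the call itself (``CLI.DL``) rather than that of the symbol node. A toy model of why the parameter matters, using invented stand-in types rather than the real SelectionDAG API:

.. code-block:: c++

    #include <iostream>

    // Stand-ins: a node remembers where its symbol was defined; a call
    // site carries its own source location.
    struct Loc { int Line; };
    struct Node { Loc SymbolLoc; };

    // Old shape: the helper always uses the node's own location.
    Loc addrLocOld(const Node &N) { return N.SymbolLoc; }

    // New shape: the caller decides which location gets attached.
    Loc addrLocNew(const Node &, Loc DL) { return DL; }

    int main() {
      Node N{{7}};       // symbol defined at line 7
      Loc CallSite{42};  // call lowered at line 42
      std::cout << addrLocOld(N).Line << "\n";            // prints 7
      std::cout << addrLocNew(N, CallSite).Line << "\n";  // prints 42
      return 0;
    }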
@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H

#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"

@ -32,8 +33,8 @@ class NVPTXAllocaHoisting : public FunctionPass {

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved<StackProtector>();
}

const char *getPassName() const override {

@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H

#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"

@ -29,8 +30,8 @@ struct NVPTXLowerAggrCopies : public FunctionPass {

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved<StackProtector>();
}

bool runOnFunction(Function &F) override;
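The Hexagon and NVPTX hunks each swap ``AU.addPreserved("stack-protector")`` for ``AU.addPreserved<StackProtector>()``: the string form names the pass by its registered command-line argument and is only resolved at runtime, while the template form names it by type, which the compiler checks (hence the new ``#include "llvm/CodeGen/StackProtector.h"``). A minimal sketch of the idiom in an invented pass:

.. code-block:: c++

    #include "llvm/CodeGen/StackProtector.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    namespace {
    // Hypothetical pass, present only to show the preservation idiom.
    struct DemoPass : FunctionPass {
      static char ID;
      DemoPass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addPreserved<StackProtector>(); // type-checked at compile time
        FunctionPass::getAnalysisUsage(AU);
      }

      bool runOnFunction(Function &) override { return false; }
    };
    }

    char DemoPass::ID = 0;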
@ -41,7 +41,7 @@ class PPCMachObjectWriter : public MCMachObjectTargetWriter {
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}

void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
Log2Size, IsPCRel, Value2);
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
}
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
return true;
}

@ -331,9 +331,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// See <reloc.h>.
const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = RelocType;

const MCSymbolData *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
//
@ -355,7 +355,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(

// Check whether we need an external or internal relocation.
if (Writer->doesSymbolRequireExternRelocation(SD)) {
RelSymbol = SD;
IsExtern = 1;
Index = SD->getIndex();
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
@ -374,8 +375,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(

// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
Type);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
@ -77,7 +77,11 @@ extern Target TheGCNTarget;

namespace AMDGPU {
enum TargetIndex {
TI_CONSTDATA_START
TI_CONSTDATA_START,
TI_SCRATCH_RSRC_DWORD0,
TI_SCRATCH_RSRC_DWORD1,
TI_SCRATCH_RSRC_DWORD2,
TI_SCRATCH_RSRC_DWORD3
};
}

@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"true",
"Support flat address space">;

def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"EnableVGPRSpilling",
"true",
"Enable spilling of VGPRs to scratch memory">;

class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",

@ -116,7 +116,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.isAmdHsaOS()) {
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
getSIProgramInfo(KernelInfo, MF);
EmitAmdKernelCodeT(MF, KernelInfo);
OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
@ -421,6 +420,7 @@ static unsigned getRsrcReg(unsigned ShaderType) {

void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());

@ -441,6 +441,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer.EmitIntValue(RsrcReg, 4);
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
if (STM.isVGPRSpillingEnabled(MFI)) {
OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
}
}

if (MFI->getShaderType() == ShaderType::PIXEL) {
@ -504,6 +508,19 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,

header.wavefront_size = STM.getWavefrontSize();

const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version",
ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly());
OutStreamer.SwitchSection(VersionSection);
OutStreamer.EmitBytes(Twine("HSA Code Unit:" +
Twine(header.hsail_version_major) + "." +
Twine(header.hsail_version_minor) + ":" +
"AMD:" +
Twine(header.amd_code_version_major) + "." +
Twine(header.amd_code_version_minor) + ":" +
"GFX8.1:0").str());

OutStreamer.SwitchSection(getObjFileLowering().getTextSection());

if (isVerbose()) {
OutStreamer.emitRawComment("amd_code_version_major = " +
Twine(header.amd_code_version_major), false);

@ -417,6 +417,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
N->getValueType(0), Ops);
}

case ISD::LOAD: {
// To simplify the TableGen patterns, we replace all i64 loads with
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
// during DAG legalization; however, some places in the DAG legalizer
// (e.g. ExpandUnalignedLoad) assume that i64 loads are legal, so doing
// this promotion early can cause problems.
EVT VT = N->getValueType(0);
LoadSDNode *LD = cast<LoadSDNode>(N);
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
break;

SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
LD->getBasePtr(), LD->getMemOperand());
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
SelectCode(NewLoad.getNode());
N = BitCast.getNode();
break;
}

case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
@ -962,16 +984,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());

unsigned ScratchPtrReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
unsigned ScratchOffsetReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
ScratchOffsetReg, MVT::i32);
SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
SDValue ScratchRsrcDword0 =
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);

SDValue ScratchPtr =
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
SDValue ScratchRsrcDword1 =
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);

const SDValue RsrcOps[] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
ScratchRsrcDword0,
CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
ScratchRsrcDword1,
CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
};
SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, RsrcOps), 0);
Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
@ -988,22 +1021,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
}
}

// (add FI, n0)
if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
isa<FrameIndexSDNode>(Addr.getOperand(0))) {
VAddr = Addr.getOperand(1);
ImmOffset = Addr.getOperand(0);
return true;
}

// (FI)
if (isa<FrameIndexSDNode>(Addr)) {
VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
CurDAG->getConstant(0, MVT::i32)), 0);
ImmOffset = Addr;
return true;
}

// (node)
VAddr = Addr;
ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
@ -187,9 +187,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

setOperationAction(ISD::LOAD, MVT::i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);

setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

@ -341,8 +341,39 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
// instead.
namespace llvm {
namespace AMDGPU {
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
}
}
}

// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
enum SISubtarget {
SI = 0,
VI = 1
};

enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
switch (Gen) {
default:
return SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
return VI;
}
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
int MCOp = AMDGPU::getMCOpcode(Opcode,
AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));

// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
return Opcode;

// (uint16_t)-1 means that Opcode is a pseudo instruction that has
// no encoding in the given subtarget generation.
if (MCOp == (uint16_t)-1)
return -1;

return MCOp;
}
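``pseudoToMCOpcode`` distinguishes two sentinels that are easy to conflate: a plain ``int`` ``-1`` (no table entry; the opcode is already native) and ``(uint16_t)-1``, i.e. 65535 (an entry exists, but the pseudo has no encoding on the given generation). Because the table value is widened to ``int`` before the comparison, the two never collide. A quick stand-alone check of the comparison semantics:

.. code-block:: c++

    #include <cassert>
    #include <cstdint>

    int main() {
      int Native     = -1;           // table miss: already a native opcode
      int NoEncoding = (uint16_t)-1; // table hit, no encoding: 0xFFFF

      assert(NoEncoding == 0xFFFF); // widening keeps the value, not the sign
      assert(NoEncoding != -1);     // so the two sentinels stay distinct
      assert(Native == -1);
      return 0;
    }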
@ -135,6 +135,11 @@ class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
bool isRegisterStore(const MachineInstr &MI) const;
bool isRegisterLoad(const MachineInstr &MI) const;

/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;

//===---------------------------------------------------------------------===//
// Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//

@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@ -39,29 +40,17 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
Ctx(ctx), ST(st)
{ }

enum AMDGPUMCInstLower::SISubtarget
AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const {
switch (Gen) {
default:
return AMDGPUMCInstLower::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
return AMDGPUMCInstLower::VI;
}
}

unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const {

int MCOpcode = AMDGPU::getMCOpcode(MIOpcode,
AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
if (MCOpcode == -1)
MCOpcode = MIOpcode;

return MCOpcode;
}

void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {

OutMI.setOpcode(getMCOpcode(MI->getOpcode()));
int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());

if (MCOpcode == -1) {
LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
"a target-specific version: " + Twine(MI->getOpcode()));
}

OutMI.setOpcode(MCOpcode);

for (const MachineOperand &MO : MI->explicit_operands()) {
MCOperand MCOp;
@ -91,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateExpr(Expr);
break;
}
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
MCOp = MCOperand::CreateExpr(Expr);
break;
}
}
OutMI.addOperand(MCOp);
}

@ -19,23 +19,9 @@ class MCContext;
class MCInst;

class AMDGPUMCInstLower {

// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
enum SISubtarget {
SI = 0,
VI = 1
};

MCContext &Ctx;
const AMDGPUSubtarget &ST;

/// Convert a member of the AMDGPUSubtarget::Generation enum to the
/// SISubtarget enum.
enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const;

/// Get the MC opcode for this MachineInstr.
unsigned getMCOpcode(unsigned MIOpcode) const;

public:
AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);

@ -18,7 +18,9 @@
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

@ -78,6 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false),
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
@ -113,3 +116,26 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
case SEA_ISLANDS: return 12;
}
}

bool AMDGPUSubtarget::isVGPRSpillingEnabled(
const SIMachineFunctionInfo *MFI) const {
return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
}

void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
unsigned NumRegionInstrs) const {
if (getGeneration() >= SOUTHERN_ISLANDS) {

// Track register pressure so the scheduler can try to decrease
// pressure once register usage is above the threshold defined by
// SIRegisterInfo::getRegPressureSetLimit()
Policy.ShouldTrackPressure = true;

// Enabling both top down and bottom up scheduling seems to give us less
// register spills than just using one of these approaches on its own.
Policy.OnlyTopDown = false;
Policy.OnlyBottomUp = false;
}
}

@ -30,6 +30,8 @@

namespace llvm {

class SIMachineFunctionInfo;

class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {

public:
@ -63,6 +65,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;

const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
@ -206,6 +209,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
return getGeneration() <= NORTHERN_ISLANDS;
}

void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin, MachineInstr *end,
unsigned NumRegionInstrs) const override;

// Helper functions to simplify if statements
bool isTargetELF() const {
return false;
@ -224,6 +231,15 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;

unsigned getMaxWavesPerCU() const {
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return 10;

// FIXME: Not sure what this is for other subtargets.
llvm_unreachable("do not know max waves per CU for this subtarget.");
}
};

} // End namespace llvm

@ -29,7 +29,7 @@ class AMDGPUMCObjectWriter : public MCObjectWriter {
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {

@ -163,4 +163,8 @@ namespace SIOutMods {
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)

#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)


#endif
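``S_0286E8_WAVESIZE`` packs a 13-bit per-wave scratch size into bits 12 and up of the ``SPI_TMPRING_SIZE`` register value, mirroring the existing ``S_00B860_WAVESIZE`` macro above it. A quick worked check of the packing arithmetic:

.. code-block:: c++

    #include <cassert>

    #define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)

    int main() {
      // ScratchBlocks = 5 lands in bits 12 and up: 5 << 12 == 0x5000.
      assert(S_0286E8_WAVESIZE(5) == 0x5000);
      // Values wider than 13 bits are masked off before the shift.
      assert(S_0286E8_WAVESIZE(0x2001) == 0x1000);
      return 0;
    }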
@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments(

InVals.push_back(Val);
}

if (Info->getShaderType() != ShaderType::COMPUTE) {
unsigned ScratchIdx = CCInfo.getFirstUnallocated(
AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs());
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
return Chain;
}

@ -85,49 +85,41 @@ class Enc64 {

let Uses = [EXEC] in {

class VOPCCommon <dag ins, string asm, list<dag> pattern> :
InstSI <(outs VCCReg:$dst), ins, asm, pattern> {
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {

let DisableEncoding = "$dst";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
let VOPC = 1;
let VALU = 1;
}

class VOPCCommon <dag ins, string asm, list<dag> pattern> :
VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {

let DisableEncoding = "$dst";
let VOPC = 1;
let Size = 4;
}

class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
VOPAnyCommon <outs, ins, asm, pattern> {

let VOP1 = 1;
let VALU = 1;
let Size = 4;
}

class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
VOPAnyCommon <outs, ins, asm, pattern> {

let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
let VOP2 = 1;
let VALU = 1;
let Size = 4;
}

class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
VOPAnyCommon <outs, ins, asm, pattern> {

let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
// Using complex patterns gives VOP3 patterns a very high complexity rating,
// but standalone patterns are almost always preferred, so we need to adjust
// the priority lower. The goal is to use a high number to reduce complexity to
@ -135,8 +127,6 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let AddedComplexity = -1000;

let VOP3 = 1;
let VALU = 1;

int Size = 8;
}

@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
return AMDGPU::COPY;
}

static bool shouldTryToSpillVGPRs(MachineFunction *MF) {

SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

// FIXME: Implement spilling for other shader types.
return MFI->getShaderType() == ShaderType::COMPUTE;

}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
}
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
} else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
MFI->setHasSpilledVGPRs();

switch(RC->getSize() * 8) {
@ -482,7 +473,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
@ -499,6 +490,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
int Opcode = -1;
@ -511,7 +503,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
}
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
} else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
switch(RC->getSize() * 8) {
case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
@ -528,7 +520,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);

} else {
@ -615,7 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
.addImm(-1)
.addImm(0);

BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32),
BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
TIDReg)
.addImm(-1)
.addReg(TIDReg);
@ -1053,7 +1045,11 @@ bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
}

bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
return AMDGPU::getVOPe32(Opcode) != -1;
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
return false;

return pseudoToMCOpcode(Op32) != -1;
}

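The new ``hasVALU32BitEncoding`` body chains two lookups: map the opcode to its e32 counterpart, then confirm that counterpart actually has an MC encoding on the current generation, since some VOP2 forms (the ``v_bcnt``/``v_mbcnt`` family rewritten later in this patch) are VOP3-only on VI. A stand-alone sketch with stubbed tables; the opcode numbers are invented:

.. code-block:: c++

    #include <cstdint>

    // Stubs for the TableGen-generated lookups: opcode 100 has an e32
    // form (200), but 200 has no MC encoding on this subtarget.
    static int getVOPe32(uint16_t Op) { return Op == 100 ? 200 : -1; }
    static int pseudoToMCOpcode(int Op) { return Op == 200 ? -1 : Op; }

    static bool hasVALU32BitEncoding(unsigned Opcode) {
      int Op32 = getVOPe32(Opcode);
      if (Op32 == -1)
        return false;                      // no e32 counterpart at all
      return pseudoToMCOpcode(Op32) != -1; // e32 exists but may not encode
    }

    int main() { return hasVALU32BitEncoding(100) ? 1 : 0; } // exits 0 here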
bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
@ -1126,12 +1122,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}

switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER: {
if (MI->getOperand(i).isImm() &&
!isImmOperandLegal(MI, i, MI->getOperand(i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
case MCOI::OPERAND_REGISTER:
if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
break;
case AMDGPU::OPERAND_REG_IMM32:
break;
case AMDGPU::OPERAND_REG_INLINE_C:
if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
break;
case MCOI::OPERAND_IMMEDIATE:
@ -1287,7 +1289,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
}
@ -2278,7 +2280,7 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
MachineOperand &Dest = Inst->getOperand(0);
MachineOperand &Src = Inst->getOperand(1);

const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
const TargetRegisterClass *SrcRC = Src.isReg() ?
MRI.getRegClass(Src.getReg()) :
&AMDGPU::SGPR_32RegClass;

@ -325,7 +325,6 @@ namespace AMDGPU {
int getVOPe32(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
int getMCOpcode(uint16_t Opcode, unsigned Gen);
int getAddr64Inst(uint16_t Opcode);
int getAtomicRetOp(uint16_t Opcode);
int getAtomicNoRetOp(uint16_t Opcode);

@ -36,6 +36,12 @@ class vop2 <bits<6> si, bits<6> vi = si> : vop {
field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
}

// Specify a VOP2 opcode for SI and VOP3 opcode for VI
// that doesn't have VOP2 encoding on VI
class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
let VI3 = vi;
}

class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
let SI3 = si;
let VI3 = vi;
@ -57,7 +63,7 @@ class sopk <bits<5> si, bits<5> vi = si> {
}

// Except for the NONE field, this must be kept in sync with the SISubtarget enum
// in AMDGPUMCInstLower.h
// in AMDGPUInstrInfo.cpp
def SISubtarget {
|
||||
int NONE = -1;
|
||||
int SI = 0;
|
||||
@ -731,7 +737,7 @@ class getAsm32 <int NumSrcArgs> {
|
||||
// Returns the assembly string for the inputs and outputs of a VOP3
|
||||
// instruction.
|
||||
class getAsm64 <int NumSrcArgs, bit HasModifiers> {
|
||||
string src0 = "$src0_modifiers,";
|
||||
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
||||
string src1 = !if(!eq(NumSrcArgs, 1), "",
|
||||
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
||||
" $src1_modifiers,"));
|
||||
@ -848,6 +854,16 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
let isPseudo = 1;
|
||||
}
|
||||
|
||||
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName, string revOpSI> {
|
||||
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
|
||||
VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
|
||||
|
||||
def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
|
||||
VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
|
||||
SIMCInstr <opName#"_e32", SISubtarget.SI>;
|
||||
}
|
||||
|
||||
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName, string revOpSI, string revOpVI> {
|
||||
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
|
||||
@ -889,16 +905,6 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
|
||||
VOP3e_vi <op>,
|
||||
SIMCInstr <opName#"_e64", SISubtarget.VI>;
|
||||
|
||||
// VI only instruction
|
||||
class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, int NumSrcArgs, bit HasMods = 1> :
|
||||
VOP3Common <outs, ins, asm, pattern>,
|
||||
VOP <opName>,
|
||||
VOP3e_vi <op>,
|
||||
VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
|
||||
!if(!eq(NumSrcArgs, 2), 0, 1),
|
||||
HasMods>;
|
||||
|
||||
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName, int NumSrcArgs, bit HasMods = 1> {
|
||||
|
||||
@ -998,6 +1004,23 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
|
||||
}
|
||||
}
|
||||
|
||||
// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
|
||||
multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
|
||||
string asm, list<dag> pattern = []> {
|
||||
let isPseudo = 1 in {
|
||||
def "" : VOPAnyCommon <outs, ins, "", pattern>,
|
||||
SIMCInstr<opName, SISubtarget.NONE>;
|
||||
}
|
||||
|
||||
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
|
||||
SIMCInstr <opName, SISubtarget.SI>;
|
||||
|
||||
def _vi : VOP3Common <outs, ins, asm, []>,
|
||||
VOP3e_vi <op.VI3>,
|
||||
VOP3DisableFields <1, 0, 0>,
|
||||
SIMCInstr <opName, SISubtarget.VI>;
|
||||
}
|
||||
|
||||
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
@ -1089,6 +1112,33 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
|
||||
revOp, P.HasModifiers
|
||||
>;
|
||||
|
||||
// A VOP2 instruction that is VOP3-only on VI.
|
||||
multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
string revOpSI, string revOpVI, bit HasMods> {
|
||||
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>;
|
||||
|
||||
defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
|
||||
revOpSI, revOpVI, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOpSI = opName, string revOpVI = revOpSI>
|
||||
: VOP2_VI3_Helper <
|
||||
op, opName, P.Outs,
|
||||
P.Ins32, P.Asm32, [],
|
||||
P.Ins64, P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOpSI, revOpVI, P.HasModifiers
|
||||
>;
|
||||
|
||||
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
VOPCCommon <ins, "", pattern>,
|
||||
VOP <opName>,
|
||||
@ -1224,34 +1274,6 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
|
||||
P.NumSrcArgs, P.HasModifiers
|
||||
>;
|
||||
|
||||
class VOP3InstVI <bits<10> op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag> : VOP3_vi <
|
||||
op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64,
|
||||
!if(!eq(P.NumSrcArgs, 3),
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
|
||||
P.Src2VT:$src2))]),
|
||||
!if(!eq(P.NumSrcArgs, 2),
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
|
||||
/* P.NumSrcArgs == 1 */,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
|
||||
P.NumSrcArgs, P.HasModifiers
|
||||
>;
|
||||
|
||||
multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP3b_2_m <
|
||||
|
@ -1525,25 +1525,25 @@ defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]

// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {

def V_READLANE_B32 : VOP2 <
0x00000001,
defm V_READLANE_B32 : VOP2SI_3VI_m <
vop3 <0x001, 0x289>,
"v_readlane_b32",
(outs SReg_32:$vdst),
(ins VGPR_32:$src0, SSrc_32:$vsrc1),
"v_readlane_b32 $vdst, $src0, $vsrc1",
[]
"v_readlane_b32 $vdst, $src0, $vsrc1"
>;

def V_WRITELANE_B32 : VOP2 <
0x00000002,
defm V_WRITELANE_B32 : VOP2SI_3VI_m <
vop3 <0x002, 0x28a>,
"v_writelane_b32",
(outs VGPR_32:$vdst),
(ins SReg_32:$src0, SSrc_32:$vsrc1),
"v_writelane_b32 $vdst, $src0, $vsrc1",
[]
"v_writelane_b32 $vdst, $src0, $vsrc1"
>;

// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {

let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
VOP_F32_F32_F32
@ -1568,30 +1568,33 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
}

} // End isCommutable = 1
} // End let SubtargetPredicate = SICI

defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32,
AMDGPUbfm>;
defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
AMDGPUbfm
>;
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
VOP_I32_I32_I32
>;
defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
VOP_I32_I32_I32
>;
defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;

////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;

} // End let SubtargetPredicate = SICI
//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//
@ -1656,9 +1659,6 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
VOP_I32_I32_I32_I32
>;

// Only on SI
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
VOP_F32_F32_F32_F32>;
defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
VOP_F32_F32_F32_F32, AMDGPUfmin3>;

@ -1699,20 +1699,6 @@ defm V_DIV_FIXUP_F64 : VOP3Inst <

} // let SchedRW = [WriteDouble]

defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
VOP_I64_I64_I32, shl
>;

// Only on SI
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
VOP_I64_I64_I32, srl
>;

// Only on SI
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
VOP_I64_I64_I32, sra
>;

let SchedRW = [WriteDouble] in {
let isCommutable = 1 in {

@ -1785,6 +1771,26 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <

} // let SchedRW = [WriteDouble]

// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {

defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
VOP_I64_I64_I32, shl
>;

defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
VOP_I64_I64_I32, srl
>;

defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
VOP_I64_I64_I32, sra
>;

defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
VOP_F32_F32_F32_F32>;

} // End SubtargetPredicate = isSICI

//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
@ -1943,14 +1949,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
(ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
(ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;

def _RESTORE : InstSI <
(outs sgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
@ -1966,14 +1972,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;

def _RESTORE : InstSI <
(outs vgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
@ -2728,16 +2734,12 @@ def : Pat <
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;

let Predicates = [isSICI] in {

def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32_e32 0xffffffff,
(V_MBCNT_HI_U32_B32_e64 0xffffffff,
(V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
>;

}

//===----------------------------------------------------------------------===//
// VOP3 Patterns
//===----------------------------------------------------------------------===//
@ -50,6 +50,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
unsigned NumUserSGPRs;
std::map<unsigned, unsigned> LaneVGPRs;
unsigned LDSWaveSpillSize;
unsigned ScratchOffsetReg;
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
@ -84,28 +84,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
Entry->addLiveIn(ScratchOffsetPreloadReg);

// Load the scratch pointer
unsigned ScratchPtrReg =
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
int ScratchPtrFI = -1;

if (ScratchPtrReg != AMDGPU::NoRegister) {
// Found an SGPR to use.
MRI.setPhysRegUsed(ScratchPtrReg);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
.addReg(ScratchPtrPreloadReg);
} else {
// No SGPR is available, we must spill.
ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
.addReg(ScratchPtrPreloadReg)
.addFrameIndex(ScratchPtrFI);
}

// Load the scratch offset.
unsigned ScratchOffsetReg =
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
int ScratchOffsetFI = ~0;
int ScratchOffsetFI = -1;

if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
@ -117,7 +99,9 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
.addReg(ScratchOffsetPreloadReg)
.addFrameIndex(ScratchOffsetFI);
.addFrameIndex(ScratchOffsetFI)
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
}

@ -125,22 +109,27 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
// add them to all the SI_SPILL_V* instructions.

RegScavenger RS;
bool UseRegScavenger =
(ScratchPtrReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister);
unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
RS.addScavengingFrameIndex(ScratchRsrcFI);

for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {

MachineBasicBlock &MBB = *BI;
if (UseRegScavenger)
RS.enterBasicBlock(&MBB);
// Add the scratch offset reg as a live-in so that the register scavenger
// doesn't re-use it.
if (!MBB.isLiveIn(ScratchOffsetReg) &&
ScratchOffsetReg != AMDGPU::NoRegister)
MBB.addLiveIn(ScratchOffsetReg);
RS.enterBasicBlock(&MBB);

for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
RS.forward(I);
DebugLoc DL = MI.getDebugLoc();
switch(MI.getOpcode()) {
default: break;;
default: break;
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
@ -153,43 +142,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE:

// Scratch Pointer
if (ScratchPtrReg == AMDGPU::NoRegister) {
ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
ScratchPtrReg)
.addFrameIndex(ScratchPtrFI)
.addReg(AMDGPU::NoRegister)
.addReg(AMDGPU::NoRegister);
} else if (!MBB.isLiveIn(ScratchPtrReg)) {
MBB.addLiveIn(ScratchPtrReg);
}
// Scratch resource
unsigned ScratchRsrcReg =
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);

uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size

unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);

BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);

BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
.addImm(Rsrc & 0xffffffff)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);

BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
.addImm(Rsrc >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);

// Scratch Offset
if (ScratchOffsetReg == AMDGPU::NoRegister) {
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
ScratchOffsetReg)
.addFrameIndex(ScratchOffsetFI)
.addReg(AMDGPU::NoRegister)
.addReg(AMDGPU::NoRegister);
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else if (!MBB.isLiveIn(ScratchOffsetReg)) {
MBB.addLiveIn(ScratchOffsetReg);
}

if (ScratchPtrReg == AMDGPU::NoRegister ||
if (ScratchRsrcReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF.getFunction()->getContext();
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
ScratchPtrReg = AMDGPU::SGPR0;
ScratchRsrcReg = AMDGPU::SGPR0;
ScratchOffsetReg = AMDGPU::SGPR0;
}
MI.getOperand(2).setReg(ScratchPtrReg);
MI.getOperand(2).setReg(ScratchRsrcReg);
MI.getOperand(2).setIsKill(true);
MI.getOperand(2).setIsUndef(false);
MI.getOperand(3).setReg(ScratchOffsetReg);
MI.getOperand(3).setIsUndef(false);
MI.getOperand(3).setIsKill(false);
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));

break;
}
if (UseRegScavenger)
RS.forward();
}
}
return true;
@ -23,7 +23,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#include "llvm/Support/Debug.h"
using namespace llvm;

SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@ -51,9 +50,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}

unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
return RC->getNumRegs();
unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {

// FIXME: We should adjust the max number of waves based on LDS size.
unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());

for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {

unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
unsigned Limit;

if (isSGPRClass(*I)) {
Limit = SGPRLimit / NumSubRegs;
} else {
Limit = VGPRLimit / NumSubRegs;
}

const int *Sets = getRegClassPressureSets(*I);
assert(Sets);
for (unsigned i = 0; Sets[i] != -1; ++i) {
if (Sets[i] == (int)Idx)
return Limit;
}
}
return 256;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@ -98,7 +120,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
unsigned Value,
unsigned ScratchPtr,
unsigned ScratchRsrcReg,
unsigned ScratchOffset,
int64_t Offset,
RegScavenger *RS) const {
@ -113,33 +135,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
bool RanOutOfSGPRs = false;
unsigned SOffset = ScratchOffset;

unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
if (RsrcReg == AMDGPU::NoRegister) {
RanOutOfSGPRs = true;
RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
}

unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned Size = NumSubRegs * 4;

uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
getSubReg(RsrcReg, AMDGPU::sub0_sub1))
.addReg(ScratchPtr)
.addReg(RsrcReg, RegState::ImplicitDefine);

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
getSubReg(RsrcReg, AMDGPU::sub2))
.addImm(Rsrc & 0xffffffff)
.addReg(RsrcReg, RegState::ImplicitDefine);

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
getSubReg(RsrcReg, AMDGPU::sub3))
.addImm(Rsrc >> 32)
.addReg(RsrcReg, RegState::ImplicitDefine);

if (!isUInt<12>(Offset + Size)) {
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
if (SOffset == AMDGPU::NoRegister) {
@ -163,9 +161,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,

BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(RsrcReg, getKillRegState(IsKill))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
.addImm(Offset)
.addReg(SOffset, getKillRegState(IsKill))
.addReg(SOffset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@ -235,9 +233,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}

if (isM0) {
if (isM0)
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
}

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(Spill.VGPR)
@ -262,7 +259,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V32_SAVE:
buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
@ -274,7 +271,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V512_RESTORE: {
buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
@ -289,7 +286,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false);
FIOp.ChangeToRegister(TmpReg, false, false, true);
}
}
}
@ -446,6 +443,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::TGID_Z:
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
if (MFI->getShaderType() != ShaderType::COMPUTE)
return MFI->ScratchOffsetReg;
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
case SIRegisterInfo::SCRATCH_PTR:
return AMDGPU::SGPR2_SGPR3;
@ -475,3 +474,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
switch(WaveCount) {
case 10: return 24;
case 9: return 28;
case 8: return 32;
case 7: return 36;
case 6: return 40;
case 5: return 48;
case 4: return 64;
case 3: return 84;
case 2: return 128;
default: return 256;
}
}

unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
switch(WaveCount) {
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
default: return 103;
}
}
@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H

#include "AMDGPURegisterInfo.h"
#include "llvm/Support/Debug.h"

namespace llvm {

@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {

BitVector getReservedRegs(const MachineFunction &MF) const override;

unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
unsigned getRegPressureSetLimit(unsigned Idx) const override;

bool requiresRegisterScavenging(const MachineFunction &Fn) const override;

@ -105,13 +105,21 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;

/// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
/// concurrent waves.
unsigned getNumVGPRsAllowed(unsigned WaveCount) const;

/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
unsigned getNumSGPRsAllowed(unsigned WaveCount) const;

unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;

private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
unsigned ScratchPtr, unsigned ScratchOffset,
unsigned ScratchRsrcReg, unsigned ScratchOffset,
int64_t Offset, RegScavenger *RS) const;
};
@ -10,6 +10,7 @@
//

#include "AMDGPU.h"
#include "AMDGPUMCInstLower.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"
@ -206,13 +207,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}

int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

// Op32 could be -1 here if we started with an instruction that had a
// getVOPe32 could be -1 here if we started with an instruction that had
// a 32-bit encoding and then commuted it to an instruction that did not.
if (Op32 == -1)
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;

int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
@ -11,22 +11,6 @@

let SubtargetPredicate = isVI in {

def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32,
AMDGPUldexp
>;
def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>;
def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32",
VOP_I32_I32_I32
>;
def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32",
VOP_I32_I32_I32
>;

def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;

defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
0x14, "buffer_load_dword", VGPR_32, i32, global_load
>;
@ -37,22 +21,13 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <

} // End SubtargetPredicate = isVI

//===----------------------------------------------------------------------===//
// VOP2 Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isVI] in {

def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32 0xffffffff,
(V_MBCNT_LO_U32_B32 0xffffffff, 0))
>;

//===----------------------------------------------------------------------===//
// SMEM Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isVI] in {

// 1. Offset as 8bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
@ -42,7 +42,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;

UseIntegratedAssembler = true;
if (TheTriple.isOSSolaris() || TheTriple.isOSOpenBSD())
UseIntegratedAssembler = true;
}

const MCExpr*
@ -777,6 +777,19 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
MachO::CPU_TYPE_X86_64, Subtype);
}

bool doesSectionRequireSymbols(const MCSection &Section) const override {
// Temporary labels in the string literals sections require symbols. The
// issue is that the x86_64 relocation format does not allow symbol +
// offset, and so the linker does not have enough information to resolve the
// access to the appropriate atom unless an external relocation is used. For
// non-cstring sections, we expect the compiler to use a non-temporary label
// for anything that could have an addend pointing outside the symbol.
//
// See <rdar://problem/4765733>.
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return SMO.getType() == MachO::S_CSTRING_LITERALS;
}

/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
@ -222,6 +222,9 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
case MCSymbolRefExpr::VK_GOT:
Type = ELF::R_386_GOT32;
break;
case MCSymbolRefExpr::VK_PLT:
Type = ELF::R_386_PLT32;
break;
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_386_GOTOFF;
break;
@ -10,7 +10,6 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@ -48,21 +47,23 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue);
void RecordX86_64Relocation(MachObjectWriter *Writer, MCAssembler &Asm,
void RecordX86_64Relocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue);

const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue);
public:
X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}

void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override {
if (Writer->is64Bit())
RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
@ -96,10 +97,13 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
}

void X86MachObjectWriter::RecordX86_64Relocation(
MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@ -113,7 +117,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
const MCSymbolData *RelSymbol = nullptr;

Value = Target.getConstant();

@ -129,6 +132,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
Type = MachO::X86_64_RELOC_UNSIGNED;
Index = 0;

// FIXME: I believe this is broken, I don't think the linker can understand
// it. I think it would require a local relocation, but I'm not sure if that
@ -189,30 +193,36 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Value -= Writer->getSymbolAddress(&B_SD, Layout) -
(!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));

if (!A_Base)
if (A_Base) {
Index = A_Base->getIndex();
IsExtern = 1;
} else {
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
Type = MachO::X86_64_RELOC_UNSIGNED;

MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);

if (B_Base)
RelSymbol = B_Base;
else
if (B_Base) {
Index = B_Base->getIndex();
IsExtern = 1;
} else {
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
if (Symbol->isTemporary() && Value) {
const MCSection &Sec = Symbol->getSection();
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
Asm.addLocalUsedInReloc(*Symbol);
}
const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
RelSymbol = Asm.getAtom(&SD);
const MCSymbolData *Base = Asm.getAtom(&SD);

// Relocations inside debug sections always use local relocations when
// possible. This seems to be done because the debugger doesn't fully
@ -222,20 +232,23 @@ void X86MachObjectWriter::RecordX86_64Relocation(
const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
Fragment->getParent()->getSection());
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
RelSymbol = nullptr;
Base = nullptr;
}

// x86_64 almost always uses external relocations, except when there is no
// symbol to use as a base address (a local symbol with no preceding
// non-local symbol).
if (RelSymbol) {
if (Base) {
Index = Base->getIndex();
IsExtern = 1;

// Add the local offset, if needed.
if (RelSymbol != &SD)
Value +=
Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(RelSymbol);
if (Base != &SD)
Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection() && !Symbol->isVariable()) {
// The index is the section ordinal (1-based).
Index = SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);

if (IsPCRel)
@ -334,9 +347,12 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}

bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
@ -408,7 +424,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@ -430,7 +446,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
return true;
}

@ -451,6 +467,7 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,

// Get the symbol data.
const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
unsigned Index = SD_A->getIndex();

// We're only going to have a second symbol in pic mode and it'll be a
// subtraction from the picbase. For 32-bit pic the addend is the difference
@ -473,9 +490,12 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = Value;
MRE.r_word1 =
(IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
Writer->addRelocation(SD_A, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(1 << 27) | // r_extern
(MachO::GENERIC_RELOC_TLV << 28)); // r_type
Writer->addRelocation(Fragment->getParent(), MRE);
}

void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
@ -526,8 +546,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
const MCSymbolData *RelSymbol = nullptr;

if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
@ -548,7 +568,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,

// Check whether we need an external or internal relocation.
if (Writer->doesSymbolRequireExternRelocation(SD)) {
RelSymbol = SD;
IsExtern = 1;
Index = SD->getIndex();
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
@ -570,9 +591,12 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
@ -1376,6 +1376,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
if (!Callee)
return false;
// The prototype of thunks are a lie, don't try to directly call such
// functions.
if (Callee->hasFnAttribute("thunk"))
return false;
Instruction *Caller = CS.getInstruction();
const AttributeSet &CallerPAL = CS.getAttributes();
@ -2182,9 +2182,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,

// Handle the floating point versions of equality comparisons too.
if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE))
Worklist.push_back(std::make_pair(Op0, Op1));

(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
// Floating point -0.0 and 0.0 compare equal, so we can't
// propagate a constant based on that comparison.
// FIXME: We should do this optimization if 'no signed zeros' is
// applicable via an instruction-level fast-math-flag or some other
// indicator that relaxed FP semantics are being used.
if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero())
Worklist.push_back(std::make_pair(Op0, Op1));
}

// If "A >= B" is known true, replace "A < B" with false everywhere.
CmpInst::Predicate NotPred = Cmp->getInversePredicate();
Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
@ -18,6 +18,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@ -47,7 +48,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
};
@ -46,7 +46,6 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
AU.addPreserved("mem2reg");
AU.addPreservedID(LowerInvokePassID);
}
@ -278,9 +278,8 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {

// Currently we only handle instructions of the form "add <indvar> <value>"
// and "sub <indvar> <value>".
unsigned Op = BO->getOpcode();
if (!(Op == Instruction::Add || Op == Instruction::Sub))
if (Op != Instruction::Add)
return false;

// If BO is already both nuw and nsw then there is nothing left to do
@ -304,15 +303,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
if (OtherOpSCEV == SE->getCouldNotCompute())
return false;

if (Op == Instruction::Sub) {
// If the subtraction is of the form "sub <indvar>, <op>", then pretend it
// is "add <indvar>, -<op>" and continue, else bail out.
if (OtherOperandIdx != 1)
return false;

OtherOpSCEV = SE->getNegativeSCEV(OtherOpSCEV);
}

const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);
@ -1968,8 +1968,12 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
// Try to further simplify the result.
CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
if (SimplifiedCI && SimplifiedCI->getCalledFunction())
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder))
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
SimplifiedCI->eraseFromParent();
return V;
}
return SimplifiedFortifiedCI;
}

@ -2218,11 +2222,11 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &
return nullptr;
}

Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) {
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
LibFunc::Func Func =
Name.startswith("str") ? LibFunc::strcpy_chk : LibFunc::stpcpy_chk;

if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
return nullptr;
@ -2231,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &
*ObjSize = CI->getArgOperand(2);

// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (!OnlyLowerUnknownSize && Dst == Src) {
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}
@ -2266,11 +2270,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &
return nullptr;
}

Value *FortifiedLibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) {
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
LibFunc::Func Func =
Name.startswith("str") ? LibFunc::strncpy_chk : LibFunc::stpncpy_chk;

if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
return nullptr;
@ -2310,10 +2314,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeMemSetChk(CI, Builder);
case LibFunc::stpcpy_chk:
case LibFunc::strcpy_chk:
return optimizeStrCpyChk(CI, Builder);
return optimizeStrpCpyChk(CI, Builder, Func);
case LibFunc::stpncpy_chk:
case LibFunc::strncpy_chk:
return optimizeStrNCpyChk(CI, Builder);
return optimizeStrpNCpyChk(CI, Builder, Func);
default:
break;
}
@ -79,6 +79,19 @@ static cl::list<std::string> RewriteMapFiles("rewrite-map-file",

namespace llvm {
namespace SymbolRewriter {
void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
const std::string &Target) {
if (Comdat *CD = GO->getComdat()) {
auto &Comdats = M.getComdatSymbolTable();

Comdat *C = M.getOrInsertComdat(Target);
C->setSelectionKind(CD->getSelectionKind());
GO->setComdat(C);

Comdats.erase(Comdats.find(Source));
}
}

template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
@ -102,10 +115,14 @@ template <RewriteDescriptor::Type DT, typename ValueType,
bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
bool Changed = false;
if (ValueType *S = (M.*Get)(Source)) {
if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
rewriteComdat(M, GO, Source, Target);

if (Value *T = (M.*Get)(Target))
S->setValueName(T->getValueName());
else
S->setName(Target);

Changed = true;
}
return Changed;
@ -145,6 +162,12 @@ performOnModule(Module &M) {
report_fatal_error("unable to transforn " + C.getName() + " in " +
M.getModuleIdentifier() + ": " + Error);

if (C.getName() == Name)
continue;

if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
rewriteComdat(M, GO, C.getName(), Name);

if (Value *V = (M.*Get)(Name))
C.setValueName(V->getValueName());
else
@ -35,7 +35,6 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
41
test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll
Normal file
@ -0,0 +1,41 @@
; RUN: opt -S -indvars < %s | FileCheck %s

; Check that SCEV does not assume sub nuw X Y == add nuw X, -Y
define void @f(i32* %loc) {
; CHECK-LABEL: @f
entry:
br label %loop

loop:
%idx = phi i32 [ 6, %entry ], [ %idx.dec, %loop ]
store i32 %idx, i32* %loc
%idx.dec = sub nuw i32 %idx, 1
%cond = icmp uge i32 %idx.dec, 5
br i1 %cond, label %loop, label %exit
; CHECK-NOT: br i1 true, label %loop, label %exit

exit:
ret void
}

declare void @use_i1(i1)

; Check that SCEV does not assume sub nsw X Y == add nsw X, -Y
define void @g(i32 %lim) {
; CHECK-LABEL: @g
entry:
br label %loop

loop:
%idx = phi i32 [ -1, %entry ], [ %idx.dec, %loop ]
%t = icmp sgt i32 %idx, 0
; CHECK-NOT: call void @use_i1(i1 false)
; CHECK: call void @use_i1(i1 %t)
call void @use_i1(i1 %t)
%idx.dec = sub nsw i32 %idx, -2147483648
%cond = icmp eq i32 %idx.dec, %lim
br i1 %cond, label %loop, label %exit

exit:
ret void
}
2
test/Bindings/llvm-c/add_named_metadata_operand.ll
Normal file
@ -0,0 +1,2 @@
; RUN: llvm-c-test --add-named-metadata-operand < /dev/null
; This used to trigger an assertion
2
test/Bindings/llvm-c/set_metadata.ll
Normal file
@ -0,0 +1,2 @@
; RUN: llvm-c-test --set-metadata < /dev/null
; This used to trigger an assertion
@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-DARWIN
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s

; x18 is reserved as a platform register on Darwin but not on other
@ -16,11 +17,11 @@ define void @keep_live() {
; CHECK: ldr x18
; CHECK: str x18

; CHECK-DARWIN-NOT: ldr fp
; CHECK-DARWIN-NOT: ldr x18
; CHECK-DARWIN: Spill
; CHECK-DARWIN-NOT: ldr fp
; CHECK-DARWIN-NOT: ldr x18
; CHECK-DARWIN: ret
; CHECK-RESERVE-X18-NOT: ldr fp
; CHECK-RESERVE-X18-NOT: ldr x18
; CHECK-RESERVE-X18: Spill
; CHECK-RESERVE-X18-NOT: ldr fp
; CHECK-RESERVE-X18-NOT: ldr x18
; CHECK-RESERVE-X18: ret
ret void
}
89
test/CodeGen/AArch64/ghc-cc.ll
Normal file
@ -0,0 +1,89 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

; Check the GHC call convention works (aarch64)

@base = external global i64 ; assigned to register: r19
@sp = external global i64 ; assigned to register: r20
@hp = external global i64 ; assigned to register: r21
@r1 = external global i64 ; assigned to register: r22
@r2 = external global i64 ; assigned to register: r23
@r3 = external global i64 ; assigned to register: r24
@r4 = external global i64 ; assigned to register: r25
@r5 = external global i64 ; assigned to register: r26
@r6 = external global i64 ; assigned to register: r27
@splim = external global i64 ; assigned to register: r28

@f1 = external global float ; assigned to register: s8
@f2 = external global float ; assigned to register: s9
@f3 = external global float ; assigned to register: s10
@f4 = external global float ; assigned to register: s11

@d1 = external global double ; assigned to register: d12
@d2 = external global double ; assigned to register: d13
@d3 = external global double ; assigned to register: d14
@d4 = external global double ; assigned to register: d15

define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind {
entry:
; CHECK-LABEL: addtwo
; CHECK: add x0, x19, x20
; CHECK-NEXT: ret
%0 = add i64 %x, %y
ret i64 %0
}

define void @zap(i64 %a, i64 %b) nounwind {
entry:
; CHECK-LABEL: zap
; CHECK-NOT: mov {{x[0-9]+}}, sp
; CHECK: bl addtwo
; CHECK-NEXT: bl foo
%0 = call ghccc i64 @addtwo(i64 %a, i64 %b)
call void @foo() nounwind
ret void
}

define ghccc void @foo_i64 () nounwind {
entry:
; CHECK-LABEL: foo_i64
; CHECK: adrp {{x[0-9]+}}, base
; CHECK-NEXT: ldr x19, [{{x[0-9]+}}, :lo12:base]
; CHECK-NEXT: bl bar_i64
; CHECK-NEXT: ret

%0 = load i64* @base
tail call ghccc void @bar_i64( i64 %0 ) nounwind
ret void
}

define ghccc void @foo_float () nounwind {
entry:
; CHECK-LABEL: foo_float
; CHECK: adrp {{x[0-9]+}}, f1
; CHECK-NEXT: ldr s8, [{{x[0-9]+}}, :lo12:f1]
; CHECK-NEXT: bl bar_float
; CHECK-NEXT: ret

%0 = load float* @f1
tail call ghccc void @bar_float( float %0 ) nounwind
ret void
}

define ghccc void @foo_double () nounwind {
entry:
; CHECK-LABEL: foo_double
; CHECK: adrp {{x[0-9]+}}, d1
; CHECK-NEXT: ldr d12, [{{x[0-9]+}}, :lo12:d1]
; CHECK-NEXT: bl bar_double
; CHECK-NEXT: ret

%0 = load double* @d1
tail call ghccc void @bar_double( double %0 ) nounwind
ret void
}

declare ghccc void @foo ()

declare ghccc void @bar_i64 (i64)
declare ghccc void @bar_float (float)
declare ghccc void @bar_double (double)
55
test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
Normal file
@ -0,0 +1,55 @@
; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T
; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M

; CHECK-LABEL: foo
define i32 @foo(i32 %z, ...) #0 {
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
%d = alloca i32, align 4
%e = alloca i32, align 4
%f = alloca i32, align 4
%g = alloca i32, align 4
%h = alloca i32, align 4

store i32 1, i32* %a, align 4
store i32 2, i32* %b, align 4
store i32 3, i32* %c, align 4
store i32 4, i32* %d, align 4
store i32 5, i32* %e, align 4
store i32 6, i32* %f, align 4
store i32 7, i32* %g, align 4
store i32 8, i32* %h, align 4

%0 = load i32* %a, align 4
%1 = load i32* %b, align 4
%2 = load i32* %c, align 4
%3 = load i32* %d, align 4
%4 = load i32* %e, align 4
%5 = load i32* %f, align 4
%6 = load i32* %g, align 4
%7 = load i32* %h, align 4

%add = add nsw i32 %0, %1
%add4 = add nsw i32 %add, %2
%add5 = add nsw i32 %add4, %3
%add6 = add nsw i32 %add5, %4
%add7 = add nsw i32 %add6, %5
%add8 = add nsw i32 %add7, %6
%add9 = add nsw i32 %add8, %7

%addz = add nsw i32 %add9, %z
call void @llvm.va_start(i8* null)
ret i32 %addz

; CHECK: sub sp, #40
; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8

; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]]
; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]]
; CHECK-NEXT: adds [[NEWBASE]], #8
; CHECK-NEXT: ldm [[NEWBASE]],
}

declare void @llvm.va_start(i8*) nounwind
@ -781,3 +781,93 @@ define i32 @true_f64(double %a, double %b) nounwind {
%2 = zext i1 %1 to i32
ret i32 %2
}

; The optimizers sometimes produce setlt instead of setolt/setult.
define float @bug1_f32(float %angle, float %at) #0 {
entry:
; ALL-LABEL: bug1_f32:

; 32-C-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12
; 32-C-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)(
; 32-C-DAG: c.ole.s $[[T0]], $[[T1]]
; 32-C-DAG: bc1t

; 32-CMP-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)(
; 32-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
; 32-CMP-DAG: bnez $[[T4]],

; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)(
; 64-C-DAG: c.ole.s $[[T0]], $[[T1]]
; 64-C-DAG: bc1t

; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)(
; 64-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
; 64-CMP-DAG: bnez $[[T4]],

%add = fadd fast float %at, %angle
%cmp = fcmp ogt float %add, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end

if.then:
%sub = fadd fast float %add, -1.000000e+00
br label %if.end

if.end:
%theta.0 = phi float [ %sub, %if.then ], [ %add, %entry ]
ret float %theta.0
}

; The optimizers sometimes produce setlt instead of setolt/setult.
define double @bug1_f64(double %angle, double %at) #0 {
entry:
; ALL-LABEL: bug1_f64:

; 32-C-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
; 32-C-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)(
; 32-C-DAG: c.ole.d $[[T0]], $[[T1]]
; 32-C-DAG: bc1t

; 32-CMP-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)(
; 32-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
; 32-CMP-DAG: bnez $[[T4]],

; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)(
; 64-C-DAG: c.ole.d $[[T0]], $[[T1]]
; 64-C-DAG: bc1t

; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)(
; 64-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
; 64-CMP-DAG: bnez $[[T4]],

%add = fadd fast double %at, %angle
%cmp = fcmp ogt double %add, 1.000000e+00
br i1 %cmp, label %if.then, label %if.end

if.then:
%sub = fadd fast double %add, -1.000000e+00
br label %if.end

if.end:
%theta.0 = phi double [ %sub, %if.then ], [ %add, %entry ]
ret double %theta.0
}

attributes #0 = { nounwind readnone "no-nans-fp-math"="true" }
@ -1,4 +1,3 @@
|
||||
; XFAIL: *
|
||||
; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}test_loop:
|
||||
|
@ -24,8 +24,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
|
||||
; XXX - Why 0 in register?
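; (One plausible answer, offered as an assumption rather than taken from this
; commit: the VOP2 form v_bcnt_u32_b32_e32 cannot encode an inline constant
; for its second source, so the zero had to be materialized with v_mov_b32
; first; the VOP3 form v_bcnt_u32_b32_e64 expected below encodes the 0
; operand directly.)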
; FUNC-LABEL: {{^}}v_ctpop_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

@ -40,8 +39,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
@ -73,8 +71,8 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}

; FUNC-LABEL: {{^}}v_ctpop_v2i32:
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm

; EG: BCNT_INT
@ -87,10 +85,10 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v4i32:
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm

; EG: BCNT_INT
@ -105,14 +103,14 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v8i32:
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm

; EG: BCNT_INT
@ -131,22 +129,22 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v16i32:
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm

; EG: BCNT_INT

@ -21,8 +21,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {

; FUNC-LABEL: {{^}}v_ctpop_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add

; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
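; (Offset arithmetic, a sketch assuming the 8-bit, 64-dword-stride offset
; fields of ds_read2st64_b32: each field holds at most 255 units of
; 64 * 4 = 256 bytes, i.e. 65280 bytes, so a second element that sits
; 0x10000 = 65536 bytes away is out of range and must be loaded through the
; separately computed [[BIGADD]] address instead of a paired read.)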
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a

; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {

@ -1,16 +1,27 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC

declare float @llvm.fabs.f32(float) #0

; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: v_cvt_i32_f32_e32
; SI: s_endpgm
define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
  %conv = fptosi float %in to i32
  store i32 %conv, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs:
; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
  %in.fabs = call float @llvm.fabs.f32(float %in) #0
  %conv = fptosi float %in.fabs to i32
  store i32 %conv, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
@ -214,3 +225,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
  store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone }

@ -1,6 +1,8 @@
; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s

; HSA: {{^}}simple:
; HSA: .section .hsa.version
; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0

18  test/CodeGen/R600/misaligned-load.ll  Normal file
@ -0,0 +1,18 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
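; (Summary of the checks below, not text from the commit: an i64 load from
; local memory with align 1 cannot be assumed to be naturally aligned, so it
; is expected to be legalized into eight single-byte ds_read_u8 loads.)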

; SI: @byte_aligned_load64
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: s_endpgm
define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
entry:
  %0 = load i64 addrspace(3)* %in, align 1
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

86  test/CodeGen/R600/scratch-buffer.ll  Normal file
@ -0,0 +1,86 @@
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s

; When a frame index offset is more than 12 bits, make sure we don't store
; it in mubuf's offset field.

; Also, make sure we use the same register for storing the scratch buffer address
; for both stores. This register is allocated by the register scavenger, so we
; should be able to reuse the same register for each scratch buffer access.
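; (Worked example, derived from the IR below rather than from the commit
; message: each alloca is [8192 x i32] = 32768 bytes = 0x8000, so the second
; scratch object sits at an offset that no longer fits in a 12-bit (max 0xFFF)
; mubuf immediate; the checks therefore expect 0x8000 to be materialized in a
; VGPR and applied through "offen".)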

; CHECK-LABEL: {{^}}legal_offset_fi:
; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}

define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
  %scratch0 = alloca [8192 x i32]
  %scratch1 = alloca [8192 x i32]

  %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0
  store i32 1, i32* %scratchptr0

  %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0
  store i32 2, i32* %scratchptr1

  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %if, label %else

if:
  %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
  %if_value = load i32* %if_ptr
  br label %done

else:
  %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
  %else_value = load i32* %else_ptr
  br label %done

done:
  %value = phi i32 [%if_value, %if], [%else_value, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}legal_offset_fi_offset
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}

define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
  %scratch0 = alloca [8192 x i32]
  %scratch1 = alloca [8192 x i32]

  %offset0 = load i32 addrspace(1)* %offsets
  %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0
  store i32 %offset0, i32* %scratchptr0

  %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1
  %offset1 = load i32 addrspace(1)* %offsetptr1
  %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1
  store i32 %offset1, i32* %scratchptr1

  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %if, label %else

if:
  %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
  %if_value = load i32* %if_ptr
  br label %done

else:
  %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
  %else_value = load i32* %else_ptr
  br label %done

done:
  %value = phi i32 [%if_value, %if], [%else_value, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac

; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
; CI: buffer_store_dword
define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4

@ -1,4 +1,4 @@
; RUN: llc < %s -march=sparc -no-integrated-as
; RUN: llc < %s -march=sparc
; PR 1557

target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"

@ -1,4 +1,4 @@
; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
; RUN: llc -march=sparc <%s | FileCheck %s

; CHECK-LABEL: test_constraint_r
; CHECK: add %o1, %o0, %o0

@ -1,4 +1,4 @@
; RUN: llc < %s -march=sparc -no-integrated-as
; RUN: llc < %s -march=sparc
; ModuleID = 'mult-alt-generic.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
target triple = "sparc"

84  test/DebugInfo/Mips/fn-call-line.ll  Normal file
@ -0,0 +1,84 @@
; RUN: llc -mtriple=mips-linux-gnu -filetype=asm -asm-verbose=0 -O0 < %s | FileCheck %s
; RUN: llc -mtriple=mips-linux-gnu -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=line - | FileCheck %s --check-prefix=INT

; Mips used to generate 'jumpy' debug line info around calls. The address
; calculation for each call to f1() would share the same line info so it would
; emit output of the form:
;   .loc $first_call_location
;   .. address calculation ..
;   .. function call ..
;   .. address calculation ..
;   .loc $second_call_location
;   .. function call ..
;   .loc $first_call_location
;   .. address calculation ..
;   .loc $third_call_location
;   .. function call ..
;   ...
; which would cause confusing stepping behaviour for the end user.
;
; This test checks that we emit more user-friendly debug line info of the form:
;   .loc $first_call_location
;   .. address calculation ..
;   .. function call ..
;   .loc $second_call_location
;   .. address calculation ..
;   .. function call ..
;   .loc $third_call_location
;   .. address calculation ..
;   .. function call ..
;   ...
;
; Generated with clang from fn-call-line.c:
;   void f1();
;   void f2() {
;     f1();
;     f1();
;   }

; CHECK: .loc 1 3 3
; CHECK-NOT: .loc
; CHECK: %call16(f1)
; CHECK-NOT: .loc
; CHECK: .loc 1 4 3
; CHECK-NOT: .loc
; CHECK: %call16(f1)

; INT: {{^}}Address
; INT: -----
; INT-NEXT: 2 0 1 0 0 is_stmt{{$}}
; INT-NEXT: 3 3 1 0 0 is_stmt prologue_end{{$}}
; INT-NEXT: 4 3 1 0 0 is_stmt{{$}}
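; (Reading the INT lines above, assuming llvm-dwarfdump's usual column order
; of Address, Line, Column, File, ISA, Discriminator, Flags: line 3, column 3
; carries prologue_end, and line 4 gets its own row, matching the asm .loc
; checks.)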


; Function Attrs: nounwind uwtable
define void @f2() #0 {
entry:
  call void (...)* @f1(), !dbg !11
  call void (...)* @f1(), !dbg !12
  ret void, !dbg !13
}

declare void @f1(...) #1

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}

!0 = !{!"0x11\0012\00clang version 3.7.0 (trunk 226641)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/fn-call-line.c] [DW_LANG_C99]
!1 = !{!"fn-call-line.c", !"/tmp/dbginfo"}
!2 = !{}
!3 = !{!4}
!4 = !{!"0x2e\00f2\00f2\00\002\000\001\000\000\000\000\002", !1, !5, !6, null, void ()* @f2, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f2]
!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/fn-call-line.c]
!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!7 = !{null}
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 2, !"Debug Info Version", i32 2}
!10 = !{!"clang version 3.7.0 (trunk 226641)"}
!11 = !MDLocation(line: 3, column: 3, scope: !4)
!12 = !MDLocation(line: 4, column: 3, scope: !4)
!13 = !MDLocation(line: 5, column: 1, scope: !4)
@ -7,8 +7,9 @@
; struct base {
;   virtual ~base();
; };
; typedef base base_type;
; struct foo {
;   base b;
;   base_type b;
; };
; foo f;

@ -20,40 +21,47 @@

%struct.foo = type { %struct.base }
%struct.base = type { i32 (...)** }

$_ZN3fooC2Ev = comdat any

$_ZN3fooD2Ev = comdat any

$_ZN4baseC2Ev = comdat any

@f = global %struct.foo zeroinitializer, align 8
@__dso_handle = external global i8
@_ZTV4base = external unnamed_addr constant [4 x i8*]
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_decl_derived_member.cpp, i8* null }]

define internal void @__cxx_global_var_init() section ".text.startup" {
entry:
  call void @_ZN3fooC2Ev(%struct.foo* @f) #2, !dbg !35
  %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.foo*)* @_ZN3fooD2Ev to void (i8*)*), i8* bitcast (%struct.foo* @f to i8*), i8* @__dso_handle) #2, !dbg !35
  ret void, !dbg !35
  call void @_ZN3fooC2Ev(%struct.foo* @f) #2, !dbg !33
  %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.foo*)* @_ZN3fooD2Ev to void (i8*)*), i8* bitcast (%struct.foo* @f to i8*), i8* @__dso_handle) #2, !dbg !33
  ret void, !dbg !33
}

; Function Attrs: inlinehint nounwind uwtable
define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 align 2 {
define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 comdat align 2 {
entry:
  %this.addr = alloca %struct.foo*, align 8
  store %struct.foo* %this, %struct.foo** %this.addr, align 8
  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !38
  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !34, metadata !36), !dbg !37
  %this1 = load %struct.foo** %this.addr
  %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !39
  call void @_ZN4baseC2Ev(%struct.base* %b) #2, !dbg !39
  ret void, !dbg !39
  %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !38
  call void @_ZN4baseC2Ev(%struct.base* %b) #2, !dbg !38
  ret void, !dbg !38
}

; Function Attrs: inlinehint uwtable
define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 align 2 {
define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 comdat align 2 {
entry:
  %this.addr = alloca %struct.foo*, align 8
  store %struct.foo* %this, %struct.foo** %this.addr, align 8
  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !40, metadata !{!"0x102"}), !dbg !41
  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !39, metadata !36), !dbg !40
  %this1 = load %struct.foo** %this.addr
  %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !42
  call void @_ZN4baseD1Ev(%struct.base* %b), !dbg !42
  ret void, !dbg !44
  %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !41
  call void @_ZN4baseD1Ev(%struct.base* %b), !dbg !41
  ret void, !dbg !43
}

; Function Attrs: nounwind
@ -62,24 +70,24 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #3

declare void @_ZN4baseD1Ev(%struct.base*) #4

; Function Attrs: inlinehint nounwind uwtable
define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 align 2 {
define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 comdat align 2 {
entry:
  %this.addr = alloca %struct.base*, align 8
  store %struct.base* %this, %struct.base** %this.addr, align 8
  call void @llvm.dbg.declare(metadata %struct.base** %this.addr, metadata !45, metadata !{!"0x102"}), !dbg !47
  call void @llvm.dbg.declare(metadata %struct.base** %this.addr, metadata !44, metadata !36), !dbg !46
  %this1 = load %struct.base** %this.addr
  %0 = bitcast %struct.base* %this1 to i8***, !dbg !48
  store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4base, i64 0, i64 2), i8*** %0, !dbg !48
  ret void, !dbg !48
  %0 = bitcast %struct.base* %this1 to i32 (...)***, !dbg !47
  store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV4base, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, !dbg !47
  ret void, !dbg !47
}

define internal void @_GLOBAL__I_a() section ".text.startup" {
declare void @_ZN4baseD1Ev(%struct.base*) #4

define internal void @_GLOBAL__sub_I_decl_derived_member.cpp() section ".text.startup" {
entry:
  call void @__cxx_global_var_init(), !dbg !49
  ret void, !dbg !49
  call void @__cxx_global_var_init(), !dbg !48
  ret void
}

attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@ -89,56 +97,55 @@ attributes #3 = { nounwind readnone }
attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!32, !33}
!llvm.ident = !{!34}
!llvm.module.flags = !{!30, !31}
!llvm.ident = !{!32}

!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)\000\00\000\00\001", !1, !2, !3, !8, !30, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cc] [DW_LANG_C_plus_plus]
!1 = !{!"foo.cc", !"/usr/local/google/home/echristo"}
!0 = !{!"0x11\004\00clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)\000\00\000\00\001", !1, !2, !3, !9, !28, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/decl-derived-member.cpp] [DW_LANG_C_plus_plus]
!1 = !{!"decl-derived-member.cpp", !"/tmp/dbginfo"}
!2 = !{}
!3 = !{!4, !7}
!3 = !{!4, !8}
!4 = !{!"0x13\00foo\005\0064\0064\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 5, size 64, align 64, offset 0] [def] [from ]
!5 = !{!6}
!6 = !{!"0xd\00b\006\0064\0064\000\000", !1, !"_ZTS3foo", !"_ZTS4base"} ; [ DW_TAG_member ] [b] [line 6, size 64, align 64, offset 0] [from _ZTS4base]
!7 = !{!"0x13\00base\001\000\000\000\004\000", !1, null, null, null, null, null, !"_ZTS4base"} ; [ DW_TAG_structure_type ] [base] [line 1, size 0, align 0, offset 0] [decl] [from ]
!8 = !{!9, !13, !19, !22, !28}
!9 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\009\001\001\000\006\00256\000\009", !1, !10, !11, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [__cxx_global_var_init]
!10 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/foo.cc]
!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!12 = !{null}
!13 = !{!"0x2e\00~foo\00~foo\00_ZN3fooD2Ev\005\000\001\000\006\00320\000\005", !1, !"_ZTS3foo", !14, null, void (%struct.foo*)* @_ZN3fooD2Ev, null, !17, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [~foo]
!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!15 = !{null, !16}
!16 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
!17 = !{!"0x2e\00~foo\00~foo\00\000\000\000\000\006\00320\000\000", null, !"_ZTS3foo", !14, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 0] [~foo]
!18 = !{i32 786468}
!19 = !{!"0x2e\00foo\00foo\00_ZN3fooC2Ev\005\000\001\000\006\00320\000\005", !1, !"_ZTS3foo", !14, null, void (%struct.foo*)* @_ZN3fooC2Ev, null, !20, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
!20 = !{!"0x2e\00foo\00foo\00\000\000\000\000\006\00320\000\000", null, !"_ZTS3foo", !14, null, null, null, i32 0, !21} ; [ DW_TAG_subprogram ] [line 0] [foo]
!21 = !{i32 786468}
!22 = !{!"0x2e\00base\00base\00_ZN4baseC2Ev\001\000\001\000\006\00320\000\001", !1, !"_ZTS4base", !23, null, void (%struct.base*)* @_ZN4baseC2Ev, null, !26, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [base]
!23 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!24 = !{null, !25}
!25 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4base]
!26 = !{!"0x2e\00base\00base\00\000\000\000\000\006\00320\000\000", null, !"_ZTS4base", !23, null, null, null, i32 0, !27} ; [ DW_TAG_subprogram ] [line 0] [base]
!27 = !{i32 786468}
!28 = !{!"0x2e\00\00\00_GLOBAL__I_a\001\001\001\000\006\0064\000\001", !1, !10, !29, null, void ()* @_GLOBAL__I_a, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [local] [def]
!29 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!30 = !{!31}
!31 = !{!"0x34\00f\00f\00\009\000\001", null, !10, !4, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 9] [def]
!32 = !{i32 2, !"Dwarf Version", i32 4}
!33 = !{i32 1, !"Debug Info Version", i32 2}
!34 = !{!"clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)"}
!35 = !MDLocation(line: 9, scope: !9)
!36 = !{!"0x101\00this\0016777216\001088", !19, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0]
!37 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
!38 = !MDLocation(line: 0, scope: !19)
!39 = !MDLocation(line: 5, scope: !19)
!40 = !{!"0x101\00this\0016777216\001088", !13, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0]
!41 = !MDLocation(line: 0, scope: !13)
!42 = !MDLocation(line: 5, scope: !43)
!43 = !{!"0xb\005\000\000", !1, !13} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cc]
!44 = !MDLocation(line: 5, scope: !13)
!45 = !{!"0x101\00this\0016777216\001088", !22, null, !46} ; [ DW_TAG_arg_variable ] [this] [line 0]
!46 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4base]
!47 = !MDLocation(line: 0, scope: !22)
!48 = !MDLocation(line: 1, scope: !22)
!49 = !MDLocation(line: 1, scope: !28)
!6 = !{!"0xd\00b\006\0064\0064\000\000", !1, !"_ZTS3foo", !7} ; [ DW_TAG_member ] [b] [line 6, size 64, align 64, offset 0] [from base_type]
!7 = !{!"0x16\00base_type\004\000\000\000\000", !1, null, !"_ZTS4base"} ; [ DW_TAG_typedef ] [base_type] [line 4, size 0, align 0, offset 0] [from _ZTS4base]
!8 = !{!"0x13\00base\001\000\000\000\004\000", !1, null, null, null, null, null, !"_ZTS4base"} ; [ DW_TAG_structure_type ] [base] [line 1, size 0, align 0, offset 0] [decl] [from ]
!9 = !{!10, !14, !19, !24, !26}
!10 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\008\001\001\000\000\00256\000\008", !1, !11, !12, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 8] [local] [def] [__cxx_global_var_init]
!11 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/decl-derived-member.cpp]
!12 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!13 = !{null}
!14 = !{!"0x2e\00foo\00foo\00_ZN3fooC2Ev\005\000\001\000\000\00320\000\005", !1, !"_ZTS3foo", !15, null, void (%struct.foo*)* @_ZN3fooC2Ev, null, !18, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
!15 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!16 = !{null, !17}
!17 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
!18 = !{!"0x2e\00foo\00foo\00\000\000\000\000\000\00320\000\000", null, !"_ZTS3foo", !15, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 0] [foo]
!19 = !{!"0x2e\00base\00base\00_ZN4baseC2Ev\001\000\001\000\000\00320\000\001", !1, !"_ZTS4base", !20, null, void (%struct.base*)* @_ZN4baseC2Ev, null, !23, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [base]
!20 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!21 = !{null, !22}
!22 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4base]
!23 = !{!"0x2e\00base\00base\00\000\000\000\000\000\00320\000\000", null, !"_ZTS4base", !20, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 0] [base]
!24 = !{!"0x2e\00~foo\00~foo\00_ZN3fooD2Ev\005\000\001\000\000\00320\000\005", !1, !"_ZTS3foo", !15, null, void (%struct.foo*)* @_ZN3fooD2Ev, null, !25, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [~foo]
!25 = !{!"0x2e\00~foo\00~foo\00\000\000\000\000\000\00320\000\000", null, !"_ZTS3foo", !15, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 0] [~foo]
!26 = !{!"0x2e\00\00\00_GLOBAL__sub_I_decl_derived_member.cpp\000\001\001\000\000\0064\000\000", !1, !11, !27, null, void ()* @_GLOBAL__sub_I_decl_derived_member.cpp, null, null, !2} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
!27 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!28 = !{!29}
!29 = !{!"0x34\00f\00f\00\008\000\001", null, !11, !"_ZTS3foo", %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 8] [def]
!30 = !{i32 2, !"Dwarf Version", i32 4}
!31 = !{i32 2, !"Debug Info Version", i32 2}
!32 = !{!"clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)"}
!33 = !MDLocation(line: 8, column: 5, scope: !10)
!34 = !{!"0x101\00this\0016777216\001088", !14, null, !35} ; [ DW_TAG_arg_variable ] [this] [line 0]
!35 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
!36 = !{!"0x102"} ; [ DW_TAG_expression ]
!37 = !MDLocation(line: 0, scope: !14)
!38 = !MDLocation(line: 5, column: 8, scope: !14)
!39 = !{!"0x101\00this\0016777216\001088", !24, null, !35} ; [ DW_TAG_arg_variable ] [this] [line 0]
!40 = !MDLocation(line: 0, scope: !24)
!41 = !MDLocation(line: 5, column: 8, scope: !42)
!42 = !{!"0xb\005\008\002", !1, !24} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/decl-derived-member.cpp]
!43 = !MDLocation(line: 5, column: 8, scope: !24)
!44 = !{!"0x101\00this\0016777216\001088", !19, null, !45} ; [ DW_TAG_arg_variable ] [this] [line 0]
!45 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4base]
!46 = !MDLocation(line: 0, scope: !19)
!47 = !MDLocation(line: 1, column: 8, scope: !19)
!48 = !MDLocation(line: 0, scope: !26)

63  test/MC/ARM/pr22395.s  Normal file
@ -0,0 +1,63 @@
@ RUN: llvm-mc -triple armv4t-eabi -filetype asm -o - %s 2>&1 | FileCheck %s
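@ (Context, summarized from PR22395 rather than stated in this file: each
@ .fpu directive must update the subtarget feature bits in place, so an
@ instruction that is legal for the newly selected FPU has to assemble
@ without a diagnostic; hence every block below re-checks that no
@ "instruction requires" error is emitted.)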

        .text
        .thumb

        .p2align 2

        .fpu neon
        vldmia r0, {d16-d31}

@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu vfpv3
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu vfpv3-d16
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu vfpv4
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu vfpv4-d16
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu fpv5-d16
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu fp-armv8
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu fp-armv8
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu neon
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu neon-vfpv4
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

        .fpu crypto-neon-fp-armv8
        vadd.f32 s1, s2, s3
@ CHECK: vadd.f32 s1, s2, s3
@ CHECK-NOT: error: instruction requires: VFP2

@ -63,6 +63,8 @@
// Relocation 28 (und_symbol-bar2) is of type R_386_PC8
// CHECK-NEXT: 0xA0 R_386_PC8 und_symbol 0x0
// CHECK-NEXT: 0xA3 R_386_GOTOFF und_symbol 0x0
// Relocation 29 (zed@PLT) is of type R_386_PLT32 and uses the symbol
// CHECK-NEXT: 0xA9 R_386_PLT32 zed 0x0
// CHECK-NEXT: }
// CHECK-NEXT: ]

@ -129,6 +131,7 @@ bar2:
        .byte und_symbol-bar2

        leal 1 + und_symbol@GOTOFF, %edi
        movl zed@PLT(%eax), %eax

        .section zedsec,"awT",@progbits
zed:

@ -1,59 +0,0 @@
// RUN: llvm-mc -triple aarch64-apple-darwin14 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s

// Test that "S + K" produces a relocation with a symbol, but just S produces
// a relocation with the section.

        .section __TEXT,__literal4,4byte_literals
L0:
        .long 42

        .section __TEXT,__cstring,cstring_literals
L1:
        .asciz "42"

        .section __DATA,__data
        .quad L0
        .quad L0 + 1
        .quad L1
        .quad L1 + 1

// CHECK: Relocations [
// CHECK-NEXT:   Section __data {
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x18
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 1
// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: L1
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x10
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 1
// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: L1
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x8
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 1
// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: L0
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x0
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 0
// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: 0x2
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT: ]
@ -1,59 +0,0 @@
// RUN: llvm-mc -triple x86_64-apple-darwin14 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s

// Test that "S + K" produces a relocation with a symbol, but just S produces
// a relocation with the section.

        .section __TEXT,__literal4,4byte_literals
L0:
        .long 42

        .section __TEXT,__cstring,cstring_literals
L1:
        .asciz "42"

        .section __DATA,__data
        .quad L0
        .quad L0 + 1
        .quad L1
        .quad L1 + 1

// CHECK: Relocations [
// CHECK-NEXT:   Section __data {
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x18
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 1
// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: L1
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x10
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 0
// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: 0x3
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x8
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 1
// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: L0
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:     Relocation {
// CHECK-NEXT:       Offset: 0x0
// CHECK-NEXT:       PCRel: 0
// CHECK-NEXT:       Length: 3
// CHECK-NEXT:       Extern: 0
// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
// CHECK-NEXT:       Symbol: 0x2
// CHECK-NEXT:       Scattered: 0
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT: ]
@ -121,12 +121,6 @@ D38:
//L39:
//D39:

        .section foo, bar
        .long L4 + 1
        .long L35 + 1
        .long L36 + 1
        .long L37 + 1
        .long L38 + 1

// CHECK: Symbols [
// CHECK-NEXT:   Symbol {

@ -28,12 +28,40 @@ entry:
  ret void
}

$source_comdat_function = comdat any
define dllexport void @source_comdat_function() comdat($source_comdat_function) {
entry:
  ret void
}

$source_comdat_function_1 = comdat exactmatch
define dllexport void @source_comdat_function_1() comdat($source_comdat_function_1) {
entry:
  ret void
}

$source_comdat_variable = comdat largest
@source_comdat_variable = global i32 32, comdat($source_comdat_variable)

$source_comdat_variable_1 = comdat noduplicates
@source_comdat_variable_1 = global i32 64, comdat($source_comdat_variable_1)

; CHECK: $target_comdat_function = comdat any
; CHECK: $target_comdat_function_1 = comdat exactmatch
; CHECK: $target_comdat_variable = comdat largest
; CHECK: $target_comdat_variable_1 = comdat noduplicates

; CHECK: @target_variable = external global i32
; CHECK-NOT: @source_variable = external global i32
; CHECK: @target_pattern_variable = external global i32
; CHECK-NOT: @source_pattern_variable = external global i32
; CHECK: @target_pattern_multiple_variable_matches = external global i32
; CHECK-NOT: @source_pattern_multiple_variable_matches = external global i32
; CHECK: @target_comdat_variable = global i32 32, comdat
; CHECK-NOT: @source_comdat_variable = global i32 32, comdat
; CHECK: @target_comdat_variable_1 = global i32 64, comdat
; CHECK-NOT: @source_comdat_variable_1 = global i32 64, comdat

; CHECK: declare void @target_function()
; CHECK-NOT: declare void @source_function()
; CHECK: declare void @target_pattern_function()
@ -57,3 +85,8 @@ entry:
; CHECK: ret i32 %res
; CHECK: }

; CHECK: define dllexport void @target_comdat_function() comdat
; CHECK-NOT: define dllexport void @source_comdat_function() comdat
; CHECK: define dllexport void @target_comdat_function_1() comdat
; CHECK-NOT: define dllexport void @source_comdat_function_1() comdat