From 2fe5752e3a7c345cdb59e869278d36af33c13fa4 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 25 Dec 2015 14:25:49 +0000 Subject: [PATCH] Import llvm 3.7.1 release (r255217). --- CMakeLists.txt | 2 +- CREDITS.TXT | 7 + autoconf/configure.ac | 4 +- bindings/go/llvm/ir.go | 2 +- bindings/ocaml/llvm/llvm_ocaml.c | 2 +- configure | 101 +- docs/ReleaseNotes.rst | 30 +- include/llvm-c/Core.h | 3 +- include/llvm/CodeGen/CommandFlags.h | 2 +- lib/CodeGen/AsmPrinter/WinException.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 2 + .../SelectionDAG/LegalizeVectorTypes.cpp | 20 +- lib/IR/AsmWriter.cpp | 14 +- lib/IR/Core.cpp | 9 +- lib/LTO/LTOCodeGenerator.cpp | 11 +- lib/MC/MCContext.cpp | 1 + lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6 + lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 25 +- lib/Target/AMDGPU/AMDGPURegisterInfo.td | 3 +- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 34 +- lib/Target/AMDGPU/SIISelLowering.cpp | 5 +- lib/Target/AMDGPU/SIInstrInfo.cpp | 13 + lib/Target/AMDGPU/SIInstrInfo.h | 2 +- lib/Target/AMDGPU/SIInstructions.td | 12 +- lib/Target/AMDGPU/SIPrepareScratchRegs.cpp | 7 +- lib/Target/AMDGPU/SIRegisterInfo.cpp | 31 +- lib/Target/AMDGPU/SIRegisterInfo.h | 3 + lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 5 + lib/Target/BPF/BPFISelDAGToDAG.cpp | 35 +- lib/Target/BPF/BPFISelLowering.cpp | 4 +- lib/Target/BPF/BPFInstrInfo.td | 12 +- lib/Target/BPF/BPFRegisterInfo.cpp | 25 +- lib/Target/Mips/MipsISelLowering.h | 8 + lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 5 + lib/Target/PowerPC/PPCAsmPrinter.cpp | 10 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 18 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 20 +- lib/Target/PowerPC/PPCISelLowering.cpp | 16 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 5 + lib/Target/PowerPC/PPCInstrInfo.td | 120 +- lib/Target/PowerPC/PPCInstrQPX.td | 24 + lib/Target/PowerPC/PPCInstrVSX.td | 27 +- lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 18 +- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 3 
+ lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 2 +- lib/Target/X86/X86ISelLowering.cpp | 29 + lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- lib/Transforms/Scalar/GVN.cpp | 3 +- lib/Transforms/Utils/Local.cpp | 5 + test/CodeGen/AMDGPU/llvm.dbg.value.ll | 37 + .../AMDGPU/promote-alloca-bitcast-function.ll | 22 + .../promote-alloca-stored-pointer-value.ll | 52 + test/CodeGen/AMDGPU/trunc-store.ll | 48 + test/CodeGen/BPF/fi_ri.ll | 25 + test/CodeGen/BPF/sockex2.ll | 2 +- test/CodeGen/BPF/undef.ll | 68 + test/CodeGen/Mips/llvm-ir/addrspacecast.ll | 12 + test/CodeGen/Mips/llvm-ir/extractelement.ll | 19 + test/CodeGen/Mips/micromips-zero-mat-uses.ll | 8 + test/CodeGen/PowerPC/ctr-loop-tls-const.ll | 40 + test/CodeGen/PowerPC/ctrloop-intrin.ll | 349 ++++ .../PowerPC/no-rlwimi-trivial-commute.mir | 92 + .../PowerPC/p8altivec-shuffles-pred.ll | 28 + test/CodeGen/PowerPC/pr24546.ll | 116 ++ test/CodeGen/PowerPC/pr25157.ll | 58 + test/CodeGen/PowerPC/rlwimi-and-or-bits.ll | 27 + test/CodeGen/PowerPC/select-i1-vs-i1.ll | 1685 +++++++++++++++++ .../PowerPC/vsx-fma-mutate-trivial-copy.ll | 38 + test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll | 33 + test/CodeGen/X86/pr24374.ll | 37 + test/CodeGen/X86/setcc-lowering.ll | 29 + test/DebugInfo/gvn.ll | 135 ++ test/LTO/X86/diagnostic-handler-noexit.ll | 13 + test/MC/AMDGPU/vop3.s | 110 +- test/MC/ARM/directive-arch-semantic-action.s | 12 + test/MC/Sparc/sparc-directive-xword.s | 5 +- tools/llvm-lto/llvm-lto.cpp | 5 +- tools/llvm-shlib/Makefile | 2 +- unittests/Transforms/Utils/Local.cpp | 37 + 80 files changed, 3659 insertions(+), 236 deletions(-) create mode 100644 test/CodeGen/AMDGPU/llvm.dbg.value.ll create mode 100644 test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll create mode 100644 test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll create mode 100644 test/CodeGen/AMDGPU/trunc-store.ll create mode 100644 test/CodeGen/BPF/fi_ri.ll create mode 100644 test/CodeGen/BPF/undef.ll create mode 100644 
test/CodeGen/Mips/llvm-ir/addrspacecast.ll create mode 100644 test/CodeGen/Mips/llvm-ir/extractelement.ll create mode 100644 test/CodeGen/Mips/micromips-zero-mat-uses.ll create mode 100644 test/CodeGen/PowerPC/ctr-loop-tls-const.ll create mode 100644 test/CodeGen/PowerPC/ctrloop-intrin.ll create mode 100644 test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir create mode 100644 test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll create mode 100644 test/CodeGen/PowerPC/pr24546.ll create mode 100644 test/CodeGen/PowerPC/pr25157.ll create mode 100644 test/CodeGen/PowerPC/rlwimi-and-or-bits.ll create mode 100644 test/CodeGen/PowerPC/select-i1-vs-i1.ll create mode 100644 test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll create mode 100644 test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll create mode 100644 test/CodeGen/X86/pr24374.ll create mode 100644 test/CodeGen/X86/setcc-lowering.ll create mode 100644 test/DebugInfo/gvn.ll create mode 100644 test/LTO/X86/diagnostic-handler-noexit.ll create mode 100644 test/MC/ARM/directive-arch-semantic-action.s diff --git a/CMakeLists.txt b/CMakeLists.txt index ac3b978ee6f5..78fc78b11781 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,7 @@ set(CMAKE_MODULE_PATH set(LLVM_VERSION_MAJOR 3) set(LLVM_VERSION_MINOR 7) -set(LLVM_VERSION_PATCH 0) +set(LLVM_VERSION_PATCH 1) set(LLVM_VERSION_SUFFIX "") if (NOT PACKAGE_VERSION) diff --git a/CREDITS.TXT b/CREDITS.TXT index 7cdd97c309a6..fd5119f01111 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -509,3 +509,10 @@ N: Michael Wong E: fraggamuffin@gmail.com D: Clang OpenMP implementation +N: Alexander Mussman +E: alexander.musman@intel.com +D: Clang OpenMP implementation + +N: Kevin O'Brien +E: caomhin@us.ibm.com +D: Clang OpenMP implementation \ No newline at end of file diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 74ebea2f5a7a..af57712b57c5 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -32,11 +32,11 @@ 
dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl address for reporting bugs. -AC_INIT([LLVM],[3.7.0],[http://llvm.org/bugs/]) +AC_INIT([LLVM],[3.7.1],[http://llvm.org/bugs/]) LLVM_VERSION_MAJOR=3 LLVM_VERSION_MINOR=7 -LLVM_VERSION_PATCH=0 +LLVM_VERSION_PATCH=1 LLVM_VERSION_SUFFIX= AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API]) diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index 80f7798ea064..76f5f06017c8 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -1728,7 +1728,7 @@ func (b Builder) CreatePtrDiff(lhs, rhs Value, name string) (v Value) { func (b Builder) CreateLandingPad(t Type, personality Value, nclauses int, name string) (l Value) { cname := C.CString(name) defer C.free(unsafe.Pointer(cname)) - l.C = C.LLVMBuildLandingPad(b.C, t.C, C.unsigned(nclauses), cname) + l.C = C.LLVMBuildLandingPad(b.C, t.C, nil, C.unsigned(nclauses), cname) return l } diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index 26835d015599..3889f9276ccd 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -1745,7 +1745,7 @@ CAMLprim LLVMValueRef llvm_build_invoke_bc(value Args[], int NumArgs) { CAMLprim LLVMValueRef llvm_build_landingpad(LLVMTypeRef Ty, LLVMValueRef PersFn, value NumClauses, value Name, value B) { - return LLVMBuildLandingPad(Builder_val(B), Ty, Int_val(NumClauses), + return LLVMBuildLandingPad(Builder_val(B), Ty, PersFn, Int_val(NumClauses), String_val(Name)); } diff --git a/configure b/configure index c562f830b3ae..c192415c24a8 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for LLVM 3.7.0. +# Generated by GNU Autoconf 2.60 for LLVM 3.7.1. # # Report bugs to . 
# @@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='LLVM' PACKAGE_TARNAME='llvm' -PACKAGE_VERSION='3.7.0' -PACKAGE_STRING='LLVM 3.7.0' +PACKAGE_VERSION='3.7.1' +PACKAGE_STRING='LLVM 3.7.1' PACKAGE_BUGREPORT='http://llvm.org/bugs/' ac_unique_file="lib/IR/Module.cpp" @@ -1333,7 +1333,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures LLVM 3.7.0 to adapt to many kinds of systems. +\`configure' configures LLVM 3.7.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1399,7 +1399,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of LLVM 3.7.0:";; + short | recursive ) echo "Configuration of LLVM 3.7.1:";; esac cat <<\_ACEOF @@ -1583,7 +1583,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -LLVM configure 3.7.0 +LLVM configure 3.7.1 generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1599,7 +1599,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by LLVM $as_me 3.7.0, which was +It was created by LLVM $as_me 3.7.1, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -1955,7 +1955,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu LLVM_VERSION_MAJOR=3 LLVM_VERSION_MINOR=7 -LLVM_VERSION_PATCH=0 +LLVM_VERSION_PATCH=1 LLVM_VERSION_SUFFIX= @@ -8643,87 +8643,6 @@ fi if test "$llvm_cv_os_type" = "MingW" ; then -{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5 -echo $ECHO_N "checking for main in -limagehlp... 
$ECHO_C" >&6; } -if test "${ac_cv_lib_imagehlp_main+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-limagehlp $LIBS" -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - - -int -main () -{ -return main (); - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_lib_imagehlp_main=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_lib_imagehlp_main=no -fi - -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ echo "$as_me:$LINENO: result: $ac_cv_lib_imagehlp_main" >&5 -echo "${ECHO_T}$ac_cv_lib_imagehlp_main" >&6; } -if test $ac_cv_lib_imagehlp_main = yes; then - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBIMAGEHLP 1 -_ACEOF - - LIBS="-limagehlp $LIBS" - -fi - - { echo "$as_me:$LINENO: checking for main in -lole32" >&5 echo $ECHO_N "checking for main in -lole32... $ECHO_C" >&6; } if test "${ac_cv_lib_ole32_main+set}" = set; then @@ -18610,7 +18529,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by LLVM $as_me 3.7.0, which was +This file was extended by LLVM $as_me 3.7.1, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -18663,7 +18582,7 @@ Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -LLVM config.status 3.7.0 +LLVM config.status 3.7.1 configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index fd149c97e44c..b68f5ecd493e 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -25,7 +25,35 @@ LLVM web page, this document applies to the *next* release, not the current one. To see the release notes for a specific release, please see the `releases page `_. -Non-comprehensive list of changes in this release +Major changes in 3.7.1 +====================== + +* 3.7.0 was released with an inadvertent change to the signature of the C + API function: LLVMBuildLandingPad, which made the C API incompatible with + prior releases. 
This has been corrected in LLVM 3.7.1. + + As a result of this change, 3.7.0 is not ABI compatible with 3.7.1. + + +----------------------------------------------------------------------------+ + | History of the LLVMBuildLandingPad() function | + +===========================+================================================+ + | 3.6.2 and prior releases | LLVMBuildLandingPad(LLVMBuilderRef, | + | | LLVMTypeRef, | + | | LLVMValueRef, | + | | unsigned, const char*) | + +---------------------------+------------------------------------------------+ + | 3.7.0 | LLVMBuildLandingPad(LLVMBuilderRef, | + | | LLVMTypeRef, | + | | unsigned, const char*) | + +---------------------------+------------------------------------------------+ + | 3.7.1 and future releases | LLVMBuildLandingPad(LLVMBuilderRef, | + | | LLVMTypeRef, | + | | LLVMValueRef, | + | | unsigned, const char*) | + +---------------------------+------------------------------------------------+ + + +Non-comprehensive list of changes in 3.7.0 ================================================= .. 
NOTE diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 15290072abe8..9dbcbfea387f 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -2675,7 +2675,8 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name); LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, - unsigned NumClauses, const char *Name); + LLVMValueRef PersFn, unsigned NumClauses, + const char *Name); LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn); LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef); diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index 4b2e0b06584c..bedb7d5549eb 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -21,7 +21,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" -#include "llvm//MC/SubtargetFeature.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 71c77815e281..a2b9316aa875 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -169,7 +169,7 @@ void WinException::endFunction(const MachineFunction *MF) { Asm->OutStreamer->PopSection(); } - if (shouldEmitMoves) + if (shouldEmitMoves || shouldEmitPersonality) Asm->OutStreamer->EmitWinCFIEndProc(); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 21ab07234c81..fbc8f1e89f6e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -439,7 +439,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; - ChainResult = Chain; + ChainResult = newLoad.getValue(1); 
return; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a7392fabf1e7..54cfaf570619 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1010,6 +1010,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, DAG.getConstant(EltSize, dl, Index.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4348ab79f7d1..51cd6619f783 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1528,9 +1528,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (CustomLowerNode(N, N->getValueType(0), true)) return SDValue(); - // Store the vector to the stack. - EVT EltVT = VecVT.getVectorElementType(); + // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); + EVT EltVT = VecVT.getVectorElementType(); + if (EltVT.getSizeInBits() < 8) { + SmallVector ElementOps; + for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { + ElementOps.push_back(DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, + DAG.getConstant(i, dl, MVT::i8)), + dl, MVT::i8)); + } + + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps); + } + + // Store the vector to the stack. 
SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo(), false, false, 0); diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index adc620db897c..b553f11018c7 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -794,6 +794,10 @@ void SlotTracker::processFunction() { ST_DEBUG("begin processFunction!\n"); fNext = 0; + // Process function metadata if it wasn't hit at the module-level. + if (!ShouldInitializeAllMetadata) + processFunctionMetadata(*TheFunction); + // Add all the function arguments with no names. for(Function::const_arg_iterator AI = TheFunction->arg_begin(), AE = TheFunction->arg_end(); AI != AE; ++AI) @@ -807,8 +811,6 @@ void SlotTracker::processFunction() { if (!BB.hasName()) CreateFunctionSlot(&BB); - processFunctionMetadata(*TheFunction); - for (auto &I : BB) { if (!I.getType()->isVoidTy() && !I.hasName()) CreateFunctionSlot(&I); @@ -836,11 +838,11 @@ void SlotTracker::processFunction() { void SlotTracker::processFunctionMetadata(const Function &F) { SmallVector, 4> MDs; - for (auto &BB : F) { - F.getAllMetadata(MDs); - for (auto &MD : MDs) - CreateMetadataSlot(MD.second); + F.getAllMetadata(MDs); + for (auto &MD : MDs) + CreateMetadataSlot(MD.second); + for (auto &BB : F) { for (auto &I : BB) processInstructionMetadata(I); } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index e0e729d534bd..0eb88a967575 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2257,7 +2257,14 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, } LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, - unsigned NumClauses, const char *Name) { + LLVMValueRef PersFn, unsigned NumClauses, + const char *Name) { + // The personality used to live on the landingpad instruction, but now it + // lives on the parent function. For compatibility, take the provided + // personality and put it on the parent function. 
+ if (PersFn) + unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn( + cast(unwrap(PersFn))); return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name)); } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 149ec6a4f372..25ae4ac76e3c 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -63,14 +63,21 @@ const char* LTOCodeGenerator::getVersionString() { #endif } +static void handleLTODiagnostic(const DiagnosticInfo &DI) { + DiagnosticPrinterRawOStream DP(errs()); + DI.print(DP); + errs() << "\n"; +} + LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) { + : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context), + handleLTODiagnostic) { initializeLTOPasses(); } LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr Context) : OwnedContext(std::move(Context)), Context(*OwnedContext), - IRLinker(new Module("ld-temp.o", *OwnedContext)) { + IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) { initializeLTOPasses(); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index c601c56f3952..a85796cfbad9 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -82,6 +82,7 @@ void MCContext::reset() { UsedNames.clear(); Symbols.clear(); + SectionSymbols.clear(); Allocator.Reset(); Instances.clear(); CompilationDir.clear(); diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 709d7531d38b..0a5309b16ee5 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { // TODO: CodeSize should account for multiple functions. + + // TODO: Should we count size of debug info? + if (MI.isDebugValue()) + continue; + + // FIXME: This is reporting 0 for many instructions. 
CodeSize += MI.getDesc().Size; unsigned numOperands = MI.getNumOperands(); diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 4a65bfc57f14..57b7a73bf56c 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { // // TODO: Check isTriviallyVectorizable for calls and handle other // instructions. -static bool canVectorizeInst(Instruction *Inst) { +static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { case Instruction::Load: - case Instruction::Store: case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; + case Instruction::Store: { + // Must be the stored pointer operand, not a stored value. + StoreInst *SI = cast(Inst); + return SI->getPointerOperand() == User; + } default: return false; } @@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { for (User *AllocaUser : Alloca->users()) { GetElementPtrInst *GEP = dyn_cast(AllocaUser); if (!GEP) { - if (!canVectorizeInst(cast(AllocaUser))) + if (!canVectorizeInst(cast(AllocaUser), Alloca)) return false; WorkList.push_back(AllocaUser); @@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { GEPVectorIdx[GEP] = Index; for (User *GEPUser : AllocaUser->users()) { - if (!canVectorizeInst(cast(GEPUser))) + if (!canVectorizeInst(cast(GEPUser), AllocaUser)) return false; WorkList.push_back(GEPUser); @@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector &WorkList) { for (User *User : Val->users()) { if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end()) continue; - if (isa(User)) { + if (CallInst *CI = dyn_cast(User)) { + // TODO: We might be able to handle some cases where the callee is a + // constantexpr bitcast of a function. 
+ if (!CI->getCalledFunction()) + return false; + WorkList.push_back(User); continue; } @@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector &WorkList) { if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt) return false; + if (StoreInst *SI = dyn_cast_or_null(UseInst)) { + // Reject if the stored value is not the pointer operand. + if (SI->getPointerOperand() != Val) + return false; + } + if (!User->getType()->isPointerTy()) continue; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td index 835a1464395c..ba0490abee8c 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -14,8 +14,7 @@ let Namespace = "AMDGPU" in { foreach Index = 0-15 in { - // Indices are used in a variety of ways here, so don't set a size/offset. - def sub#Index : SubRegIndex<-1, -1>; + def sub#Index : SubRegIndex<32, !shl(Index, 5)>; } def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 468563c44982..4434d9b119c6 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm, } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + case FK_Data_1: + return 1; + case FK_Data_2: + return 2; + case FK_Data_4: + return 4; + case FK_Data_8: + return 8; + default: + llvm_unreachable("Unknown fixup kind!"); + } +} + void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("Unknown fixup kind"); case AMDGPU::fixup_si_sopp_br: { uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); *Dst = (Value - 4) / 4; @@ -96,6 +110,24 @@ void 
AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, *Dst = Value + 4; break; } + default: { + // FIXME: Copied from AArch64 + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); + if (!Value) + return; // Doesn't change encoding. + MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + } } } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 099b0b15942b..c2db9ff537e9 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); setOperationAction(ISD::LOAD, MVT::i1, Custom); @@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG, SDValue Ptr) const { const SIInstrInfo *TII = static_cast(Subtarget->getInstrInfo()); - uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - return buildRSRC(DAG, DL, Ptr, 0, Rsrc); + return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23()); } SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 18910615bebe..cfd2c42d1aef 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { return 
RsrcDataFormat; } + +uint64_t SIInstrInfo::getScratchRsrcWords23() const { + uint64_t Rsrc23 = getDefaultRsrcDataFormat() | + AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size; + + // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. + // Clear them unless we want a huge stride. + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; + + return Rsrc23; +} diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 015ea12d4598..5053786a39f5 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -353,7 +353,7 @@ class SIInstrInfo : public AMDGPUInstrInfo { } uint64_t getDefaultRsrcDataFormat() const; - + uint64_t getScratchRsrcWords23() const; }; namespace AMDGPU { diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index f78ffd72314c..e0eeea9034b3 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m < // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { +let isCommutable = 1 in { +defm V_MAC_LEGACY_F32 : VOP2InstSI , "v_mac_legacy_f32", + VOP_F32_F32_F32 +>; +} // End isCommutable = 1 + defm V_MIN_LEGACY_F32 : VOP2InstSI , "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy >; @@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI , "v_lshl_b32", VOP_I32_I32_I32>; } // End isCommutable = 1 } // End let SubtargetPredicate = SICI -let isCommutable = 1 in { -defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst , "v_mac_legacy_f32", - VOP_F32_F32_F32 ->; -} // End isCommutable = 1 - defm V_BFM_B32 : VOP2_VI3_Inst , "v_bfm_b32", VOP_I32_I32_I32 >; diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp index 0a7f684552f0..2cd600df2268 100644 --- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp @@ -135,8 +135,7 @@ bool 
SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { unsigned ScratchRsrcReg = RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size + uint64_t Rsrc23 = TII->getScratchRsrcWords23(); unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); @@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc & 0xffffffff) + .addImm(Rsrc23 & 0xffffffff) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc >> 32) + .addImm(Rsrc23 >> 32) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); // Scratch Offset diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 54c4d549fac7..e9e8412e263d 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -26,23 +26,25 @@ using namespace llvm; SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} +void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { + MCRegAliasIterator R(Reg, this, true); + + for (; R.isValid(); ++R) + Reserved.set(*R); +} + BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - Reserved.set(AMDGPU::EXEC); - - // EXEC_LO and EXEC_HI could be allocated and used as regular register, - // but this seems likely to result in bugs, so I'm marking them as reserved. 
- Reserved.set(AMDGPU::EXEC_LO); - Reserved.set(AMDGPU::EXEC_HI); - Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); - Reserved.set(AMDGPU::FLAT_SCR); - Reserved.set(AMDGPU::FLAT_SCR_LO); - Reserved.set(AMDGPU::FLAT_SCR_HI); + + // EXEC_LO and EXEC_HI could be allocated and used as regular register, but + // this seems likely to result in bugs, so I'm marking them as reserved. + reserveRegisterTuples(Reserved, AMDGPU::EXEC); + reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs - Reserved.set(AMDGPU::VGPR255); - Reserved.set(AMDGPU::VGPR254); + reserveRegisterTuples(Reserved, AMDGPU::VGPR254); + reserveRegisterTuples(Reserved, AMDGPU::VGPR255); // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. @@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); - MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); - - for (; R.isValid(); ++R) - Reserved.set(*R); + reserveRegisterTuples(Reserved, Reg); } } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index bfdb67c5e12b..7da6de282c11 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -23,7 +23,10 @@ namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { +private: + void reserveRegisterTuples(BitVector &, unsigned Reg) const; +public: SIRegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f8f0eb2d4baa..cf6b8929f311 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include 
"llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -9104,6 +9105,10 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { return false; } + Triple T; + STI.setDefaultFeatures(T.getARMCPUForArch(Arch)); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + getTargetStreamer().emitArch(ID); return false; } diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index d9e654c76428..9d5f1d406d0e 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -50,6 +50,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel { // Complex Pattern for address selection. bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); }; } @@ -67,7 +68,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { Addr.getOpcode() == ISD::TargetGlobalAddress) return false; - // Addresses of the form FI+const or FI|const + // Addresses of the form Addr+const or Addr|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); if (isInt<32>(CN->getSExtValue())) { @@ -89,6 +90,31 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } +// ComplexPattern used on BPF FI instruction +bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + SDLoc DL(Addr); + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + // Addresses of the form Addr+const or Addr|const + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + else + return false; + + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64); + return 
true; + } + + return false; +} + SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); @@ -104,13 +130,6 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { // tablegen selection should be handled here. switch (Opcode) { default: break; - - case ISD::UNDEF: { - errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n'; - report_fatal_error("shouldn't see UNDEF during Select"); - break; - } - case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 58498a1aec7d..73418283d9bf 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -102,6 +102,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::SETCC, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); @@ -128,9 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBC, MVT::i64, Expand); setOperationAction(ISD::SUBE, MVT::i64, Expand); - // no UNDEF allowed - setOperationAction(ISD::UNDEF, MVT::i64, Expand); - setOperationAction(ISD::ROTR, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 26b2cfebdc83..6b73db87fa26 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -54,7 +54,8 @@ def i64immSExt32 : PatLeaf<(imm), [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. 
-def ADDRri : ComplexPattern; +def ADDRri : ComplexPattern; +def FIri : ComplexPattern; // Address operands def MEMri : Operand { @@ -260,6 +261,15 @@ def MOV_rr : MOV_RR<"mov">; def MOV_ri : MOV_RI<"mov">; } +def FI_ri + : InstBPF<(outs GPR:$dst), (ins MEMri:$addr), + "lea\t$dst, $addr", + [(set i64:$dst, FIri:$addr)]> { + // This is a tentative instruction, and will be replaced + // with MOV_rr and ADD_ri in PEI phase +} + + def LD_pseudo : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm), "ld_pseudo\t$dst, $pseudo, $imm", diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 8f885c3ea61b..952615bd1c2b 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -58,14 +58,13 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FrameReg = getFrameRegister(MF); int FrameIndex = MI.getOperand(i).getIndex(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &MBB = *MI.getParent(); if (MI.getOpcode() == BPF::MOV_rr) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); MI.getOperand(i).ChangeToRegister(FrameReg, false); - - MachineBasicBlock &MBB = *MI.getParent(); unsigned reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg) .addReg(reg) @@ -79,8 +78,24 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!isInt<32>(Offset)) llvm_unreachable("bug in frame offset"); - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i + 1).ChangeToImmediate(Offset); + if (MI.getOpcode() == BPF::FI_ri) { + // architecture does not really support FI_ri, replace it with + // MOV_rr , frame_reg + // ADD_ri , imm + unsigned reg = MI.getOperand(i - 1).getReg(); + + BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg) + .addReg(FrameReg); + BuildMI(MBB, II, DL, TII.get(BPF::ADD_ri), reg) + 
.addReg(reg) + .addImm(Offset); + + // Remove FI_ri instruction + MI.eraseFromParent(); + } else { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i + 1).ChangeToImmediate(Offset); + } } unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 6fe8f830d35d..b3d861d34da7 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -269,6 +269,14 @@ namespace llvm { unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Mips doesn't have any special address spaces so we just reserve + // the first 256 for software use (e.g. OpenCL) and treat casts + // between them as noops. + return SrcAS < 256 && DestAS < 256; + } + protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index cb46d731da29..2ebfbd17d7d0 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -115,6 +115,11 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; + // Also, we have to check that the register class of the operand + // contains the zero register. 
+ if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg)) + continue; + MO.setReg(ZeroReg); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 444446692c58..8e118ec27e67 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -947,11 +947,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDISdtprelHA: - // Transform: %Xd = ADDISdtprelHA %X3, - // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + // Transform: %Xd = ADDISdtprelHA %Xs, + // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha case PPC::ADDISdtprelHA32: { - // Transform: %Rd = ADDISdtprelHA32 %R3, - // Into: %Rd = ADDIS %R3, sym@dtprel@ha + // Transform: %Rd = ADDISdtprelHA32 %Rs, + // Into: %Rd = ADDIS %Rs, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -962,7 +962,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { *OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) .addReg(MI->getOperand(0).getReg()) - .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3) + .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); return; } diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index baadf081a64c..fd150beeb5a9 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -197,10 +197,18 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Determining the address of a TLS variable results in a function call in // certain TLS models. static bool memAddrUsesCTR(const PPCTargetMachine *TM, - const llvm::Value *MemAddr) { + const Value *MemAddr) { const auto *GV = dyn_cast(MemAddr); - if (!GV) + if (!GV) { + // Recurse to check for constants that refer to TLS global variables. 
+ if (const auto *CV = dyn_cast(MemAddr)) + for (const auto &CO : CV->operands()) + if (memAddrUsesCTR(TM, CO)) + return true; + return false; + } + if (!GV->isThreadLocal()) return false; if (!TM) @@ -239,6 +247,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; + // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr + // we're definitely using CTR. + case Intrinsic::ppc_is_decremented_ctr_nonzero: + case Intrinsic::ppc_mtctr: + return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ @@ -426,6 +439,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); + DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b6025bf66ef7..932226842bb7 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. if (isInt32Immediate(N->getOperand(1), Imm) && N->getOperand(0).getOpcode() == ISD::OR && isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. 
+ // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. + // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. unsigned MB, ME; - Imm = ~(Imm^Imm2); - if (isRunOfOnes(Imm, MB, ME)) { + if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), @@ -2787,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base, Offset; if (LD->isUnindexed() && + (LD->getMemoryVT() == MVT::f64 || + LD->getMemoryVT() == MVT::i64) && SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1e28913d1fca..1b8f8fb2f45b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); + setOperationAction(ISD::SELECT_CC, VT, Promote); + AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); @@ -7175,7 +7177,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || - PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || @@ -7183,8 +7184,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 
PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) { + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } @@ -7195,7 +7198,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned int ShuffleKind = isLittleEndian ? 2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || - PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || @@ -7203,8 +7205,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)) + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. 
If so, we can use our diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index bf6e40296405..d4e666cc1f3e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -309,6 +309,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned MB = MI->getOperand(4).getImm(); unsigned ME = MI->getOperand(5).getImm(); + // We can't commute a trivial mask (there is no way to represent an all-zero + // mask). + if (MB == 0 && ME == 31) + return nullptr; + if (NewMI) { // Create a new instruction. unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg(); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index b50124db1ea1..24fd9bd5c1f7 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2835,24 +2835,84 @@ def : Pat<(i64 (anyext i1:$in)), (SELECT_I8 $in, (LI8 1), (LI8 0))>; // match setcc on i1 variables. +// CRANDC is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F +// +// LT is: +// -1 -1 : F +// -1 0 : T +// 0 -1 : F +// 0 0 : F +// +// ULT is: +// 1 1 : F +// 1 0 : F +// 0 1 : T +// 0 0 : F def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), - (CRANDC $s2, $s1)>; + (CRANDC $s1, $s2)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), (CRANDC $s2, $s1)>; +// CRORC is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T +// +// LE is: +// -1 -1 : T +// -1 0 : T +// 0 -1 : F +// 0 0 : T +// +// ULE is: +// 1 1 : T +// 1 0 : F +// 0 1 : T +// 0 0 : T def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), - (CRORC $s2, $s1)>; + (CRORC $s1, $s2)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), (CRORC $s2, $s1)>; + def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), (CREQV $s1, $s2)>; + +// GE is: +// -1 -1 : T +// -1 0 : F +// 0 -1 : T +// 0 0 : T +// +// UGE is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), - (CRORC $s1, $s2)>; + (CRORC $s2, $s1)>; def : Pat<(i1 (setcc i1:$s1, 
i1:$s2, SETUGE)), (CRORC $s1, $s2)>; + +// GT is: +// -1 -1 : F +// -1 0 : F +// 0 -1 : T +// 0 0 : F +// +// UGT is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), - (CRANDC $s1, $s2)>; + (CRANDC $s2, $s1)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), (CRANDC $s1, $s2)>; + def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), (CRXOR $s1, $s2)>; @@ -3203,18 +3263,30 @@ def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), // select (lhs == rhs), tval, fval is: // ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)), (CROR (CRAND (CRANDC $rhs, $lhs), $tval), (CRAND (CRORC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)), (CROR (CRAND (CRORC $rhs, $lhs), $tval), (CRAND (CRANDC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), (CROR (CRAND (CREQV $lhs, $rhs), $tval), (CRAND (CRXOR $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)), (CROR (CRAND (CRORC $lhs, $rhs), $tval), (CRAND (CRANDC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)), (CROR (CRAND (CRANDC $lhs, $rhs), $tval), (CRAND (CRORC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, 
i1:$fval, SETNE)), @@ -3223,66 +3295,106 @@ def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), // match selectcc on i1 variables with non-i1 output. def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)), (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)), (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)), (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)), (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)), (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)), (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), (SELECT_I8 (CREQV $lhs, $rhs), 
$tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)), (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)), (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : 
Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)), (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)), (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)), (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)), (SELECT_VRRC (CRORC $lhs, $rhs), 
$tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td index 5c66b42690c3..0a044c5c6ea4 100644 --- a/lib/Target/PowerPC/PPCInstrQPX.td +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -1115,40 +1115,64 @@ def : Pat<(v4f64 (PPCqbflt v4i1:$src)), (COPY_TO_REGCLASS $src, QFRC)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)), + (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)), (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)), + (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)), (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)), (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)), + (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)), (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)), + (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)), (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)), (SELECT_QFRC (CRXOR $lhs, 
$rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)), + (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)), (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)), + (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)), (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)), (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)), + (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)), (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)), + (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)), (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)), (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)), + (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)), (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)), + (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)), (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)), (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc 
i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)), + (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)), (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)), + (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)), (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)), (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 20c95fe888e0..ce63c22992e8 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -958,27 +958,43 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc 
i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; @@ -1060,18 +1076,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
(COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), - (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 58d3c3d3fa2e..46b8d13e47b9 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -103,6 +103,11 @@ namespace { VNInfo *AddendValNo = LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); + if (!AddendValNo) { + // This can be null if the register is undef. 
+ continue; + } + MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); // The addend and this instruction must be in the same block. @@ -181,11 +186,14 @@ namespace { if (!KilledProdOp) continue; - // For virtual registers, verify that the addend source register - // is live here (as should have been assured above). - assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) || - LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) && - "Addend source register is not live!"); + // If the addend copy is used only by this MI, then the addend source + // register is likely not live here. This could be fixed (based on the + // legality checks above, the live range for the addend source register + // could be extended), but it seems likely that such a trivial copy can + // be coalesced away later, and thus is not worth the effort. + if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && + !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) + continue; // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. 
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 3fb1dcc3d4af..d7132d5272d8 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -240,6 +240,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; + bool RelevantInstr = false; bool Partial = false; diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 4a33f7fc3467..1c4e486da418 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -77,7 +77,7 @@ class SparcAsmParser : public MCTargetAsmParser { bool parseDirectiveWord(unsigned Size, SMLoc L); bool is64Bit() const { - return STI.getTargetTriple().getArchName().startswith("sparcv9"); + return STI.getTargetTriple().getArch() == Triple::sparcv9; } void expandSET(MCInst &Inst, SMLoc IDLoc, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 71ccb1ab1e55..0f29b514146c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13573,6 +13573,35 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, DAG.getConstant(SSECC, dl, MVT::i8)); } + MVT VTOp0 = Op0.getSimpleValueType(); + assert(VTOp0 == Op1.getSimpleValueType() && + "Expected operands with same type!"); + assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && + "Invalid number of packed elements for source and destination!"); + + if (VT.is128BitVector() && VTOp0.is256BitVector()) { + // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type + // legalizer to a wider vector type. In the case of 'vsetcc' nodes, the + // legalizer firstly checks if the first operand in input to the setcc has + // a legal type. If so, then it promotes the return type to that same type. 
+ // Otherwise, the return type is promoted to the 'next legal type' which, + // for a vector of MVT::i1 is always a 128-bit integer vector type. + // + // We reach this code only if the following two conditions are met: + // 1. Both return type and operand type have been promoted to wider types + // by the type legalizer. + // 2. The original operand type has been promoted to a 256-bit vector. + // + // Note that condition 2. only applies for AVX targets. + SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); + return DAG.getZExtOrTrunc(NewOp, dl, VT); + } + + // The non-AVX512 code below works under the assumption that source and + // destination types are the same. + assert((Subtarget->hasAVX512() || (VT == VTOp0)) && + "Value types for source and destination must be the same!"); + // Break 256-bit integer vector compare into smaller ones. if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 88e5e479136f..909baae92548 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -228,7 +228,7 @@ void PassManagerBuilder::populateModulePassManager( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. 
if (UseNewSROA) - MPM.add(createSROAPass(/*RequiresDomTree*/ false)); + MPM.add(createSROAPass()); else MPM.add(createScalarReplAggregatesPass(-1, false)); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index d1eba6e70e57..89a0d0af93be 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1761,7 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (isa<PHINode>(V)) V->takeName(LI); if (Instruction *I = dyn_cast<Instruction>(V)) - I->setDebugLoc(LI->getDebugLoc()); + if (LI->getDebugLoc()) + I->setDebugLoc(LI->getDebugLoc()); if (V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 50ca6234d0b7..ba8af47b54e1 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -869,6 +869,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { PN->replaceAllUsesWith(*Inserted.first); PN->eraseFromParent(); Changed = true; + + // The RAUW can change PHIs that we already visited. Start over from the + // beginning.
+ PHISet.clear(); + I = BB->begin(); } } diff --git a/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/test/CodeGen/AMDGPU/llvm.dbg.value.ll new file mode 100644 index 000000000000..d001bcb4db17 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -0,0 +1,37 @@ +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck %s + +; CHECK-LABEL: {{^}}test_debug_value: +; CHECK: s_load_dwordx2 +; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- SGPR0_SGPR1 +; CHECK: buffer_store_dword +; CHECK: s_endpgm +define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 { +entry: + tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14 + store i32 123, i32 addrspace(1)* %globalptr_arg, align 4 + ret void +} + +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11, !12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug") +!2 = !{} +!3 = !{!4} +!4 = !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: void (i32 addrspace(1)*)* @test_debug_value, variables: !9) +!5 = !DISubroutineType(types: !6) +!6 = !{null, !7} +!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, 
align: 32) +!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !{!10} +!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7) +!11 = !{i32 2, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !DIExpression() +!14 = !DILocation(line: 1, column: 42, scope: !4) diff --git a/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll b/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll new file mode 100644 index 000000000000..10739df08379 --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll @@ -0,0 +1,22 @@ +; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck %s + +; Make sure that AMDGPUPromoteAlloca doesn't crash if the called +; function is a constantexpr cast of a function. + +declare void @foo(float*) #0 +declare void @foo.varargs(...) #0 + +; CHECK: error: unsupported call to function foo in crash_call_constexpr_cast +define void @crash_call_constexpr_cast() #0 { + %alloca = alloca i32 + call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0 + ret void +} + +define void @crash_call_constexpr_cast_varargs() #0 { + %alloca = alloca i32 + call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0 + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll b/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll new file mode 100644 index 000000000000..2ee98cc3d2d2 --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s + +; Pointer value is stored in a candidate for LDS usage. 
+ +; GCN-LABEL: {{^}}stored_lds_pointer_value: +; GCN: buffer_store_dword v +define void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 { + %tmp = alloca float + store float 0.0, float *%tmp + store float* %tmp, float* addrspace(1)* %ptr + ret void +} + +; GCN-LABEL: {{^}}stored_lds_pointer_value_gep: +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1 +; GCN: buffer_store_dword v +; GCN: buffer_store_dword v +define void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 { +bb: + %tmp = alloca float, i32 16 + store float 0.0, float* %tmp + %tmp2 = getelementptr inbounds float, float* %tmp, i32 %idx + store float* %tmp2, float* addrspace(1)* %ptr + ret void +} + +; Pointer value is stored in a candidate for vector usage +; GCN-LABEL: {{^}}stored_vector_pointer_value: +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1 +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +define void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) { +entry: + %tmp0 = alloca [4 x i32] + %x = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 3 + store i32 0, i32* %x + store i32 1, i32* %y + store i32 2, i32* %z + store i32 3, i32* %w + %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 %index + store i32* %tmp1, i32* addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/trunc-store.ll b/test/CodeGen/AMDGPU/trunc-store.ll new file mode 100644 index 000000000000..4ba815f26690 --- /dev/null +++ b/test/CodeGen/AMDGPU/trunc-store.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}truncstore_arg_v16i32_to_v16i8: +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +define void @truncstore_arg_v16i32_to_v16i8(<16 x i8> addrspace(1)* %out, <16 x i32> %in) { + %trunc = trunc <16 x i32> %in to <16 x i8> + store <16 x i8> %trunc, <16 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}truncstore_arg_v16i64_to_v16i8: +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +define void @truncstore_arg_v16i64_to_v16i8(<16 x i8> addrspace(1)* %out, <16 x i64> %in) { + %trunc = trunc <16 x i64> %in to <16 x i8> + store <16 x i8> %trunc, <16 x i8> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/BPF/fi_ri.ll b/test/CodeGen/BPF/fi_ri.ll new file mode 100644 index 000000000000..64773b429fba --- /dev/null +++ b/test/CodeGen/BPF/fi_ri.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=bpf | FileCheck %s + +%struct.key_t = type { i32, [16 x i8] } + +; Function Attrs: nounwind uwtable +define i32 @test() #0 { + %key = alloca %struct.key_t, align 4 + %1 = bitcast %struct.key_t* %key to i8* +; CHECK: mov r1, 0 +; CHECK: stw -8(r10), r1 +; CHECK: std -16(r10), r1 +; CHECK: std -24(r10), r1 + call 
void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 20, i32 4, i1 false) +; CHECK: mov r1, r10 +; CHECK: addi r1, -20 + %2 = getelementptr inbounds %struct.key_t, %struct.key_t* %key, i64 0, i32 1, i64 0 +; CHECK: call test1 + call void @test1(i8* %2) #3 + ret i32 0 +} + +; Function Attrs: nounwind argmemonly +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 + +declare void @test1(i8*) #2 diff --git a/test/CodeGen/BPF/sockex2.ll b/test/CodeGen/BPF/sockex2.ll index d372a5982f68..5de2787d5b07 100644 --- a/test/CodeGen/BPF/sockex2.ll +++ b/test/CodeGen/BPF/sockex2.ll @@ -311,7 +311,7 @@ flow_dissector.exit.thread: ; preds = %86, %12, %196, %199 ; CHECK-LABEL: bpf_prog2: ; CHECK: ldabs_h r0, r6.data + 12 # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00] ; CHECK: ldabs_h r0, r6.data + 16 # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00] -; CHECK-NOT: implicit +; CHECK: implicit-def: R ; CHECK: ld_64 r1 ; CHECK-NOT: ori ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00] diff --git a/test/CodeGen/BPF/undef.ll b/test/CodeGen/BPF/undef.ll new file mode 100644 index 000000000000..ef712c4a595f --- /dev/null +++ b/test/CodeGen/BPF/undef.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -march=bpf | FileCheck %s + +%struct.bpf_map_def = type { i32, i32, i32, i32 } +%struct.__sk_buff = type opaque +%struct.routing_key_2 = type { [6 x i8] } + +@routing = global %struct.bpf_map_def { i32 1, i32 6, i32 12, i32 1024 }, section "maps", align 4 +@routing_miss_0 = global %struct.bpf_map_def { i32 1, i32 1, i32 12, i32 1 }, section "maps", align 4 +@test1 = global %struct.bpf_map_def { i32 2, i32 4, i32 8, i32 1024 }, section "maps", align 4 +@test1_miss_4 = global %struct.bpf_map_def { i32 2, i32 1, i32 8, i32 1 }, section "maps", align 4 +@_license = global [4 x i8] c"GPL\00", section "license", align 1 +@llvm.used = appending global [6 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @_license, i32 0, i32 0), i8* bitcast (i32 (%struct.__sk_buff*)* 
@ebpf_filter to i8*), i8* bitcast (%struct.bpf_map_def* @routing to i8*), i8* bitcast (%struct.bpf_map_def* @routing_miss_0 to i8*), i8* bitcast (%struct.bpf_map_def* @test1 to i8*), i8* bitcast (%struct.bpf_map_def* @test1_miss_4 to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind uwtable +define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { + %key = alloca %struct.routing_key_2, align 1 + %1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0 +; CHECK: mov r1, 5 +; CHECK: stb -8(r10), r1 + store i8 5, i8* %1, align 1 + %2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1 +; CHECK: mov r1, 6 +; CHECK: stb -7(r10), r1 + store i8 6, i8* %2, align 1 + %3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2 +; CHECK: mov r1, 7 +; CHECK: stb -6(r10), r1 + store i8 7, i8* %3, align 1 + %4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3 +; CHECK: mov r1, 8 +; CHECK: stb -5(r10), r1 + store i8 8, i8* %4, align 1 + %5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4 +; CHECK: mov r1, 9 +; CHECK: stb -4(r10), r1 + store i8 9, i8* %5, align 1 + %6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5 +; CHECK: mov r1, 10 +; CHECK: stb -3(r10), r1 + store i8 10, i8* %6, align 1 + %7 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0 +; CHECK: mov r1, r10 +; CHECK: addi r1, -2 +; CHECK: mov r2, 0 +; CHECK: sth 6(r1), r2 +; CHECK: sth 4(r1), r2 +; CHECK: sth 2(r1), r2 +; CHECK: sth 24(r10), r2 +; CHECK: sth 22(r10), r2 +; CHECK: sth 20(r10), r2 +; CHECK: sth 18(r10), r2 +; CHECK: sth 16(r10), r2 +; CHECK: sth 14(r10), r2 +; CHECK: sth 12(r10), r2 +; CHECK: sth 10(r10), r2 +; CHECK: sth 8(r10), r2 +; 
CHECK: sth 6(r10), r2 +; CHECK: sth -2(r10), r2 +; CHECK: sth 26(r10), r2 + call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false) + %8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3 + ret i32 undef +} + +; Function Attrs: nounwind argmemonly +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 + +declare i32 @bpf_map_lookup_elem(...) #2 diff --git a/test/CodeGen/Mips/llvm-ir/addrspacecast.ll b/test/CodeGen/Mips/llvm-ir/addrspacecast.ll new file mode 100644 index 000000000000..060fa4ce7bb1 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/addrspacecast.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL + +; Address spaces 1-255 are software defined. +define i32* @cast(i32 *%arg) { + %1 = addrspacecast i32* %arg to i32 addrspace(1)* + %2 = addrspacecast i32 addrspace(1)* %1 to i32 addrspace(2)* + %3 = addrspacecast i32 addrspace(2)* %2 to i32 addrspace(0)* + ret i32* %3 +} + +; ALL-LABEL: cast: +; ALL: move $2, $4 diff --git a/test/CodeGen/Mips/llvm-ir/extractelement.ll b/test/CodeGen/Mips/llvm-ir/extractelement.ll new file mode 100644 index 000000000000..1e1b02df99a2 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/extractelement.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL + +; This test triggered a bug in the vector splitting where the type legalizer +; attempted to extract the element with by storing the vector, then reading +; an element back. However, the address calculation was: +; Base + Index * (EltSizeInBits / 8) +; and EltSizeInBits was 1. This caused the index to be forgotten. 
+define i1 @via_stack_bug(i8 signext %idx) { + %1 = extractelement <2 x i1> <i1 false, i1 true>, i8 %idx + ret i1 %1 +} + +; ALL-LABEL: via_stack_bug: +; ALL-DAG: addiu [[ONE:\$[0-9]+]], $zero, 1 +; ALL-DAG: sb [[ONE]], 7($sp) +; ALL-DAG: sb $zero, 6($sp) +; ALL-DAG: addiu [[VPTR:\$[0-9]+]], $sp, 6 +; ALL-DAG: addu [[EPTR:\$[0-9]+]], $4, [[VPTR]] +; ALL: lbu $2, 0([[EPTR]]) diff --git a/test/CodeGen/Mips/micromips-zero-mat-uses.ll b/test/CodeGen/Mips/micromips-zero-mat-uses.ll new file mode 100644 index 000000000000..b38747a2d2c2 --- /dev/null +++ b/test/CodeGen/Mips/micromips-zero-mat-uses.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+micromips,+nooddspreg -O0 < %s | FileCheck %s + +; CHECK: addiu $[[R0:[0-9]+]], $zero, 0 +; CHECK: subu16 $2, $[[R0]], ${{[0-9]+}} +define i32 @foo() { + %1 = sub i32 0, undef + ret i32 %1 +} diff --git a/test/CodeGen/PowerPC/ctr-loop-tls-const.ll b/test/CodeGen/PowerPC/ctr-loop-tls-const.ll new file mode 100644 index 000000000000..01f837cb9933 --- /dev/null +++ b/test/CodeGen/PowerPC/ctr-loop-tls-const.ll @@ -0,0 +1,40 @@ +; RUN: llc -mcpu=pwr7 -relocation-model=pic < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@x = thread_local global [1600 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind +define void @foo(i32 signext %v) #0 { +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %induction5 = or i64 %index, 1 + %0 = getelementptr inbounds [1600 x i32], [1600 x i32]* @x, i64 0, i64 %index + %1 = getelementptr inbounds [1600 x i32], [1600 x i32]* @x, i64 0, i64 %induction5 + %2 = load i32, i32* %0, align 4 + %3 = load i32, i32* %1, align 4 + %4 = add nsw i32 %2, %v + %5 = add nsw i32 %3, %v + store i32 %4, i32* %0, align 4 + store i32 %5, i32* %1, align 4 + %index.next = add i64 %index, 2 + %6 = icmp eq i64 %index.next, 1600 + br i1 %6, label %for.cond.cleanup,
label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +; CHECK-LABEL: @foo +; CHECK-NOT: mtctr +; CHECK: __tls_get_addr + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"PIC Level", i32 2} + diff --git a/test/CodeGen/PowerPC/ctrloop-intrin.ll b/test/CodeGen/PowerPC/ctrloop-intrin.ll new file mode 100644 index 000000000000..7c781cd15e41 --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-intrin.ll @@ -0,0 +1,349 @@ +; RUN: llc < %s +; ModuleID = 'new.bc' +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le--linux-gnu" + +@.str.87 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str.1.88 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str.2.89 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str.3.90 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str.4.91 = external hidden unnamed_addr constant [14 x i8], align 1 +@.str.5.92 = external hidden unnamed_addr constant [13 x i8], align 1 +@.str.6.93 = external hidden unnamed_addr constant [10 x i8], align 1 +@.str.7.94 = external hidden unnamed_addr constant [9 x i8], align 1 +@.str.8.95 = external hidden unnamed_addr constant [2 x i8], align 1 +@.str.9.96 = external hidden unnamed_addr constant [2 x i8], align 1 +@.str.10.97 = external hidden unnamed_addr constant [3 x i8], align 1 +@.str.11.98 = external hidden unnamed_addr constant [3 x i8], align 1 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare i8* @halide_string_to_string(i8*, i8*, i8*) #1 + +; Function Attrs: nounwind +declare i8* @halide_int64_to_string(i8*, i8*, i64, i32) #1 + +; Function Attrs: nounwind +define weak i8* @halide_double_to_string(i8* %dst, i8* %end, double %arg, i32 %scientific) #1 { +entry: + %arg.addr = alloca double, align 8 + %bits = alloca i64, align 8 + %buf 
= alloca [512 x i8], align 1 + store double %arg, double* %arg.addr, align 8, !tbaa !4 + %0 = bitcast i64* %bits to i8* + call void @llvm.lifetime.start(i64 8, i8* %0) #0 + store i64 0, i64* %bits, align 8, !tbaa !8 + %1 = bitcast double* %arg.addr to i8* + %call = call i8* @memcpy(i8* %0, i8* %1, i64 8) #2 + %2 = load i64, i64* %bits, align 8, !tbaa !8 + %and = and i64 %2, 4503599627370495 + %shr = lshr i64 %2, 52 + %shr.tr = trunc i64 %shr to i32 + %conv = and i32 %shr.tr, 2047 + %shr2 = lshr i64 %2, 63 + %conv3 = trunc i64 %shr2 to i32 + %cmp = icmp eq i32 %conv, 2047 + br i1 %cmp, label %if.then, label %if.else.15 + +if.then: ; preds = %entry + %tobool = icmp eq i64 %and, 0 + %tobool5 = icmp ne i32 %conv3, 0 + br i1 %tobool, label %if.else.9, label %if.then.4 + +if.then.4: ; preds = %if.then + br i1 %tobool5, label %if.then.6, label %if.else + +if.then.6: ; preds = %if.then.4 + %call7 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.87, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else: ; preds = %if.then.4 + %call8 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1.88, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else.9: ; preds = %if.then + br i1 %tobool5, label %if.then.11, label %if.else.13 + +if.then.11: ; preds = %if.else.9 + %call12 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2.89, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else.13: ; preds = %if.else.9 + %call14 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3.90, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else.15: ; preds = %entry + %cmp16 = icmp eq i32 %conv, 0 + %cmp17 = icmp eq i64 %and, 0 + %or.cond = and i1 %cmp17, %cmp16 + br i1 %or.cond, label %if.then.18, label %if.end.32 + +if.then.18: ; preds = %if.else.15 + %tobool19 = icmp eq 
i32 %scientific, 0 + %tobool21 = icmp ne i32 %conv3, 0 + br i1 %tobool19, label %if.else.26, label %if.then.20 + +if.then.20: ; preds = %if.then.18 + br i1 %tobool21, label %if.then.22, label %if.else.24 + +if.then.22: ; preds = %if.then.20 + %call23 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4.91, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else.24: ; preds = %if.then.20 + %call25 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.5.92, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else.26: ; preds = %if.then.18 + br i1 %tobool21, label %if.then.28, label %if.else.30 + +if.then.28: ; preds = %if.else.26 + %call29 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.6.93, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.else.30: ; preds = %if.else.26 + %call31 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.7.94, i64 0, i64 0)) #3 + br label %cleanup.148 + +if.end.32: ; preds = %if.else.15 + %tobool33 = icmp eq i32 %conv3, 0 + br i1 %tobool33, label %if.end.37, label %if.then.34 + +if.then.34: ; preds = %if.end.32 + %call35 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8.95, i64 0, i64 0)) #3 + %sub36 = fsub double -0.000000e+00, %arg + store double %sub36, double* %arg.addr, align 8, !tbaa !4 + br label %if.end.37 + +if.end.37: ; preds = %if.then.34, %if.end.32 + %.pr = phi double [ %sub36, %if.then.34 ], [ %arg, %if.end.32 ] + %dst.addr.0 = phi i8* [ %call35, %if.then.34 ], [ %dst, %if.end.32 ] + %tobool38 = icmp eq i32 %scientific, 0 + br i1 %tobool38, label %if.else.62, label %while.condthread-pre-split + +while.condthread-pre-split: ; preds = %if.end.37 + %cmp40.261 = fcmp olt double %.pr, 1.000000e+00 + br i1 %cmp40.261, label %while.body, label 
%while.cond.41thread-pre-split + +while.body: ; preds = %while.body, %while.condthread-pre-split + %exponent_base_10.0262 = phi i32 [ %dec, %while.body ], [ 0, %while.condthread-pre-split ] + %3 = phi double [ %mul, %while.body ], [ %.pr, %while.condthread-pre-split ] + %mul = fmul double %3, 1.000000e+01 + %dec = add nsw i32 %exponent_base_10.0262, -1 + %cmp40 = fcmp olt double %mul, 1.000000e+00 + br i1 %cmp40, label %while.body, label %while.cond.while.cond.41thread-pre-split_crit_edge + +while.cond.while.cond.41thread-pre-split_crit_edge: ; preds = %while.body + store double %mul, double* %arg.addr, align 8, !tbaa !4 + br label %while.cond.41thread-pre-split + +while.cond.41thread-pre-split: ; preds = %while.cond.while.cond.41thread-pre-split_crit_edge, %while.condthread-pre-split + %.pr246 = phi double [ %mul, %while.cond.while.cond.41thread-pre-split_crit_edge ], [ %.pr, %while.condthread-pre-split ] + %exponent_base_10.0.lcssa = phi i32 [ %dec, %while.cond.while.cond.41thread-pre-split_crit_edge ], [ 0, %while.condthread-pre-split ] + %cmp42.257 = fcmp ult double %.pr246, 1.000000e+01 + br i1 %cmp42.257, label %while.end.44, label %while.body.43 + +while.body.43: ; preds = %while.body.43, %while.cond.41thread-pre-split + %exponent_base_10.1258 = phi i32 [ %inc, %while.body.43 ], [ %exponent_base_10.0.lcssa, %while.cond.41thread-pre-split ] + %4 = phi double [ %div, %while.body.43 ], [ %.pr246, %while.cond.41thread-pre-split ] + %div = fdiv double %4, 1.000000e+01 + %inc = add nsw i32 %exponent_base_10.1258, 1 + %cmp42 = fcmp ult double %div, 1.000000e+01 + br i1 %cmp42, label %while.cond.41.while.end.44_crit_edge, label %while.body.43 + +while.cond.41.while.end.44_crit_edge: ; preds = %while.body.43 + store double %div, double* %arg.addr, align 8, !tbaa !4 + br label %while.end.44 + +while.end.44: ; preds = %while.cond.41.while.end.44_crit_edge, %while.cond.41thread-pre-split + %exponent_base_10.1.lcssa = phi i32 [ %inc, %while.cond.41.while.end.44_crit_edge 
], [ %exponent_base_10.0.lcssa, %while.cond.41thread-pre-split ] + %.lcssa = phi double [ %div, %while.cond.41.while.end.44_crit_edge ], [ %.pr246, %while.cond.41thread-pre-split ] + %mul45 = fmul double %.lcssa, 1.000000e+06 + %add = fadd double %mul45, 5.000000e-01 + %conv46 = fptoui double %add to i64 + %div47 = udiv i64 %conv46, 1000000 + %5 = mul i64 %div47, -1000000 + %sub49 = add i64 %conv46, %5 + %call50 = call i8* @halide_int64_to_string(i8* %dst.addr.0, i8* %end, i64 %div47, i32 1) #3 + %call51 = call i8* @halide_string_to_string(i8* %call50, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.9.96, i64 0, i64 0)) #3 + %call52 = call i8* @halide_int64_to_string(i8* %call51, i8* %end, i64 %sub49, i32 6) #3 + %cmp53 = icmp sgt i32 %exponent_base_10.1.lcssa, -1 + br i1 %cmp53, label %if.then.54, label %if.else.56 + +if.then.54: ; preds = %while.end.44 + %call55 = call i8* @halide_string_to_string(i8* %call52, i8* %end, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.10.97, i64 0, i64 0)) #3 + br label %if.end.59 + +if.else.56: ; preds = %while.end.44 + %call57 = call i8* @halide_string_to_string(i8* %call52, i8* %end, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.11.98, i64 0, i64 0)) #3 + %sub58 = sub nsw i32 0, %exponent_base_10.1.lcssa + br label %if.end.59 + +if.end.59: ; preds = %if.else.56, %if.then.54 + %exponent_base_10.2 = phi i32 [ %exponent_base_10.1.lcssa, %if.then.54 ], [ %sub58, %if.else.56 ] + %dst.addr.1 = phi i8* [ %call55, %if.then.54 ], [ %call57, %if.else.56 ] + %conv60 = sext i32 %exponent_base_10.2 to i64 + %call61 = call i8* @halide_int64_to_string(i8* %dst.addr.1, i8* %end, i64 %conv60, i32 2) #3 + br label %cleanup.148 + +if.else.62: ; preds = %if.end.37 + br i1 %cmp16, label %if.then.64, label %if.end.66 + +if.then.64: ; preds = %if.else.62 + %call65 = call i8* @halide_double_to_string(i8* %dst.addr.0, i8* %end, double 0.000000e+00, i32 0) #3 + br label %cleanup.148 + +if.end.66: ; preds = %if.else.62 + 
%add68 = or i64 %and, 4503599627370496 + %sub70 = add nsw i32 %conv, -1075 + %cmp71 = icmp ult i32 %conv, 1075 + br i1 %cmp71, label %if.then.72, label %if.end.105 + +if.then.72: ; preds = %if.end.66 + %cmp73 = icmp slt i32 %sub70, -52 + br i1 %cmp73, label %if.end.84, label %if.else.76 + +if.else.76: ; preds = %if.then.72 + %sub77 = sub nsw i32 1075, %conv + %sh_prom = zext i32 %sub77 to i64 + %shr78 = lshr i64 %add68, %sh_prom + %shl81 = shl i64 %shr78, %sh_prom + %sub82 = sub i64 %add68, %shl81 + br label %if.end.84 + +if.end.84: ; preds = %if.else.76, %if.then.72 + %integer_part.0 = phi i64 [ %shr78, %if.else.76 ], [ 0, %if.then.72 ] + %f.0.in = phi i64 [ %sub82, %if.else.76 ], [ %add68, %if.then.72 ] + %f.0 = uitofp i64 %f.0.in to double + %conv85.244 = zext i32 %sub70 to i64 + %shl86 = shl i64 %conv85.244, 52 + %add88 = add i64 %shl86, 4696837146684686336 + %6 = bitcast i64 %add88 to double + %mul90 = fmul double %6, %f.0 + %add91 = fadd double %mul90, 5.000000e-01 + %conv92 = fptoui double %add91 to i64 + %conv93 = uitofp i64 %conv92 to double + %and96 = and i64 %conv92, 1 + %notlhs = fcmp oeq double %conv93, %add91 + %notrhs = icmp ne i64 %and96, 0 + %not.or.cond245 = and i1 %notrhs, %notlhs + %dec99 = sext i1 %not.or.cond245 to i64 + %fractional_part.0 = add i64 %dec99, %conv92 + %cmp101 = icmp eq i64 %fractional_part.0, 1000000 + %inc103 = zext i1 %cmp101 to i64 + %inc103.integer_part.0 = add i64 %inc103, %integer_part.0 + %.fractional_part.0 = select i1 %cmp101, i64 0, i64 %fractional_part.0 + br label %if.end.105 + +if.end.105: ; preds = %if.end.84, %if.end.66 + %integer_part.2 = phi i64 [ %inc103.integer_part.0, %if.end.84 ], [ %add68, %if.end.66 ] + %integer_exponent.0 = phi i32 [ 0, %if.end.84 ], [ %sub70, %if.end.66 ] + %fractional_part.2 = phi i64 [ %.fractional_part.0, %if.end.84 ], [ 0, %if.end.66 ] + %7 = bitcast [512 x i8]* %buf to i8* + call void @llvm.lifetime.start(i64 512, i8* %7) #0 + %add.ptr = getelementptr inbounds [512 x i8], [512 x 
i8]* %buf, i64 0, i64 512 + %add.ptr106 = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i64 0, i64 480 + %call109 = call i8* @halide_int64_to_string(i8* %add.ptr106, i8* %add.ptr, i64 %integer_part.2, i32 1) #3 + %cmp110.252 = icmp sgt i32 %integer_exponent.0, 0 + br i1 %cmp110.252, label %for.cond.112.preheader, label %for.cond.cleanup + +for.cond.112.preheader: ; preds = %if.end.138, %if.end.105 + %i.0255 = phi i32 [ %inc140, %if.end.138 ], [ 0, %if.end.105 ] + %int_part_ptr.0253 = phi i8* [ %int_part_ptr.1, %if.end.138 ], [ %add.ptr106, %if.end.105 ] + %int_part_ptr.02534 = ptrtoint i8* %int_part_ptr.0253 to i64 + %cmp114.249 = icmp eq i8* %call109, %int_part_ptr.0253 + br i1 %cmp114.249, label %if.end.138, label %for.body.116.preheader + +for.body.116.preheader: ; preds = %for.cond.112.preheader + %8 = sub i64 0, %int_part_ptr.02534 + %scevgep5 = getelementptr i8, i8* %call109, i64 %8 + %scevgep56 = ptrtoint i8* %scevgep5 to i64 + call void @llvm.ppc.mtctr.i64(i64 %scevgep56) + br label %for.body.116 + +for.cond.cleanup: ; preds = %if.end.138, %if.end.105 + %int_part_ptr.0.lcssa = phi i8* [ %add.ptr106, %if.end.105 ], [ %int_part_ptr.1, %if.end.138 ] + %9 = bitcast [512 x i8]* %buf to i8* + %call142 = call i8* @halide_string_to_string(i8* %dst.addr.0, i8* %end, i8* %int_part_ptr.0.lcssa) #3 + %call143 = call i8* @halide_string_to_string(i8* %call142, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.9.96, i64 0, i64 0)) #3 + %call144 = call i8* @halide_int64_to_string(i8* %call143, i8* %end, i64 %fractional_part.2, i32 6) #3 + call void @llvm.lifetime.end(i64 512, i8* %9) #0 + br label %cleanup.148 + +for.cond.cleanup.115: ; preds = %for.body.116 + br i1 %cmp125, label %if.then.136, label %if.end.138 + +for.body.116: ; preds = %for.body.116, %for.body.116.preheader + %call109.pn = phi i8* [ %p.0251, %for.body.116 ], [ %call109, %for.body.116.preheader ] + %carry.0250 = phi i32 [ %carry.1, %for.body.116 ], [ 0, %for.body.116.preheader ] + 
%call109.pn2 = ptrtoint i8* %call109.pn to i64 + %p.0251 = getelementptr inbounds i8, i8* %call109.pn, i64 -1 + %scevgep3 = getelementptr i8, i8* inttoptr (i64 -1 to i8*), i64 %call109.pn2 + %10 = load i8, i8* %scevgep3, align 1, !tbaa !10 + %sub118 = add i8 %10, -48 + %conv120 = sext i8 %sub118 to i32 + %mul121 = shl nsw i32 %conv120, 1 + %add122 = or i32 %mul121, %carry.0250 + %11 = trunc i32 %add122 to i8 + %cmp125 = icmp sgt i8 %11, 9 + %sub128 = add nsw i32 %add122, 246 + %carry.1 = zext i1 %cmp125 to i32 + %new_digit.0.in = select i1 %cmp125, i32 %sub128, i32 %add122 + %add133 = add nsw i32 %new_digit.0.in, 48 + %conv134 = trunc i32 %add133 to i8 + %scevgep = getelementptr i8, i8* inttoptr (i64 -1 to i8*), i64 %call109.pn2 + store i8 %conv134, i8* %scevgep, align 1, !tbaa !10 + %12 = call i1 @llvm.ppc.is.decremented.ctr.nonzero() + br i1 %12, label %for.body.116, label %for.cond.cleanup.115 + +if.then.136: ; preds = %for.cond.cleanup.115 + %incdec.ptr137 = getelementptr inbounds i8, i8* %int_part_ptr.0253, i64 -1 + store i8 49, i8* %incdec.ptr137, align 1, !tbaa !10 + br label %if.end.138 + +if.end.138: ; preds = %if.then.136, %for.cond.cleanup.115, %for.cond.112.preheader + %int_part_ptr.1 = phi i8* [ %incdec.ptr137, %if.then.136 ], [ %call109, %for.cond.112.preheader ], [ %int_part_ptr.0253, %for.cond.cleanup.115 ] + %inc140 = add nuw nsw i32 %i.0255, 1 + %exitcond = icmp eq i32 %inc140, %integer_exponent.0 + br i1 %exitcond, label %for.cond.cleanup, label %for.cond.112.preheader + +cleanup.148: ; preds = %for.cond.cleanup, %if.then.64, %if.end.59, %if.else.30, %if.then.28, %if.else.24, %if.then.22, %if.else.13, %if.then.11, %if.else, %if.then.6 + %retval.1 = phi i8* [ %call7, %if.then.6 ], [ %call8, %if.else ], [ %call12, %if.then.11 ], [ %call14, %if.else.13 ], [ %call23, %if.then.22 ], [ %call25, %if.else.24 ], [ %call29, %if.then.28 ], [ %call31, %if.else.30 ], [ %call65, %if.then.64 ], [ %call61, %if.end.59 ], [ %call144, %for.cond.cleanup ] + %13 = 
bitcast i64* %bits to i8* + call void @llvm.lifetime.end(i64 8, i8* %13) #0 + ret i8* %retval.1 +} + +; Function Attrs: nounwind +declare i8* @memcpy(i8*, i8* nocapture readonly, i64) #1 + +; Function Attrs: nounwind +declare void @llvm.ppc.mtctr.i64(i64) #0 + +; Function Attrs: nounwind +declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } +attributes #3 = { nounwind } + +!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0} +!llvm.module.flags = !{!1, !2, !3} + +!0 = !{!"clang version 3.7.0 (branches/release_37 246867) (llvm/branches/release_37 246866)"} +!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0} +!2 = !{i32 2, !"halide_mcpu", !"pwr8"} +!3 = !{i32 2, !"halide_mattrs", !"+altivec,+vsx,+power8-altivec,+direct-move"} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!9, !9, i64 0} +!9 = !{!"long long", !6, i64 0} +!10 = !{!6, !6, i64 0} diff --git a/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir new file mode 100644 index 000000000000..5c998d09a3db --- /dev/null +++ b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir @@ -0,0 +1,92 @@ +# RUN: llc -start-after=dead-mi-elimination -stop-after=twoaddressinstruction -o /dev/null %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64-unknown-linux-gnu" + + @d = global i32 15, align 4 + @b = global i32* @d, align 8 + @a = common global i32 0, align 4 + + ; Function Attrs: nounwind + define signext i32 @main() #0 { + entry: + %0 = load i32*, i32** @b, 
align 8 + %1 = load i32, i32* @a, align 4 + %lnot = icmp eq i32 %1, 0 + %lnot.ext = zext i1 %lnot to i32 + %shr.i = lshr i32 2072, %lnot.ext + %call.lobit = lshr i32 %shr.i, 7 + %2 = and i32 %call.lobit, 1 + %3 = load i32, i32* %0, align 4 + %or = or i32 %2, %3 + store i32 %or, i32* %0, align 4 + %4 = load i32, i32* @a, align 4 + %lnot.1 = icmp eq i32 %4, 0 + %lnot.ext.1 = zext i1 %lnot.1 to i32 + %shr.i.1 = lshr i32 2072, %lnot.ext.1 + %call.lobit.1 = lshr i32 %shr.i.1, 7 + %5 = and i32 %call.lobit.1, 1 + %or.1 = or i32 %5, %or + store i32 %or.1, i32* %0, align 4 + ret i32 %or.1 + } + + attributes #0 = { nounwind "target-cpu"="ppc64" } + +... +--- +name: main +alignment: 2 +exposesReturnsTwice: false +hasInlineAsm: false +isSSA: true +tracksRegLiveness: true +tracksSubRegLiveness: false +registers: + - { id: 0, class: g8rc_and_g8rc_nox0 } + - { id: 1, class: g8rc_and_g8rc_nox0 } + - { id: 2, class: gprc } + - { id: 3, class: gprc } + - { id: 4, class: gprc } + - { id: 5, class: g8rc_and_g8rc_nox0 } + - { id: 6, class: g8rc_and_g8rc_nox0 } + - { id: 7, class: gprc } + - { id: 8, class: gprc } + - { id: 9, class: gprc } + - { id: 10, class: g8rc } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %x2 + + %0 = ADDIStocHA %x2, @b + %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load 8 from @b) + %2 = LWZ 0, %1 :: (load 4 from %ir.0) + %3 = LI 0 + %4 = RLWIMI %3, killed %2, 0, 0, 31 + ; CHECK-LABEL: name: main + ; CHECK: %[[REG1:[0-9]+]] = LI 0 + ; CHECK: %[[REG2:[0-9]+]] = COPY %[[REG1]] + ; CHECK: %[[REG2]] = RLWIMI %[[REG2]], killed %2, 0, 0, 31 + %8 = RLWIMI %3, %4, 0, 0, 31 + STW %4, 0, %1 :: (store 4 into %ir.0) + %10 = EXTSW_32_64 %8 + STW %8, 0, %1 :: (store 4 
into %ir.0) + %x3 = COPY %10 + BLR8 implicit %x3, implicit %lr8, implicit %rm + +... diff --git a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll new file mode 100644 index 000000000000..052f55644fe2 --- /dev/null +++ b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define <2 x i32> @test1(<4 x i32> %wide.vec) #0 { +entry: + %strided.vec = shufflevector <4 x i32> %wide.vec, <4 x i32> undef, <2 x i32> + ret <2 x i32> %strided.vec + +; CHECK-LABEL: @test1 +; CHECK: vsldoi 2, 2, 2, 12 +; CHECK: blr +} + +; Function Attrs: nounwind +define <16 x i8> @test2(<16 x i8> %wide.vec) #0 { +entry: + %strided.vec = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <16 x i32> + ret <16 x i8> %strided.vec + +; CHECK-LABEL: @test2 +; CHECK: vsldoi 2, 2, 2, 12 +; CHECK: blr +} + +attributes #0 = { nounwind "target-cpu"="pwr7" } + diff --git a/test/CodeGen/PowerPC/pr24546.ll b/test/CodeGen/PowerPC/pr24546.ll new file mode 100644 index 000000000000..3bb638af2343 --- /dev/null +++ b/test/CodeGen/PowerPC/pr24546.ll @@ -0,0 +1,116 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s + +; Verify that we no longer crash in VSX swap removal when debug values +; are in the code stream. 
+ +@php_intpow10.powers = external unnamed_addr constant [23 x double], align 8 + +; Function Attrs: nounwind +define double @_php_math_round(double %value, i32 signext %places, i32 signext %mode) #0 { +entry: + br i1 undef, label %if.then, label %if.else, !dbg !32 + +if.then: ; preds = %entry + %conv = sitofp i32 undef to double, !dbg !34 + br i1 undef, label %if.then.i, label %if.end.i, !dbg !36 + +if.then.i: ; preds = %if.then + %call.i = tail call double @pow(double 1.000000e+01, double undef) #3, !dbg !39 + br label %php_intpow10.exit, !dbg !41 + +if.end.i: ; preds = %if.then + %0 = load double, double* undef, align 8, !dbg !42, !tbaa !43 + br label %php_intpow10.exit, !dbg !47 + +php_intpow10.exit: ; preds = %if.end.i, %if.then.i + %retval.0.i = phi double [ %call.i, %if.then.i ], [ %0, %if.end.i ], !dbg !48 + tail call void @llvm.dbg.value(metadata double %retval.0.i, i64 0, metadata !15, metadata !49), !dbg !50 + %div = fdiv double %conv, %retval.0.i, !dbg !51 + br label %if.end.15, !dbg !52 + +if.else: ; preds = %entry + %mul = fmul double %value, undef, !dbg !53 + br label %if.end.15 + +if.end.15: ; preds = %if.else, %php_intpow10.exit + %tmp_value.1 = phi double [ %div, %php_intpow10.exit ], [ %mul, %if.else ] + ret double %tmp_value.1, !dbg !57 +} + +declare signext i32 @php_intlog10abs(...) #1 + +declare signext i32 @php_round_helper(...) 
#1 + +; Function Attrs: nounwind +declare double @pow(double, double) #0 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2 + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29, !30} +!llvm.ident = !{!31} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (git://github.com/llvm-mirror/clang.git e0848b6353721eb1b278a5bbea257bbf6316251e) (git://github.com/llvm-mirror/llvm.git 8724a428dfd5e78d7865bb01783708e83f9ed128)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !5, globals: !23) +!1 = !DIFile(filename: "testcase.i", directory: "/tmp/glibc.build") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float) +!5 = !{!6, !18} +!6 = !DISubprogram(name: "_php_math_round", scope: !1, file: !1, line: 15, type: !7, isLocal: false, isDefinition: true, scopeLine: 16, flags: DIFlagPrototyped, isOptimized: true, function: double (double, i32, i32)* @_php_math_round, variables: !10) +!7 = !DISubroutineType(types: !8) +!8 = !{!4, !4, !9, 
!9} +!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!10 = !{!11, !12, !13, !14, !15, !16, !17} +!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", arg: 1, scope: !6, file: !1, line: 15, type: !4) +!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "places", arg: 2, scope: !6, file: !1, line: 15, type: !9) +!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "mode", arg: 3, scope: !6, file: !1, line: 15, type: !9) +!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f1", scope: !6, file: !1, line: 17, type: !4) +!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f2", scope: !6, file: !1, line: 17, type: !4) +!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tmp_value", scope: !6, file: !1, line: 18, type: !4) +!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "precision_places", scope: !6, file: !1, line: 19, type: !9) +!18 = !DISubprogram(name: "php_intpow10", scope: !1, file: !1, line: 1, type: !19, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!4, !9} +!21 = !{!22} +!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "power", arg: 1, scope: !18, file: !1, line: 1, type: !9) +!23 = !{!24} +!24 = !DIGlobalVariable(name: "powers", scope: !18, file: !1, line: 3, type: !25, isLocal: true, isDefinition: true, variable: [23 x double]* @php_intpow10.powers) +!25 = !DICompositeType(tag: DW_TAG_array_type, baseType: !26, size: 1472, align: 64, elements: !27) +!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !4) +!27 = !{!28} +!28 = !DISubrange(count: 23) +!29 = !{i32 2, !"Dwarf Version", i32 4} +!30 = !{i32 2, !"Debug Info Version", i32 3} +!31 = !{!"clang version 3.8.0 (git://github.com/llvm-mirror/clang.git e0848b6353721eb1b278a5bbea257bbf6316251e) (git://github.com/llvm-mirror/llvm.git 8724a428dfd5e78d7865bb01783708e83f9ed128)"} +!32 = !DILocation(line: 21, column: 32, scope: 
!33) +!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 21, column: 6) +!34 = !DILocation(line: 22, column: 15, scope: !35) +!35 = distinct !DILexicalBlock(scope: !33, file: !1, line: 21, column: 67) +!36 = !DILocation(line: 8, column: 16, scope: !37, inlinedAt: !38) +!37 = distinct !DILexicalBlock(scope: !18, file: !1, line: 8, column: 6) +!38 = distinct !DILocation(line: 23, column: 8, scope: !35) +!39 = !DILocation(line: 9, column: 10, scope: !40, inlinedAt: !38) +!40 = distinct !DILexicalBlock(scope: !37, file: !1, line: 8, column: 31) +!41 = !DILocation(line: 9, column: 3, scope: !40, inlinedAt: !38) +!42 = !DILocation(line: 11, column: 9, scope: !18, inlinedAt: !38) +!43 = !{!44, !44, i64 0} +!44 = !{!"double", !45, i64 0} +!45 = !{!"omnipotent char", !46, i64 0} +!46 = !{!"Simple C/C++ TBAA"} +!47 = !DILocation(line: 11, column: 2, scope: !18, inlinedAt: !38) +!48 = !DILocation(line: 23, column: 8, scope: !35) +!49 = !DIExpression() +!50 = !DILocation(line: 17, column: 13, scope: !6) +!51 = !DILocation(line: 24, column: 25, scope: !35) +!52 = !DILocation(line: 25, column: 2, scope: !35) +!53 = !DILocation(line: 27, column: 22, scope: !54) +!54 = distinct !DILexicalBlock(scope: !55, file: !1, line: 26, column: 20) +!55 = distinct !DILexicalBlock(scope: !56, file: !1, line: 26, column: 7) +!56 = distinct !DILexicalBlock(scope: !33, file: !1, line: 25, column: 9) +!57 = !DILocation(line: 32, column: 2, scope: !6) diff --git a/test/CodeGen/PowerPC/pr25157.ll b/test/CodeGen/PowerPC/pr25157.ll new file mode 100644 index 000000000000..7137d675a74a --- /dev/null +++ b/test/CodeGen/PowerPC/pr25157.ll @@ -0,0 +1,58 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; Verify correct generation of an lxsspx rather than an invalid optimization +; to lxvdsx. Bugpoint-reduced test from Eric Schweitz. 
+ +%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }> +%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }> + +@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32 +@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16 + +define void @aercalc_() { +L.entry: + br i1 undef, label %L.LB38_2426, label %L.LB38_2911 + +L.LB38_2911: + br i1 undef, label %L.LB38_2140, label %L.LB38_2640 + +L.LB38_2640: + unreachable + +L.LB38_2426: + br i1 undef, label %L.LB38_2438, label %L.LB38_2920 + +L.LB38_2920: + br i1 undef, label %L.LB38_2438, label %L.LB38_2921 + +L.LB38_2921: + br label %L.LB38_2140 + +L.LB38_2140: + ret void + +L.LB38_2438: + br i1 undef, label %L.LB38_2451, label %L.LB38_2935 + +L.LB38_2935: + br i1 undef, label %L.LB38_2451, label %L.LB38_2936 + +L.LB38_2936: + unreachable + +L.LB38_2451: + br i1 undef, label %L.LB38_2452, label %L.LB38_2937 + +L.LB38_2937: + unreachable + +L.LB38_2452: + %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) 
to float*), align 16 + %1 = fpext float %0 to double + %2 = insertelement <2 x double> undef, double %1, i32 1 + store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16 + unreachable +} + +; CHECK-LABEL: @aercalc_ +; CHECK: lxsspx diff --git a/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll new file mode 100644 index 000000000000..a74bc7273962 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@m = external global i32, align 4 + +; Function Attrs: nounwind +define signext i32 @main() #0 { +entry: + +; CHECK-LABEL: @main +; CHECK-NOT: rlwimi +; CHECK: andi + + %0 = load i32, i32* @m, align 4 + %or = or i32 %0, 250 + store i32 %or, i32* @m, align 4 + %and = and i32 %or, 249 + %sub.i = sub i32 %and, 0 + %sext = shl i32 %sub.i, 24 + %conv = ashr exact i32 %sext, 24 + ret i32 %conv +} + +attributes #0 = { nounwind "target-cpu"="pwr7" } +attributes #1 = { nounwind } + diff --git a/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/test/CodeGen/PowerPC/select-i1-vs-i1.ll new file mode 100644 index 000000000000..6dabbaa4208b --- /dev/null +++ b/test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -0,0 +1,1685 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; FIXME: We should check the operands to the cr* logical operation itself, but +; unfortunately, FileCheck does not yet understand 
how to do arithmetic, so we +; can't do so without introducing a register-allocation dependency. + +define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32slt +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32ult(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32ult +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32sle(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32sle +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32ule(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: 
@testi32ule +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32eq(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32eq +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32sge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32sge +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32uge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32uge +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32sgt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 
+ %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32sgt +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32ugt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32ugt +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define signext i32 @testi32ne(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { +entry: + %cmp1 = icmp eq i32 %c3, %c4 + %cmp3tmp = icmp eq i32 %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i32 %a1, i32 %a2 + ret i32 %cond + +; CHECK-LABEL: @testi32ne +; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64slt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64slt +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64ult(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 
%cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64ult +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64sle(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64sle +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64ule(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64ule +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64eq(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64eq +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64sge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64sge +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; 
CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64uge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64uge +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64sgt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64sgt +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64ugt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64ugt +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, [[REG1]] +; CHECK: blr +} + +define i64 @testi64ne(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { +entry: + %cmp1 = icmp eq i64 %c3, %c4 + %cmp3tmp = icmp eq i64 %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, i64 %a1, i64 %a2 + ret i64 %cond + +; CHECK-LABEL: @testi64ne +; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6 +; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: isel 3, 7, 8, 
[[REG1]] +; CHECK: blr +} + +define float @testfloatslt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatslt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatult(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatsle(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatsle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatule(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float 
%a2 + ret float %cond + +; CHECK-LABEL: @testfloatule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloateq(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloateq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatsge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatsge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatuge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatuge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; 
CHECK: blr +} + +define float @testfloatsgt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatsgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatugt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define float @testfloatne(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, float %a1, float %a2 + ret float %cond + +; CHECK-LABEL: @testfloatne +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoubleslt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + 
ret double %cond + +; CHECK-LABEL: @testdoubleslt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoubleult(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoubleult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoublesle(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoublesle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoubleule(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoubleule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; 
CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoubleeq(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoubleeq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoublesge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoublesge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoubleuge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoubleuge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoublesgt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 
= icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoublesgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoubleugt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoubleugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define double @testdoublene(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { +entry: + %cmp1 = fcmp oeq double %c3, %c4 + %cmp3tmp = fcmp oeq double %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, double %a1, double %a2 + ret double %cond + +; CHECK-LABEL: @testdoublene +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: fmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: fmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testv4floatslt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; FIXME: This test (and the other v4f32 tests) should use the same bclr +; technique as the v2f64 tests below. 
+ +; CHECK-LABEL: @testv4floatslt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatult(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatsle(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatsle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatule(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond 
= select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floateq(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floateq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatsge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatsge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatuge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = 
fcmp oeq float %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatuge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatsgt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatsgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatugt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @testv4floatne(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 
x float> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testv4floatne +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 +; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: .LBB[[BB]]: +; CHECK: xxlor 34, [[REG2]], [[REG2]] +; CHECK: blr +} + +define ppc_fp128 @testppc_fp128eq(ppc_fp128 %c1, ppc_fp128 %c2, ppc_fp128 %c3, ppc_fp128 %c4, ppc_fp128 %a1, ppc_fp128 %a2) #0 { +entry: + %cmp1 = fcmp oeq ppc_fp128 %c3, %c4 + %cmp3tmp = fcmp oeq ppc_fp128 %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, ppc_fp128 %a1, ppc_fp128 %a2 + ret ppc_fp128 %cond + +; FIXME: Because of the way that the late SELECT_* pseudo-instruction expansion +; works, we end up with two blocks with the same predicate. These could be +; combined. 
+ +; CHECK-LABEL: @testppc_fp128eq +; CHECK-DAG: fcmpu {{[0-9]+}}, 6, 8 +; CHECK-DAG: fcmpu {{[0-9]+}}, 5, 7 +; CHECK-DAG: fcmpu {{[0-9]+}}, 2, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 3 +; CHECK: crand [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: crand [[REG2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: creqv [[REG3:[0-9]+]], [[REG2]], [[REG1]] +; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]] +; CHECK: fmr 9, 11 +; CHECK: .LBB[[BB1]]: +; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]] +; CHECK: fmr 10, 12 +; CHECK: .LBB[[BB2]]: +; CHECK-DAG: fmr 1, 9 +; CHECK-DAG: fmr 2, 10 +; CHECK: blr +} + +define <2 x double> @testv2doubleslt(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doubleslt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doubleult(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doubleult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doublesle(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x 
double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doublesle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doubleule(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doubleule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doubleeq(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doubleeq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doublesge(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doublesge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x 
double> @testv2doubleuge(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doubleuge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doublesgt(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doublesgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doubleugt(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> %cond + +; CHECK-LABEL: @testv2doubleugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <2 x double> @testv2doublene(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2 + ret <2 x double> 
%cond + +; CHECK-LABEL: @testv2doublene +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: vor 2, 3, 3 +; CHECK: blr +} + +define <4 x double> @testqv4doubleslt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doubleslt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doubleult(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doubleult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doublesle(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doublesle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 
12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doubleule(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doubleule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doubleeq(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doubleeq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doublesge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doublesge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x 
double> @testqv4doubleuge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doubleuge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doublesgt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doublesgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doubleugt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doubleugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x double> @testqv4doublene(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 { +entry: + %cmp1 = 
fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2 + ret <4 x double> %cond + +; CHECK-LABEL: @testqv4doublene +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatslt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatslt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatult(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatsle(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x 
float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatsle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatule(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floateq(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floateq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatsge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatsge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: 
crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatuge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatuge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatsgt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatsgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatugt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 
1, 5 +; CHECK: blr +} + +define <4 x float> @testqv4floatne(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2 + ret <4 x float> %cond + +; CHECK-LABEL: @testqv4floatne +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1slt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp slt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1slt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1ult(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ult i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1ult +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1sle(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float 
%c1, %c2 + %cmp3 = icmp sle i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1sle +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1ule(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ule i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1ule +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1eq(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp eq i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1eq +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1sge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1sge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1uge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp uge i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1uge +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1sgt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1sgt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1ugt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1ugt +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +define <4 x i1> @testqv4i1ne(float 
%c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 { +entry: + %cmp1 = fcmp oeq float %c3, %c4 + %cmp3tmp = fcmp oeq float %c1, %c2 + %cmp3 = icmp ne i1 %cmp3tmp, %cmp1 + %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2 + ret <4 x i1> %cond + +; CHECK-LABEL: @testqv4i1ne +; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] +; CHECK: qvfmr 5, 6 +; CHECK: .LBB[[BB]]: +; CHECK: qvfmr 1, 5 +; CHECK: blr +} + +attributes #0 = { nounwind readnone "target-cpu"="pwr7" } +attributes #1 = { nounwind readnone "target-cpu"="a2q" } + diff --git a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll new file mode 100644 index 000000000000..a5b4474460c0 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @LSH_recall_init(float %d_min, float %W) #0 { +entry: + br i1 undef, label %for.body.lr.ph, label %for.end + +; CHECK-LABEL: @LSH_recall_init +; CHECK: xsnmsubadp + +for.body.lr.ph: ; preds = %entry + %conv3 = fpext float %W to double + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %div = fdiv fast float 0.000000e+00, 0.000000e+00 + %add = fadd fast float %div, %d_min + %conv2 = fpext float %add to double + %0 = tail call double @llvm.sqrt.f64(double %conv2) + %div4 = fdiv fast double %conv3, %0 + %call = tail call signext i32 bitcast (i32 (...)* @p_col_helper to i32 (double)*)(double %div4) #2 + br label %for.body + +for.end: ; preds = %entry + ret void +} + +; Function Attrs: nounwind readnone +declare double @llvm.sqrt.f64(double) #1 + +declare signext i32 @p_col_helper(...) 
#2 + +attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" "unsafe-fp-math"="true" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } + diff --git a/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll b/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll new file mode 100644 index 000000000000..e3f4001aa1d3 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @acosh_float8() #0 { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> , <4 x float> undef) #0 + %astype.i.i.74.i = bitcast <4 x float> %0 to <4 x i32> + %and.i.i.76.i = and <4 x i32> %astype.i.i.74.i, undef + %or.i.i.79.i = or <4 x i32> %and.i.i.76.i, undef + %astype5.i.i.80.i = bitcast <4 x i32> %or.i.i.79.i to <4 x float> + %1 = shufflevector <4 x float> %astype5.i.i.80.i, <4 x float> undef, <8 x i32> + %2 = shufflevector <8 x float> undef, <8 x float> %1, <8 x i32> + store <8 x float> %2, <8 x float>* undef, align 32 + br label %if.end + +; CHECK-LABEL: @acosh_float8 +; CHECK: xvmaddasp + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + diff --git a/test/CodeGen/X86/pr24374.ll b/test/CodeGen/X86/pr24374.ll new file mode 100644 index 000000000000..7f331e103966 --- /dev/null +++ b/test/CodeGen/X86/pr24374.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, 
void ()* @g, i8* null }] + +declare i32 @__gxx_personality_seh0(...) + +; Function Attrs: nounwind +define void @f() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_seh0 to i8*) { +entry: + invoke void @g() + to label %exit unwind label %lpad + +lpad: ; preds = %entry + landingpad { i8*, i32 } + cleanup + unreachable + +exit: ; preds = %entry + unreachable +} +; CHECK-LABEL: f: +; CHECK: .seh_proc f +; CHECK: .seh_handler __gxx_personality_seh0, @unwind, @except +; CHECK: callq g +; CHECK: .seh_handlerdata +; CHECK: .seh_endproc + +define void @g() { + unreachable +} +; CHECK-LABEL: g: +; CHECK: .seh_proc g +; CHECK: .seh_endproc + +attributes #0 = { nounwind } diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll new file mode 100644 index 000000000000..3149fb51576f --- /dev/null +++ b/test/CodeGen/X86/setcc-lowering.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s + +; Verify that we don't crash during codegen due to a wrong lowering +; of a setcc node with illegal operand types and return type. 
+ +define <8 x i16> @pr25080(<8 x i32> %a) { +; CHECK-LABEL: pr25080: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; CHECK-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 +; CHECK-NEXT: vpsraw $15, %xmm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %0 = trunc <8 x i32> %a to <8 x i23> + %1 = icmp eq <8 x i23> %0, zeroinitializer + %2 = or <8 x i1> %1, + %3 = sext <8 x i1> %2 to <8 x i16> + ret <8 x i16> %3 +} diff --git a/test/DebugInfo/gvn.ll b/test/DebugInfo/gvn.ll new file mode 100644 index 000000000000..3ca3663bd831 --- /dev/null +++ b/test/DebugInfo/gvn.ll @@ -0,0 +1,135 @@ +; RUN: opt < %s -O2 -gvn -S | FileCheck %s +; +; Produced at -O2 from: +; struct context { +; int cur_pid +; }; +; int a, b, c, f, d; +; int pid_for_task(int); +; sample(struct context *p1) +; { +; if (c) +; b = a; +; if (a && p1->cur_pid) +; sample_internal(); +; } +; callback() { +; f = pid_for_task(d); +; sample(&f); +; } + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios" + +%struct.context = type { i32 } + +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@d = common global i32 0, align 4 +@f = common global i32 0, align 4 + +; Function Attrs: nounwind +declare i32 @sample_internal(...) + +; Function Attrs: nounwind +define i32 @callback() #0 { +entry: + %0 = load i32, i32* @d, align 4, !dbg !37 + + ; Verify that the call still has a debug location after GVN. 
+ ; CHECK: %call = tail call i32 @pid_for_task(i32 %0) #{{[0-9]}}, !dbg + %call = tail call i32 @pid_for_task(i32 %0) #3, !dbg !37 + + store i32 %call, i32* @f, align 4, !dbg !37 + tail call void @llvm.dbg.value(metadata %struct.context* bitcast (i32* @f to %struct.context*), i64 0, metadata !25, metadata !26) #3, !dbg !38 + %1 = load i32, i32* @c, align 4, !dbg !40 + %tobool.i = icmp eq i32 %1, 0, !dbg !40 + %.pr.i = load i32, i32* @a, align 4, !dbg !41 + br i1 %tobool.i, label %if.end.i, label %if.then.i, !dbg !42 + +if.then.i: ; preds = %entry + store i32 %.pr.i, i32* @b, align 4, !dbg !43 + br label %if.end.i, !dbg !43 + +if.end.i: ; preds = %if.then.i, %entry + %tobool1.i = icmp eq i32 %.pr.i, 0, !dbg !41 + + ; This instruction has no debug location -- in this + ; particular case it was removed by a bug in SimplifyCFG. + %2 = load i32, i32* @f, align 4 + + ; GVN is supposed to replace the load of @f with a direct reference to %call. + ; CHECK: %tobool2.i = icmp eq i32 %call, 0, !dbg + %tobool2.i = icmp eq i32 %2, 0, !dbg !41 + + %or.cond = or i1 %tobool1.i, %tobool2.i, !dbg !41 + br i1 %or.cond, label %sample.exit, label %if.then.3.i, !dbg !41 + +if.then.3.i: ; preds = %if.end.i + %call.i = tail call i32 bitcast (i32 (...)* @sample_internal to i32 ()*)() #3, !dbg !44 + br label %sample.exit, !dbg !44 + +sample.exit: ; preds = %if.end.i, %if.then.3.i + ret i32 undef, !dbg !45 +} + +declare i32 @pid_for_task(i32) #1 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2 + +attributes #0 = { nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22, !23} +!llvm.ident = !{!24} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !16) +!1 = !DIFile(filename: "test.c", 
directory: "/") +!2 = !{} +!3 = !{!4, !13} +!4 = !DISubprogram(name: "sample", scope: !5, file: !5, line: 6, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2) +!5 = !DIFile(filename: "test.i", directory: "/") +!6 = !DISubroutineType(types: !7) +!7 = !{!8, !9} +!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 64) +!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "context", file: !5, line: 1, size: 32, align: 32, elements: !11) +!11 = !{!12} +!12 = !DIDerivedType(tag: DW_TAG_member, name: "cur_pid", scope: !10, file: !5, line: 2, baseType: !8, size: 32, align: 32) +!13 = !DISubprogram(name: "callback", scope: !5, file: !5, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, isOptimized: false, function: i32 ()* @callback, variables: !2) +!14 = !DISubroutineType(types: !15) +!15 = !{!8} +!16 = !{!17, !18, !19, !20, !21} +!17 = !DIGlobalVariable(name: "a", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @a) +!18 = !DIGlobalVariable(name: "b", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @b) +!19 = !DIGlobalVariable(name: "c", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @c) +!20 = !DIGlobalVariable(name: "f", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @f) +!21 = !DIGlobalVariable(name: "d", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @d) +!22 = !{i32 2, !"Dwarf Version", i32 2} +!23 = !{i32 2, !"Debug Info Version", i32 3} +!24 = !{!"clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)"} +!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p1", arg: 1, scope: !4, file: !5, line: 6, type: !9) +!26 = !DIExpression() +!27 = 
!DILocation(line: 6, scope: !4) +!28 = !DILocation(line: 8, scope: !29) +!29 = distinct !DILexicalBlock(scope: !4, file: !5, line: 8) +!30 = !DILocation(line: 10, scope: !31) +!31 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10) +!32 = !DILocation(line: 8, scope: !4) +!33 = !DILocation(line: 9, scope: !29) +!34 = !DILocation(line: 10, scope: !4) +!35 = !DILocation(line: 11, scope: !31) +!36 = !DILocation(line: 12, scope: !4) +!37 = !DILocation(line: 14, scope: !13) +!38 = !DILocation(line: 6, scope: !4, inlinedAt: !39) +!39 = distinct !DILocation(line: 15, scope: !13) +!40 = !DILocation(line: 8, scope: !29, inlinedAt: !39) +!41 = !DILocation(line: 10, scope: !31, inlinedAt: !39) +!42 = !DILocation(line: 8, scope: !4, inlinedAt: !39) +!43 = !DILocation(line: 9, scope: !29, inlinedAt: !39) +!44 = !DILocation(line: 11, scope: !31, inlinedAt: !39) +!45 = !DILocation(line: 16, scope: !13) diff --git a/test/LTO/X86/diagnostic-handler-noexit.ll b/test/LTO/X86/diagnostic-handler-noexit.ll new file mode 100644 index 000000000000..be768c900f14 --- /dev/null +++ b/test/LTO/X86/diagnostic-handler-noexit.ll @@ -0,0 +1,13 @@ +; LTO default diagnostic handler should be non-exiting. +; This test verifies that after addModule() encounters an error, the diagnostic +; handler does not call exit(1) and instead returns to the caller of addModule. + +; RUN: llvm-as <%s >%t1 +; RUN: llvm-as <%s >%t2 +; RUN: not llvm-lto -o /dev/null %t1 %t2 2>&1 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: Linking globals named 'goodboy': symbol multiply defined! 
+; CHECK: llvm-lto{{.*}}: error adding file +@goodboy = global i32 3203383023, align 4 ; 0xbeefbeef diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s index 205623359748..63914675a869 100644 --- a/test/MC/AMDGPU/vop3.s +++ b/test/MC/AMDGPU/vop3.s @@ -1,5 +1,8 @@ -// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s -// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI + //===----------------------------------------------------------------------===// // VOPC Instructions @@ -8,63 +11,81 @@ // Test forced e64 encoding v_cmp_lt_f32_e64 s[2:3], v4, -v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40] +// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40] +// VI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40] + // // Modifier tests: // v_cmp_lt_f32 s[2:3] -v4, v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20] +// SICI: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20] +// VI: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x20] v_cmp_lt_f32 s[2:3] v4, -v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40] +// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40] +// VI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40] v_cmp_lt_f32 s[2:3] -v4, -v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; 
encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60] +// SICI: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60] +// VI: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x60] v_cmp_lt_f32 s[2:3] |v4|, v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x41,0xd0,0x04,0x0d,0x02,0x00] v_cmp_lt_f32 s[2:3] v4, |v6| -// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x41,0xd0,0x04,0x0d,0x02,0x00] v_cmp_lt_f32 s[2:3] |v4|, |v6| -// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x41,0xd0,0x04,0x0d,0x02,0x00] v_cmp_lt_f32 s[2:3] -|v4|, v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20] +// SICI: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20] +// VI: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x41,0xd0,0x04,0x0d,0x02,0x20] v_cmp_lt_f32 s[2:3] v4, -|v6| -// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40] +// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40] +// VI: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x41,0xd0,0x04,0x0d,0x02,0x40] v_cmp_lt_f32 s[2:3] -|v4|, -|v6| -// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: 
[0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60] +// SICI: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60] +// VI: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x41,0xd0,0x04,0x0d,0x02,0x60] // // Instruction tests: // v_cmp_f_f32 s[2:3], v4, v6 -// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x40,0xd0,0x04,0x0d,0x02,0x00] v_cmp_lt_f32 s[2:3], v4, v6 -// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x00] v_cmp_eq_f32 s[2:3], v4, v6 -// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x42,0xd0,0x04,0x0d,0x02,0x00] v_cmp_le_f32 s[2:3], v4, v6 -// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x43,0xd0,0x04,0x0d,0x02,0x00] v_cmp_gt_f32 s[2:3], v4, v6 -// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x44,0xd0,0x04,0x0d,0x02,0x00] v_cmp_lg_f32 s[2:3], v4, v6 -// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_lg_f32_e64 
s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x45,0xd0,0x04,0x0d,0x02,0x00] v_cmp_ge_f32 s[2:3], v4, v6 -// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00] +// SICI: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00] +// VI: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x46,0xd0,0x04,0x0d,0x02,0x00] // TODO: Finish VOPC @@ -77,22 +98,28 @@ v_cmp_ge_f32 s[2:3], v4, v6 // v_fract_f32 v1, -v2 -// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20] +// SICI: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20] +// VI: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x5b,0xd1,0x02,0x01,0x00,0x20] v_fract_f32 v1, |v2| -// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00] +// SICI: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00] +// VI: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x5b,0xd1,0x02,0x01,0x00,0x00] v_fract_f32 v1, -|v2| -// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20] +// SICI: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20] +// VI: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x5b,0xd1,0x02,0x01,0x00,0x20] v_fract_f32 v1, v2 clamp -// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00] +// SICI: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00] +// VI: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x80,0x5b,0xd1,0x02,0x01,0x00,0x00] v_fract_f32 v1, v2 mul:2 -// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08] +// SICI: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08] +// VI: v_fract_f32_e64 v1, v2 mul:2 ; encoding: 
[0x01,0x00,0x5b,0xd1,0x02,0x01,0x00,0x08] v_fract_f32 v1, v2, div:2 clamp -// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18] +// SICI: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18] +// VI: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x80,0x5b,0xd1,0x02,0x01,0x00,0x18] // TODO: Finish VOP1 @@ -102,37 +129,47 @@ v_fract_f32 v1, v2, div:2 clamp // Test forced e64 encoding with e32 operands -v_ldexp_f32_e64 v1, v3, v5 -// CHECK: v_ldexp_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x56,0xd2,0x03,0x0b,0x02,0x00] +v_add_f32_e64 v1, v3, v5 +// SICI: v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x02,0x00] +// VI: v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x02,0x00] // TODO: Modifier tests v_cndmask_b32 v1, v3, v5, s[4:5] -// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00] +// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00] +// VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00] //TODO: readlane, writelane v_add_f32 v1, v3, s5 -// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00] +// VI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00] v_sub_f32 v1, v3, s5 -// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00] +// VI: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x02,0xd1,0x03,0x0b,0x00,0x00] v_subrev_f32 v1, v3, s5 -// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00] +// VI: 
v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x03,0xd1,0x03,0x0b,0x00,0x00] v_mac_legacy_f32 v1, v3, s5 -// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00] +// FIXME: The error message should be: error: instruction not supported on this GPU +// NOVI: error: invalid operand for instruction v_mul_legacy_f32 v1, v3, s5 -// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00] +// VI: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x04,0xd1,0x03,0x0b,0x00,0x00] v_mul_f32 v1, v3, s5 -// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00] +// VI: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x05,0xd1,0x03,0x0b,0x00,0x00] v_mul_i32_i24 v1, v3, s5 -// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00] +// SICI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00] +// VI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd1,0x03,0x0b,0x00,0x00] ///===---------------------------------------------------------------------===// // VOP3 Instructions @@ -141,7 +178,8 @@ v_mul_i32_i24 v1, v3, s5 // TODO: Modifier tests v_mad_legacy_f32 v2, v4, v6, v8 -// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04] +// SICI: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04] +// VI: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0xc0,0xd1,0x04,0x0d,0x22,0x04] diff --git a/test/MC/ARM/directive-arch-semantic-action.s b/test/MC/ARM/directive-arch-semantic-action.s new file mode 100644 index 000000000000..b9c65d8e49c8 --- 
/dev/null +++ b/test/MC/ARM/directive-arch-semantic-action.s @@ -0,0 +1,12 @@ +@ RUN: not llvm-mc -triple arm-gnueabi-linux -filetype asm %s 2>&1 | FileCheck %s + + .arch armv6 + dsb +@ CHECK: error: instruction requires: data-barriers + + .arch armv7 + dsb +@ CHECK-NOT: error: instruction requires: data-barriers + + .arch invalid_architecture_name +@ CHECK: error: Unknown arch name diff --git a/test/MC/Sparc/sparc-directive-xword.s b/test/MC/Sparc/sparc-directive-xword.s index 0c9e249a6ad3..736f99fbce74 100644 --- a/test/MC/Sparc/sparc-directive-xword.s +++ b/test/MC/Sparc/sparc-directive-xword.s @@ -1,5 +1,6 @@ ! RUN: not llvm-mc %s -arch=sparc -show-encoding 2>&1 | FileCheck %s --check-prefix=SPARC32 -! RUN: llvm-mc %s -arch=sparcv9 -show-encoding | FileCheck %s --check-prefix=SPARC64 +! RUN: llvm-mc %s -triple sparc64 -show-encoding | FileCheck %s --check-prefix=SPARC64 +! RUN: llvm-mc %s -triple sparcv9 -show-encoding | FileCheck %s --check-prefix=SPARCV9 ! SPARC32: error: unknown directive ! SPARC32-NEXT: .xword 65536 @@ -8,3 +9,5 @@ ! SPARC64: .xword 65536 .xword 65536 + ! SPARCV9: .xword 65536 + .xword 65536 diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 9678c8397e0e..08218986f45b 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -214,8 +214,11 @@ int main(int argc, char **argv) { if (SetMergedModule && i == BaseArg) { // Transfer ownership to the code generator. CodeGen.setModule(Module.release()); - } else if (!CodeGen.addModule(Module.get())) + } else if (!CodeGen.addModule(Module.get())) { + // Print a message here so that we know addModule() did not abort. 
+ errs() << argv[0] << ": error adding file '" << InputFilenames[i] << "'\n"; return 1; + } unsigned NumSyms = LTOMod->getSymbolCount(); for (unsigned I = 0; I < NumSyms; ++I) { diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile index 19077a3858a6..2bc81dac5f00 100644 --- a/tools/llvm-shlib/Makefile +++ b/tools/llvm-shlib/Makefile @@ -61,7 +61,7 @@ endif ifeq ($(HOST_OS), $(filter $(HOST_OS), DragonFly Linux FreeBSD GNU/kFreeBSD GNU)) # Add soname to the library. - LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME)$(SHLIBEXT) + LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME).1$(SHLIBEXT) endif ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU GNU/kFreeBSD)) diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp index f0c3ecfbb9b8..2ff560475551 100644 --- a/unittests/Transforms/Utils/Local.cpp +++ b/unittests/Transforms/Utils/Local.cpp @@ -58,3 +58,40 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) { delete bb0; delete bb1; } + +TEST(Local, RemoveDuplicatePHINodes) { + LLVMContext &C(getGlobalContext()); + IRBuilder<> B(C); + + std::unique_ptr F( + Function::Create(FunctionType::get(B.getVoidTy(), false), + GlobalValue::ExternalLinkage, "F")); + BasicBlock *Entry(BasicBlock::Create(C, "", F.get())); + BasicBlock *BB(BasicBlock::Create(C, "", F.get())); + BranchInst::Create(BB, Entry); + + B.SetInsertPoint(BB); + + AssertingVH P1 = B.CreatePHI(Type::getInt32Ty(C), 2); + P1->addIncoming(B.getInt32(42), Entry); + + PHINode *P2 = B.CreatePHI(Type::getInt32Ty(C), 2); + P2->addIncoming(B.getInt32(42), Entry); + + AssertingVH P3 = B.CreatePHI(Type::getInt32Ty(C), 2); + P3->addIncoming(B.getInt32(42), Entry); + P3->addIncoming(B.getInt32(23), BB); + + PHINode *P4 = B.CreatePHI(Type::getInt32Ty(C), 2); + P4->addIncoming(B.getInt32(42), Entry); + P4->addIncoming(B.getInt32(23), BB); + + P1->addIncoming(P3, BB); + P2->addIncoming(P4, BB); + BranchInst::Create(BB, BB); + + // Verify that we can eliminate PHIs that become 
duplicates after changing PHIs + // downstream. + EXPECT_TRUE(EliminateDuplicatePHINodes(BB)); + EXPECT_EQ(3U, BB->size()); +}