From 3fba7d16b41dfbefe3b1be6bc0ab94c017728f79 Mon Sep 17 00:00:00 2001 From: rdivacky Date: Fri, 15 Jan 2010 15:37:28 +0000 Subject: [PATCH] Update LLVM to 93512. --- CMakeLists.txt | 1 + LICENSE.TXT | 2 +- autoconf/configure.ac | 2 +- cmake/modules/LLVMLibDeps.cmake | 25 +- configure | 2 +- docs/CodeGenerator.html | 7 +- docs/LangRef.html | 110 +- docs/ProgrammersManual.html | 24 +- docs/ReleaseNotes.html | 3 +- docs/SourceLevelDebugging.html | 22 +- docs/TableGenFundamentals.html | 6 +- include/llvm-c/Analysis.h | 6 +- include/llvm-c/BitReader.h | 24 +- include/llvm-c/Core.h | 74 +- include/llvm-c/ExecutionEngine.h | 34 +- include/llvm-c/Target.h | 7 +- include/llvm/ADT/BitVector.h | 18 + include/llvm/ADT/SmallBitVector.h | 373 + include/llvm/ADT/StringExtras.h | 84 +- include/llvm/ADT/StringRef.h | 28 +- include/llvm/ADT/Twine.h | 34 + include/llvm/Analysis/AliasAnalysis.h | 4 + include/llvm/Analysis/DebugInfo.h | 31 +- include/llvm/Analysis/DominatorInternals.h | 9 +- include/llvm/Analysis/Dominators.h | 67 +- include/llvm/Analysis/LoopInfo.h | 4 +- include/llvm/Analysis/PostDominators.h | 16 +- include/llvm/Attributes.h | 77 +- include/llvm/Bitcode/LLVMBitCodes.h | 9 +- include/llvm/CodeGen/DAGISelHeader.h | 2 +- include/llvm/CodeGen/FastISel.h | 24 +- include/llvm/CodeGen/MachineFunction.h | 2 + include/llvm/CodeGen/MachineInstr.h | 7 +- include/llvm/CodeGen/MachineInstrBuilder.h | 6 + include/llvm/CodeGen/MachineLoopInfo.h | 2 + include/llvm/CodeGen/MachineOperand.h | 19 +- include/llvm/CodeGen/Passes.h | 4 + include/llvm/CodeGen/SelectionDAGISel.h | 11 +- include/llvm/CodeGen/ValueTypes.h | 18 +- include/llvm/IntrinsicInst.h | 127 +- include/llvm/Intrinsics.td | 8 +- include/llvm/LinkAllPasses.h | 4 - include/llvm/MC/MCAsmLexer.h | 3 +- include/llvm/MC/MCParsedAsmOperand.h | 33 + include/llvm/MC/MCSymbol.h | 5 + include/llvm/Metadata.h | 55 +- include/llvm/Module.h | 35 +- include/llvm/Support/CFG.h | 68 +- include/llvm/Support/FormattedStream.h | 4 + include/llvm/Support/Mangler.h | 30 +- include/llvm/Support/MathExtras.h | 4 +- include/llvm/Support/PatternMatch.h | 23 +- include/llvm/Target/Target.td | 7 + include/llvm/Target/TargetAsmParser.h | 20 +- include/llvm/Target/TargetInstrInfo.h | 27 +- include/llvm/Target/TargetLowering.h | 6 +- .../llvm/Target/TargetLoweringObjectFile.h | 2 +- include/llvm/Target/TargetRegisterInfo.h | 2 +- include/llvm/Target/TargetSelectionDAG.td | 9 +- include/llvm/Transforms/Instrumentation.h | 10 - .../llvm/Transforms/Utils/BasicBlockUtils.h | 10 +- include/llvm/Transforms/Utils/Local.h | 19 +- include/llvm/Type.h | 3 + include/llvm/ValueSymbolTable.h | 86 +- lib/Analysis/AliasAnalysis.cpp | 11 +- lib/Analysis/Analysis.cpp | 8 +- lib/Analysis/ConstantFolding.cpp | 23 +- lib/Analysis/DbgInfoPrinter.cpp | 59 - lib/Analysis/DebugInfo.cpp | 164 +- lib/Analysis/IVUsers.cpp | 5 +- lib/Analysis/InlineCost.cpp | 40 +- lib/Analysis/LoopInfo.cpp | 5 + lib/Analysis/ScalarEvolution.cpp | 22 +- lib/Analysis/ValueTracking.cpp | 28 +- lib/AsmParser/LLParser.cpp | 211 +- lib/AsmParser/LLParser.h | 20 +- lib/Bitcode/Reader/BitReader.cpp | 27 +- lib/Bitcode/Reader/BitcodeReader.cpp | 36 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 47 +- lib/Bitcode/Writer/ValueEnumerator.cpp | 62 +- lib/Bitcode/Writer/ValueEnumerator.h | 4 + lib/CodeGen/AggressiveAntiDepBreaker.cpp | 159 +- lib/CodeGen/AggressiveAntiDepBreaker.h | 28 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 348 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 178 +- lib/CodeGen/CMakeLists.txt | 3 +- 
lib/CodeGen/CriticalAntiDepBreaker.cpp | 30 +- lib/CodeGen/CriticalAntiDepBreaker.h | 8 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 2 +- lib/CodeGen/ELFCodeEmitter.cpp | 2 +- lib/CodeGen/ELFWriter.cpp | 2 +- lib/CodeGen/ExactHazardRecognizer.cpp | 14 +- lib/CodeGen/GCMetadata.cpp | 3 +- lib/CodeGen/GCStrategy.cpp | 3 +- lib/CodeGen/IfConversion.cpp | 28 +- lib/CodeGen/IntrinsicLowering.cpp | 10 +- lib/CodeGen/LLVMTargetMachine.cpp | 21 +- lib/CodeGen/LiveInterval.cpp | 7 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 94 +- lib/CodeGen/LiveVariables.cpp | 22 +- lib/CodeGen/LowerSubregs.cpp | 26 +- lib/CodeGen/MachOWriter.cpp | 5 +- lib/CodeGen/MachineBasicBlock.cpp | 3 +- lib/CodeGen/MachineFunction.cpp | 9 +- lib/CodeGen/MachineInstr.cpp | 20 +- lib/CodeGen/MachineLICM.cpp | 24 +- lib/CodeGen/MachineLoopInfo.cpp | 5 + lib/CodeGen/MachineSSAUpdater.cpp | 4 +- lib/CodeGen/MachineSink.cpp | 8 +- lib/CodeGen/MachineVerifier.cpp | 130 +- lib/CodeGen/OptimizeExts.cpp | 185 + lib/CodeGen/PBQP/AnnotatedGraph.h | 2 +- lib/CodeGen/PBQP/ExhaustiveSolver.h | 2 +- lib/CodeGen/PBQP/GraphBase.h | 2 +- lib/CodeGen/PBQP/HeuristicSolver.h | 2 +- lib/CodeGen/PBQP/Heuristics/Briggs.h | 2 +- lib/CodeGen/PBQP/PBQPMath.h | 2 +- lib/CodeGen/PBQP/SimpleGraph.h | 2 +- lib/CodeGen/PBQP/Solution.h | 2 +- lib/CodeGen/PBQP/Solver.h | 2 +- lib/CodeGen/PHIElimination.cpp | 6 +- lib/CodeGen/PostRASchedulerList.cpp | 26 +- lib/CodeGen/PreAllocSplitting.cpp | 67 +- lib/CodeGen/ProcessImplicitDefs.cpp | 2 +- lib/CodeGen/RegAllocLinearScan.cpp | 64 +- lib/CodeGen/RegAllocLocal.cpp | 34 +- lib/CodeGen/RegAllocPBQP.cpp | 14 +- lib/CodeGen/ScheduleDAG.cpp | 80 +- lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 13 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 298 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 34 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 15 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 28 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 24 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 42 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 + .../SelectionDAG/LegalizeVectorOps.cpp | 5 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 117 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 16 +- lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 12 +- .../SelectionDAG/ScheduleDAGRRList.cpp | 40 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 8 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 62 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 50 +- .../SelectionDAG/SelectionDAGBuilder.h | 12 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 225 +- .../SelectionDAG/SelectionDAGPrinter.cpp | 2 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 74 +- lib/CodeGen/ShrinkWrapping.cpp | 74 +- lib/CodeGen/SimpleRegisterCoalescing.cpp | 127 +- lib/CodeGen/SjLjEHPrepare.cpp | 9 +- lib/CodeGen/SlotIndexes.cpp | 12 +- lib/CodeGen/Spiller.cpp | 16 +- lib/CodeGen/StackProtector.cpp | 2 +- lib/CodeGen/StackSlotColoring.cpp | 26 +- lib/CodeGen/StrongPHIElimination.cpp | 12 +- lib/CodeGen/TailDuplication.cpp | 39 +- lib/CodeGen/TargetInstrInfoImpl.cpp | 7 + lib/CodeGen/TwoAddressInstructionPass.cpp | 24 +- lib/CodeGen/VirtRegMap.cpp | 2 +- lib/CodeGen/VirtRegRewriter.cpp | 144 +- lib/ExecutionEngine/ExecutionEngine.cpp | 44 +- .../ExecutionEngineBindings.cpp | 34 +- lib/ExecutionEngine/Interpreter/Execution.cpp | 78 +- lib/ExecutionEngine/JIT/JIT.cpp | 11 +- lib/ExecutionEngine/JIT/JITEmitter.cpp | 60 +- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 2 +- .../JIT/OProfileJITEventListener.cpp | 16 +- 
lib/Linker/LinkModules.cpp | 34 +- lib/MC/MCExpr.cpp | 5 +- lib/MC/MCInst.cpp | 9 +- lib/MC/MCSectionELF.cpp | 9 +- lib/MC/MCSymbol.cpp | 14 +- lib/MC/MCValue.cpp | 3 +- lib/Support/APInt.cpp | 68 +- lib/Support/CommandLine.cpp | 8 +- lib/Support/ConstantRange.cpp | 3 +- lib/Support/ErrorHandling.cpp | 9 +- lib/Support/FormattedStream.cpp | 8 + lib/Support/Statistic.cpp | 3 +- lib/Support/StringExtras.cpp | 59 +- lib/Support/StringRef.cpp | 25 +- lib/Support/Timer.cpp | 3 +- lib/Support/Twine.cpp | 15 +- lib/System/Win32/DynamicLibrary.inc | 42 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 67 +- lib/Target/ARM/ARMBaseInstrInfo.h | 2 + lib/Target/ARM/ARMBaseRegisterInfo.cpp | 15 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 321 +- lib/Target/ARM/ARMISelLowering.cpp | 23 +- lib/Target/ARM/ARMInstrInfo.td | 4 +- lib/Target/ARM/ARMInstrThumb.td | 33 +- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 12 + lib/Target/ARM/ARMRegisterInfo.td | 13 - lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 39 +- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 27 +- lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 26 +- .../AsmPrinter/BlackfinAsmPrinter.cpp | 9 +- lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 11 +- lib/Target/CBackend/CBackend.cpp | 38 +- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 171 +- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 67 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 13 +- lib/Target/MSP430/MSP430InstrInfo.td | 38 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 75 +- lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 4 +- lib/Target/PIC16/PIC16ISelDAGToDAG.h | 4 +- .../PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 122 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 36 +- lib/Target/PowerPC/PPCInstrInfo.td | 8 +- lib/Target/PowerPC/PPCJITInfo.cpp | 2 + lib/Target/PowerPC/PPCMCAsmInfo.cpp | 1 + lib/Target/PowerPC/README.txt | 33 + lib/Target/README.txt | 48 +- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 19 +- lib/Target/SubtargetFeature.cpp | 3 +- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 115 +- lib/Target/Target.cpp | 2 +- lib/Target/TargetLoweringObjectFile.cpp | 34 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 36 +- lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 66 +- lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 28 + lib/Target/X86/README-SSE.txt | 20 + lib/Target/X86/README.txt | 83 +- lib/Target/X86/X86.td | 4 + lib/Target/X86/X86CodeEmitter.cpp | 20 +- lib/Target/X86/X86FastISel.cpp | 16 +- lib/Target/X86/X86FloatingPoint.cpp | 16 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 157 +- lib/Target/X86/X86ISelLowering.cpp | 241 +- lib/Target/X86/X86Instr64bit.td | 92 +- lib/Target/X86/X86InstrInfo.cpp | 73 +- lib/Target/X86/X86InstrInfo.h | 10 + lib/Target/X86/X86InstrInfo.td | 75 +- lib/Target/X86/X86InstrSSE.td | 10 +- lib/Target/X86/X86JITInfo.cpp | 2 +- lib/Target/X86/X86RegisterInfo.cpp | 9 + lib/Target/X86/X86Subtarget.cpp | 3 +- lib/Target/X86/X86Subtarget.h | 7 +- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 31 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 10 +- .../IPO/DeadArgumentElimination.cpp | 18 +- lib/Transforms/IPO/FunctionAttrs.cpp | 66 +- lib/Transforms/IPO/GlobalOpt.cpp | 93 +- lib/Transforms/IPO/Inliner.cpp | 22 +- lib/Transforms/IPO/Internalize.cpp | 6 +- lib/Transforms/IPO/MergeFunctions.cpp | 8 +- lib/Transforms/IPO/PartialInlining.cpp | 2 +- lib/Transforms/IPO/StructRetPromotion.cpp | 11 +- lib/Transforms/InstCombine/CMakeLists.txt | 17 + lib/Transforms/InstCombine/InstCombine.h | 349 + .../InstCombine/InstCombineAddSub.cpp | 740 + .../InstCombine/InstCombineAndOrXor.cpp | 1990 +++ 
.../InstCombine/InstCombineCalls.cpp | 1142 ++ .../InstCombine/InstCombineCasts.cpp | 1301 ++ .../InstCombine/InstCombineCompares.cpp | 2475 +++ .../InstCombineLoadStoreAlloca.cpp | 613 + .../InstCombine/InstCombineMulDivRem.cpp | 695 + lib/Transforms/InstCombine/InstCombinePHI.cpp | 841 + .../InstCombine/InstCombineSelect.cpp | 703 + .../InstCombine/InstCombineShifts.cpp | 427 + .../InstCombineSimplifyDemanded.cpp | 1106 ++ .../InstCombine/InstCombineVectorOps.cpp | 560 + .../InstCombine/InstCombineWorklist.h | 105 + .../InstCombine/InstructionCombining.cpp | 1274 ++ lib/Transforms/InstCombine/Makefile | 15 + .../Instrumentation/BlockProfiling.cpp | 128 - lib/Transforms/Instrumentation/CMakeLists.txt | 2 - .../Instrumentation/OptimalEdgeProfiling.cpp | 4 +- .../Instrumentation/ProfilingUtils.cpp | 2 +- .../Instrumentation/RSProfiling.cpp | 662 - lib/Transforms/Instrumentation/RSProfiling.h | 31 - lib/Transforms/Makefile | 2 +- lib/Transforms/Scalar/ABCD.cpp | 2 +- lib/Transforms/Scalar/ADCE.cpp | 3 +- lib/Transforms/Scalar/CMakeLists.txt | 1 - lib/Transforms/Scalar/CodeGenPrepare.cpp | 12 +- .../Scalar/DeadStoreElimination.cpp | 86 +- lib/Transforms/Scalar/GVN.cpp | 44 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 10 +- .../Scalar/InstructionCombining.cpp | 13736 ---------------- lib/Transforms/Scalar/JumpThreading.cpp | 255 +- lib/Transforms/Scalar/LICM.cpp | 10 +- lib/Transforms/Scalar/LoopIndexSplit.cpp | 4 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 12 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 26 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 13 +- lib/Transforms/Scalar/Reassociate.cpp | 48 +- lib/Transforms/Scalar/SCCP.cpp | 42 +- lib/Transforms/Scalar/SCCVN.cpp | 3 +- .../Scalar/ScalarReplAggregates.cpp | 18 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 7 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 85 +- lib/Transforms/Scalar/TailDuplication.cpp | 6 +- lib/Transforms/Utils/AddrModeMatcher.cpp | 5 +- lib/Transforms/Utils/BasicBlockUtils.cpp | 22 +- lib/Transforms/Utils/BasicInliner.cpp | 10 +- lib/Transforms/Utils/CloneFunction.cpp | 16 +- lib/Transforms/Utils/CloneLoop.cpp | 4 +- lib/Transforms/Utils/CodeExtractor.cpp | 41 +- lib/Transforms/Utils/InlineFunction.cpp | 45 - lib/Transforms/Utils/InstructionNamer.cpp | 4 +- lib/Transforms/Utils/Local.cpp | 56 +- lib/Transforms/Utils/LoopUnroll.cpp | 22 +- lib/Transforms/Utils/LowerInvoke.cpp | 2 +- lib/Transforms/Utils/LowerSwitch.cpp | 10 +- .../Utils/PromoteMemoryToRegister.cpp | 4 +- lib/Transforms/Utils/SSAUpdater.cpp | 4 +- lib/Transforms/Utils/SSI.cpp | 2 +- lib/Transforms/Utils/SimplifyCFG.cpp | 37 +- .../Utils/UnifyFunctionExitNodes.cpp | 2 +- lib/VMCore/AsmWriter.cpp | 34 +- lib/VMCore/Attributes.cpp | 7 +- lib/VMCore/AutoUpgrade.cpp | 59 +- lib/VMCore/ConstantFold.cpp | 12 +- lib/VMCore/Constants.cpp | 28 +- lib/VMCore/ConstantsContext.h | 2 +- lib/VMCore/Core.cpp | 68 +- lib/VMCore/Function.cpp | 2 +- lib/VMCore/InlineAsm.cpp | 2 +- lib/VMCore/Instruction.cpp | 35 +- lib/VMCore/Instructions.cpp | 29 +- lib/VMCore/IntrinsicInst.cpp | 28 +- lib/VMCore/Mangler.cpp | 202 +- lib/VMCore/Metadata.cpp | 212 +- lib/VMCore/Module.cpp | 24 +- lib/VMCore/Pass.cpp | 5 +- lib/VMCore/PassManager.cpp | 67 +- lib/VMCore/PrintModulePass.cpp | 5 +- lib/VMCore/Type.cpp | 31 +- lib/VMCore/TypeSymbolTable.cpp | 21 +- lib/VMCore/TypesContext.h | 6 +- lib/VMCore/Value.cpp | 32 +- lib/VMCore/ValueSymbolTable.cpp | 20 +- lib/VMCore/Verifier.cpp | 18 +- runtime/libprofile/exported_symbols.lst 
| 2 - test/Analysis/BasicAA/2007-11-05-SizeCrash.ll | 4 +- .../2007-07-15-NegativeStride.ll | 2 +- .../ScalarEvolution/2007-08-06-Unsigned.ll | 2 +- .../2008-02-11-ReversedCondition.ll | 2 +- .../2008-02-12-SMAXTripCount.ll | 2 +- .../2008-11-18-LessThanOrEqual.ll | 2 +- .../ScalarEvolution/2008-11-18-Stride1.ll | 2 +- test/Analysis/ScalarEvolution/avoid-smax-0.ll | 2 +- .../ScalarEvolution/max-trip-count.ll | 2 +- test/Analysis/ScalarEvolution/nsw-offset.ll | 16 +- test/Analysis/ScalarEvolution/nsw.ll | 2 +- test/Analysis/ScalarEvolution/sext-inreg.ll | 4 +- test/Analysis/ScalarEvolution/sext-iv-0.ll | 2 +- test/Analysis/ScalarEvolution/sext-iv-1.ll | 2 +- test/Analysis/ScalarEvolution/sext-iv-2.ll | 4 +- test/Analysis/ScalarEvolution/trip-count3.ll | 2 +- test/Analysis/ScalarEvolution/trip-count7.ll | 2 +- test/Analysis/ScalarEvolution/trip-count8.ll | 2 +- test/Analysis/ScalarEvolution/zext-wrap.ll | 2 +- test/Assembler/functionlocal-metadata.ll | 35 + test/Assembler/vector-cmp.ll | 6 +- test/CodeGen/ARM/indirectbr.ll | 6 +- test/CodeGen/ARM/private.ll | 2 +- test/CodeGen/ARM/tail-opts.ll | 2 +- test/CodeGen/Alpha/private.ll | 2 +- .../Blackfin/2009-08-15-SetCC-Undef.ll | 1 - test/CodeGen/Blackfin/ct32.ll | 6 +- test/CodeGen/Blackfin/ct64.ll | 6 +- test/CodeGen/Blackfin/ctlz16.ll | 6 +- test/CodeGen/Blackfin/ctpop16.ll | 6 +- test/CodeGen/Blackfin/cttz16.ll | 6 +- test/CodeGen/Blackfin/promote-logic.ll | 1 + test/CodeGen/CellSPU/dp_farith.ll | 2 +- test/CodeGen/CellSPU/mul_ops.ll | 1 - test/CodeGen/CellSPU/private.ll | 2 +- .../2009-03-29-SoftFloatVectorExtract.ll | 2 +- .../Generic/2009-04-28-i128-cmp-crash.ll | 16 + test/CodeGen/MSP430/bit.ll | 9 +- test/CodeGen/MSP430/setcc.ll | 1 + test/CodeGen/MSP430/shifts.ll | 51 + test/CodeGen/Mips/private.ll | 2 +- test/CodeGen/PowerPC/indirectbr.ll | 8 +- test/CodeGen/PowerPC/private.ll | 2 +- test/CodeGen/SPARC/private.ll | 2 +- test/CodeGen/SystemZ/2009-06-02-Rotate.ll | 4 +- test/CodeGen/SystemZ/2010-01-04-DivMem.ll | 50 + .../Thumb2/2010-01-06-TailDuplicateLabels.ll | 89 + test/CodeGen/Thumb2/thumb2-add.ll | 16 +- test/CodeGen/X86/2006-05-11-InstrSched.ll | 14 +- .../X86}/2007-01-08-X86-64-Pointer.ll | 0 test/CodeGen/X86/2007-02-04-OrAddrMode.ll | 4 +- test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 4 +- .../X86/2009-05-23-dagcombine-shifts.ll | 8 +- test/CodeGen/X86/2009-11-16-MachineLICM.ll | 2 +- test/CodeGen/X86/2010-01-05-ZExt-Shl.ll | 15 + test/CodeGen/X86/2010-01-07-ISelBug.ll | 27 + test/CodeGen/X86/2010-01-07-UAMemFeature.ll | 11 + test/CodeGen/X86/2010-01-08-Atomic64Bug.ll | 29 + test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll | 97 + test/CodeGen/X86/2010-01-13-OptExtBug.ll | 46 + test/CodeGen/X86/3addr-or.ll | 27 + test/CodeGen/X86/addr-label-difference.ll | 22 + test/CodeGen/X86/and-su.ll | 43 +- test/CodeGen/X86/anyext-uses.ll | 47 - test/CodeGen/X86/br-fold.ll | 20 + test/CodeGen/X86/brcond.ll | 69 + test/CodeGen/X86/darwin-bzero.ll | 2 +- test/CodeGen/X86/extractelement-shuffle.ll | 10 +- test/CodeGen/X86/fast-isel.ll | 2 +- test/CodeGen/X86/fold-load.ll | 26 +- test/CodeGen/X86/lsr-sort.ll | 5 +- test/CodeGen/X86/mul-legalize.ll | 2 +- test/CodeGen/X86/private.ll | 2 +- test/CodeGen/X86/remat-mov-0.ll | 13 + test/CodeGen/X86/sext-subreg.ll | 17 + test/CodeGen/X86/stack-color-with-reg.ll | 2 +- test/CodeGen/X86/stride-nine-with-base-reg.ll | 3 + test/CodeGen/X86/tail-opts.ll | 2 +- test/CodeGen/X86/tailcall-largecode.ll | 71 + test/CodeGen/X86/test-nofold.ll | 39 +- test/CodeGen/X86/twoaddr-lea.ll | 24 + 
test/CodeGen/X86/use-add-flags.ll | 56 + test/CodeGen/X86/vec_cast.ll | 48 + test/CodeGen/X86/vec_ext_inreg.ll | 1 + test/CodeGen/X86/vec_shuffle-22.ll | 2 +- test/CodeGen/X86/vec_shuffle-25.ll | 18 +- test/CodeGen/X86/vec_shuffle-26.ll | 6 +- test/CodeGen/X86/widen_select-1.ll | 2 +- test/CodeGen/X86/widen_shuffle-1.ll | 2 +- test/CodeGen/X86/widen_shuffle-2.ll | 2 +- test/CodeGen/X86/x86-64-and-mask.ll | 43 +- test/CodeGen/X86/x86-64-jumps.ll | 29 + .../X86/{brcond-srl.ll => xor-icmp.ll} | 21 +- test/CodeGen/XCore/2009-03-27-v2f64-param.ll | 2 +- test/CodeGen/XCore/private.ll | 2 +- test/DebugInfo/2009-10-16-Scope.ll | 5 +- test/DebugInfo/2009-12-01-CurrentFn.ll | 17 - test/DebugInfo/2010-01-05-DbgScope.ll | 18 + test/DebugInfo/printdbginfo2.ll | 13 +- test/ExecutionEngine/2010-01-15-UndefValue.ll | 9 + test/Feature/NamedMDNode.ll | 5 +- test/FrontendC/2010-01-05-LinkageName.c | 15 + test/FrontendC/2010-01-13-MemBarrier.c | 11 + test/FrontendC/2010-01-14-FnType-DebugInfo.c | 4 + test/FrontendC/2010-01-14-StaticVariable.c | 12 + test/FrontendC/cstring-align.c | 2 + test/Integer/BitPacked.ll | 4 +- test/Integer/packed_bt.ll | 4 +- test/Integer/testvarargs_bt.ll | 2 +- test/Other/2007-06-28-PassManager.ll | 2 +- test/Other/2008-02-14-PassManager.ll | 2 +- test/Other/2008-08-14-PassManager.ll | 2 +- test/Other/2009-06-05-no-implicit-float.ll | 4 +- test/TableGen/eq.td | 13 + test/Transforms/ConstProp/loads.ll | 10 + test/Transforms/DeadArgElim/canon.ll | 4 +- .../DeadStoreElimination/const-pointers.ll | 2 +- .../DeadStoreElimination/no-targetdata.ll | 4 +- .../FunctionAttrs/2008-10-04-LocalMemory.ll | 56 +- test/Transforms/GVN/null-aliases-nothing.ll | 2 +- test/Transforms/GlobalOpt/crash.ll | 16 + .../Transforms/GlobalOpt/ctor-list-opt-dbg.ll | 2 +- .../IPConstantProp/return-argument.ll | 4 +- .../IndVarSimplify/shrunk-constant.ll | 2 +- .../Inline/2007-12-19-InlineNoUnwind.ll | 2 +- test/Transforms/InstCombine/apint-shift.ll | 7 - test/Transforms/InstCombine/bswap-fold.ll | 31 +- test/Transforms/InstCombine/bswap.ll | 2 + test/Transforms/InstCombine/cast-and-cast.ll | 17 - .../InstCombine/cast-cast-to-and.ll | 9 - test/Transforms/InstCombine/cast-load-gep.ll | 21 - test/Transforms/InstCombine/cast-propagate.ll | 11 - test/Transforms/InstCombine/cast-sext-zext.ll | 12 - test/Transforms/InstCombine/cast.ll | 280 +- test/Transforms/InstCombine/cast2.ll | 37 - test/Transforms/InstCombine/cast3.ll | 35 - .../InstCombine/cast_ld_addr_space.ll | 19 - test/Transforms/InstCombine/cast_ptr.ll | 41 + test/Transforms/InstCombine/fsub-fadd.ll | 39 + test/Transforms/InstCombine/intrinsics.ll | 47 +- test/Transforms/InstCombine/load-cmp.ll | 112 + test/Transforms/InstCombine/load.ll | 9 + .../InstCombine/loadstore-alignment.ll | 4 +- test/Transforms/InstCombine/or.ll | 63 +- .../Transforms/InstCombine/setcc-cast-cast.ll | 46 - test/Transforms/InstCombine/shift-sra.ll | 45 +- test/Transforms/InstCombine/sub.ll | 29 +- test/Transforms/JumpThreading/basic.ll | 18 +- test/Transforms/LICM/licm_preserve_dbginfo.ll | 55 - .../SplitValue-2007-08-24-dbg.ll | 2 +- test/Transforms/LoopRotate/PhiRename-1.ll | 2 +- .../2009-04-28-no-reduce-mul.ll | 10 +- .../LoopStrengthReduce/dont_reverse.ll | 4 +- .../quadradic-exit-value.ll | 2 +- test/Transforms/PruneEH/simplenoreturntest.ll | 2 +- test/Transforms/Reassociate/crash.ll | 33 + .../ScalarRepl/2009-01-09-scalarrepl-empty.ll | 2 +- .../SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll | 2 +- .../2009-03-05-Speculative-Hoist-Dbg.ll | 2 +- 
.../Transforms/SimplifyCFG/branch_fold_dbg.ll | 2 +- .../SimplifyCFG/hoist-common-code.dbg.ll | 2 +- .../SimplifyCFG/switch_formation.dbg.ll | 2 +- .../SimplifyCFG/two-entry-phi-return.dbg.ll | 2 +- .../TailCallElim/dont_reorder_load.ll | 6 +- test/Verifier/2006-10-15-AddrLabel.ll | 2 +- tools/llc/llc.cpp | 5 + tools/llvm-mc/AsmLexer.cpp | 2 +- tools/llvm-mc/AsmParser.cpp | 28 +- tools/llvmc/plugins/Base/Base.td.in | 3 +- tools/lto/LTOCodeGenerator.cpp | 6 +- tools/opt/opt.cpp | 4 + unittests/ADT/BitVectorTest.cpp | 140 + unittests/ADT/SmallBitVectorTest.cpp | 140 + unittests/VMCore/MetadataTest.cpp | 2 +- utils/TableGen/AsmMatcherEmitter.cpp | 22 +- utils/TableGen/CodeEmitterGen.cpp | 9 +- utils/TableGen/CodeGenInstruction.cpp | 4 +- utils/TableGen/CodeGenTarget.cpp | 21 +- utils/TableGen/CodeGenTarget.h | 5 - utils/TableGen/DAGISelEmitter.cpp | 196 +- utils/TableGen/FastISelEmitter.cpp | 2 +- utils/TableGen/InstrInfoEmitter.cpp | 3 +- utils/TableGen/IntrinsicEmitter.cpp | 2 +- utils/TableGen/OptParserEmitter.cpp | 24 +- utils/TableGen/Record.cpp | 10 + utils/TableGen/Record.h | 2 +- utils/TableGen/SubtargetEmitter.cpp | 4 +- utils/TableGen/TGLexer.cpp | 1 + utils/TableGen/TGLexer.h | 2 +- utils/TableGen/TGParser.cpp | 7 + utils/buildit/GNUmakefile | 10 +- utils/vim/llvm.vim | 4 +- utils/vim/vimrc | 2 +- 532 files changed, 23259 insertions(+), 19972 deletions(-) create mode 100644 include/llvm/ADT/SmallBitVector.h create mode 100644 include/llvm/MC/MCParsedAsmOperand.h create mode 100644 lib/CodeGen/OptimizeExts.cpp create mode 100644 lib/Transforms/InstCombine/CMakeLists.txt create mode 100644 lib/Transforms/InstCombine/InstCombine.h create mode 100644 lib/Transforms/InstCombine/InstCombineAddSub.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineCalls.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineCasts.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineCompares.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineMulDivRem.cpp create mode 100644 lib/Transforms/InstCombine/InstCombinePHI.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineSelect.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineShifts.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineVectorOps.cpp create mode 100644 lib/Transforms/InstCombine/InstCombineWorklist.h create mode 100644 lib/Transforms/InstCombine/InstructionCombining.cpp create mode 100644 lib/Transforms/InstCombine/Makefile delete mode 100644 lib/Transforms/Instrumentation/BlockProfiling.cpp delete mode 100644 lib/Transforms/Instrumentation/RSProfiling.cpp delete mode 100644 lib/Transforms/Instrumentation/RSProfiling.h delete mode 100644 lib/Transforms/Scalar/InstructionCombining.cpp create mode 100644 test/Assembler/functionlocal-metadata.ll create mode 100644 test/CodeGen/MSP430/shifts.ll create mode 100644 test/CodeGen/SystemZ/2010-01-04-DivMem.ll create mode 100644 test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll rename test/{Transforms/IndVarSimplify => CodeGen/X86}/2007-01-08-X86-64-Pointer.ll (100%) create mode 100644 test/CodeGen/X86/2010-01-05-ZExt-Shl.ll create mode 100644 test/CodeGen/X86/2010-01-07-ISelBug.ll create mode 100644 test/CodeGen/X86/2010-01-07-UAMemFeature.ll create mode 100644 test/CodeGen/X86/2010-01-08-Atomic64Bug.ll create mode 
100644 test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll create mode 100644 test/CodeGen/X86/2010-01-13-OptExtBug.ll create mode 100644 test/CodeGen/X86/3addr-or.ll create mode 100644 test/CodeGen/X86/addr-label-difference.ll delete mode 100644 test/CodeGen/X86/anyext-uses.ll create mode 100644 test/CodeGen/X86/br-fold.ll create mode 100644 test/CodeGen/X86/brcond.ll create mode 100644 test/CodeGen/X86/remat-mov-0.ll create mode 100644 test/CodeGen/X86/sext-subreg.ll create mode 100644 test/CodeGen/X86/tailcall-largecode.ll create mode 100644 test/CodeGen/X86/twoaddr-lea.ll create mode 100644 test/CodeGen/X86/use-add-flags.ll create mode 100644 test/CodeGen/X86/vec_cast.ll rename test/CodeGen/X86/{brcond-srl.ll => xor-icmp.ll} (64%) delete mode 100644 test/DebugInfo/2009-12-01-CurrentFn.ll create mode 100644 test/DebugInfo/2010-01-05-DbgScope.ll create mode 100644 test/ExecutionEngine/2010-01-15-UndefValue.ll create mode 100644 test/FrontendC/2010-01-05-LinkageName.c create mode 100644 test/FrontendC/2010-01-13-MemBarrier.c create mode 100644 test/FrontendC/2010-01-14-FnType-DebugInfo.c create mode 100644 test/FrontendC/2010-01-14-StaticVariable.c create mode 100644 test/TableGen/eq.td create mode 100644 test/Transforms/GlobalOpt/crash.ll delete mode 100644 test/Transforms/InstCombine/cast-and-cast.ll delete mode 100644 test/Transforms/InstCombine/cast-cast-to-and.ll delete mode 100644 test/Transforms/InstCombine/cast-load-gep.ll delete mode 100644 test/Transforms/InstCombine/cast-propagate.ll delete mode 100644 test/Transforms/InstCombine/cast-sext-zext.ll delete mode 100644 test/Transforms/InstCombine/cast2.ll delete mode 100644 test/Transforms/InstCombine/cast3.ll delete mode 100644 test/Transforms/InstCombine/cast_ld_addr_space.ll create mode 100644 test/Transforms/InstCombine/fsub-fadd.ll create mode 100644 test/Transforms/InstCombine/load-cmp.ll delete mode 100644 test/Transforms/InstCombine/setcc-cast-cast.ll delete mode 100644 test/Transforms/LICM/licm_preserve_dbginfo.ll create mode 100644 test/Transforms/Reassociate/crash.ll create mode 100644 unittests/ADT/BitVectorTest.cpp create mode 100644 unittests/ADT/SmallBitVectorTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bce0392db4f..7063640ca4d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -296,6 +296,7 @@ add_subdirectory(lib/Bitcode/Reader) add_subdirectory(lib/Bitcode/Writer) add_subdirectory(lib/Transforms/Utils) add_subdirectory(lib/Transforms/Instrumentation) +add_subdirectory(lib/Transforms/InstCombine) add_subdirectory(lib/Transforms/Scalar) add_subdirectory(lib/Transforms/IPO) add_subdirectory(lib/Transforms/Hello) diff --git a/LICENSE.TXT b/LICENSE.TXT index fd49172664b5..b8d2c742096f 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -4,7 +4,7 @@ LLVM Release License University of Illinois/NCSA Open Source License -Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. All rights reserved. 
Developed by: diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 9ebaadc3bbcf..7915593db4e5 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -512,7 +512,7 @@ case "$enableval" in PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; - SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; *) AC_MSG_ERROR([Can not set target to build]) ;; esac ;; diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake index 97d07bd9fdf9..bc2f45b3de31 100644 --- a/cmake/modules/LLVMLibDeps.cmake +++ b/cmake/modules/LLVMLibDeps.cmake @@ -7,9 +7,9 @@ set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLV set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport) set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport) set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem) set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) @@ -21,15 +21,16 @@ set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTransformUtils) set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem) set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa) +set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa) set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport 
LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) @@ -37,13 +38,13 @@ set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport) set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport) set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) @@ -53,14 +54,14 @@ set(MSVC_LIB_DEPS_LLVMSystem ) set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget) set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget) set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMipa) set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMX86Info) set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMX86CodeGen LLVMX86Info) -set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Disassembler LLVMX86Info) -set(MSVC_LIB_DEPS_LLVMX86Disassembler ) +set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Info) +set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info) set(MSVC_LIB_DEPS_LLVMX86Info LLVMSupport) -set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo) +set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo) set(MSVC_LIB_DEPS_LLVMXCoreAsmPrinter LLVMAsmPrinter LLVMCodeGen 
LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo) set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMSupport) set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport LLVMSystem) diff --git a/configure b/configure index 3e0ca0a680b6..fc30999dc904 100755 --- a/configure +++ b/configure @@ -5080,7 +5080,7 @@ case "$enableval" in PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; - SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5 echo "$as_me: error: Can not set target to build" >&2;} diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html index d39de19ec6a5..2eb7abcacd11 100644 --- a/docs/CodeGenerator.html +++ b/docs/CodeGenerator.html @@ -1731,11 +1731,6 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { (because one or more of above constraints are not met) to be followed by a readjustment of the stack. So performance might be worse in such cases.

-

On x86 and x86-64 one register is reserved for indirect tail calls (e.g. via a - function pointer). So there is one less register for integer argument - passing. For x86 this means 2 registers (if inreg parameter - attribute is used) and for x86-64 this means 5 registers are used.

-
@@ -2121,7 +2116,7 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2009-10-10 23:30:55 +0200 (Sat, 10 Oct 2009) $ + Last modified: $Date: 2010-01-11 19:53:47 +0100 (Mon, 11 Jan 2010) $ diff --git a/docs/LangRef.html b/docs/LangRef.html index ba09f4c4bfbb..6ea0ead48a7f 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -43,6 +43,7 @@
  • Global Variables
  • Functions
  • Aliases
  • +
  • Named Metadata
  • Parameter Attributes
  • Function Attributes
  • Garbage Collector Names
  • @@ -85,12 +86,12 @@
  • Undefined Values
  • Addresses of Basic Blocks
  • Constant Expressions
  • -
  • Embedded Metadata
  • Other Values
    1. Inline Assembler Expressions
    2. +
    3. Metadata Nodes and Metadata Strings
  • Intrinsic Global Variables @@ -498,14 +499,19 @@ define i32 @main() { ; i32()* ; Call puts function to write out the string to stdout. call i32 @puts(i8 * %cast210) ; i32 - ret i32 0
    }
    + ret i32 0
    } + +; Named metadata +!1 = metadata !{i32 41} +!foo = !{!1, null}
  • This example is made up of a global variable named - ".LC0", an external declaration of the "puts" function, and + ".LC0", an external declaration of the "puts" function, a function definition for - "main".

    + "main" and named metadata + "foo".

    In general, a module is made up of a list of global values, where both functions and global variables are global values. Global values are @@ -558,10 +564,17 @@ define i32 @main() { ; i32()*

    linkonce
    Globals with "linkonce" linkage are merged with other globals of - the same name when linkage occurs. This is typically used to implement - inline functions, templates, or other code which must be generated in each - translation unit that uses it. Unreferenced linkonce globals are - allowed to be discarded.
    + the same name when linkage occurs. This can be used to implement + some forms of inline functions, templates, or other code which must be + generated in each translation unit that uses it, but where the body may + be overridden with a more definitive definition later. Unreferenced + linkonce globals are allowed to be discarded. Note that + linkonce linkage does not actually allow the optimizer to + inline the body of this function into callers because it doesn't know if + this definition of the function is the definitive definition within the + program or whether it will be overridden by a stronger definition. + To enable inlining and other optimizations, use "linkonce_odr" + linkage.
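(For illustration, a minimal C++ sketch of the distinction just described; the helper emitHelper is hypothetical, and the include paths are the ones used in this tree.)

#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include <vector>

llvm::Function *emitHelper(llvm::Module &M, bool BodiesAreEquivalent) {
  const llvm::FunctionType *FTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(M.getContext()),
                              std::vector<const llvm::Type*>(), false);
  // linkonce: the body may be replaced by a stronger definition at link
  // time, so the optimizer must not inline it.  linkonce_odr: every
  // definition is known to be equivalent, so inlining is safe.
  llvm::GlobalValue::LinkageTypes L =
      BodiesAreEquivalent ? llvm::GlobalValue::LinkOnceODRLinkage
                          : llvm::GlobalValue::LinkOnceAnyLinkage;
  return llvm::Function::Create(FTy, L, "helper", &M);
}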
    weak
    "weak" linkage has the same merging semantics as @@ -671,9 +684,9 @@ define i32 @main() { ; i32()* (e.g. by passing things in registers). This calling convention allows the target to use whatever tricks it wants to produce fast code for the target, without having to conform to an externally specified ABI - (Application Binary Interface). Implementations of this convention should - allow arbitrary tail call - optimization to be supported. This calling convention does not + (Application Binary Interface). + Tail calls can only be optimized + when this convention is used. This calling convention does not support varargs and requires the prototype of all callees to exactly match the prototype of the function definition.
    @@ -904,6 +917,27 @@ define [linkage] [visibility] + +
    + Named Metadata +
    + +
    + +

Named metadata is a collection of metadata. Metadata + nodes and null are the only valid named metadata operands. + Metadata strings are not allowed as a named metadata operand.

    + +
    Syntax:
    +
    +
    +!1 = metadata !{metadata !"one"}
    +!name = !{null, !1}
    +
    +
    + +
    +
    Parameter Attributes
    @@ -1649,10 +1683,12 @@ Classifications underlying processor. The elements of a structure may be any type that has a size.

    -

Structures are accessed using 'load' and - 'store' by getting a pointer to a field with - the 'getelementptr' instruction.

    - +

    Structures in memory are accessed using 'load' + and 'store' by getting a pointer to a field + with the 'getelementptr' instruction. + Structures in registers are accessed using the + 'extractvalue' and + 'insertvalue' instructions.

    Syntax:
       { <type list> }
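(As a concrete illustration of the two access styles above, a short C++ sketch; readField1 is a hypothetical helper, and the IRBuilder header location is the one used in this tree.)

#include "llvm/Support/IRBuilder.h"

llvm::Value *readField1(llvm::IRBuilder<> &B, llvm::Value *StructPtr,
                        llvm::Value *StructVal) {
  // Struct in memory: compute the field's address, then load through it.
  llvm::Value *FieldPtr = B.CreateStructGEP(StructPtr, 1, "f1.ptr");
  llvm::Value *FromMem  = B.CreateLoad(FieldPtr, "f1.mem");
  (void)FromMem;
  // Struct in a register: pull the field straight out of the value.
  return B.CreateExtractValue(StructVal, 1, "f1.reg");
}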
    @@ -2305,12 +2341,12 @@ has undefined behavior.

    -
    Embedded Metadata +
    -

    Embedded metadata provides a way to attach arbitrary data to the instruction +

    Metadata provides a way to attach arbitrary data to the instruction stream without affecting the behaviour of the program. There are two metadata primitives, strings and nodes. All metadata has the metadata type and is identified in syntax by a preceding exclamation @@ -2329,6 +2365,9 @@ has undefined behavior.

    event that a value is deleted, it will be replaced with a typeless "null", such as "metadata !{null, i32 10}".

    +

Named metadata is a collection of + metadata nodes. For example: "!foo = metadata !{!4, !3}". +

    Optimizations may rely on metadata to provide additional information about the program that isn't available in the instructions, or that isn't easily computable. Similarly, the code generator may expect a certain metadata @@ -3848,7 +3887,7 @@ Instruction

    Syntax:
    -  <result> = insertvalue <aggregate type> <val>, <ty> <val>, <idx>    ; yields <n x <ty>>
    +  <result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>    ; yields <aggregate type>
     
    Overview:
    @@ -3873,7 +3912,8 @@ Instruction
    Example:
    -  <result> = insertvalue {i32, float} %agg, i32 1, 0    ; yields {i32, float}
    +  %agg1 = insertvalue {i32, float} undef, i32 1, 0         ; yields {i32 1, float undef}
    +  %agg2 = insertvalue {i32, float} %agg1, float %val, 1    ; yields {i32 1, float %val}
     
    @@ -4983,15 +5023,31 @@ Loop: ; Infinite loop that counts from 0 on up...

    This instruction requires several arguments:

      -
    1. The optional "tail" marker indicates whether the callee function accesses - any allocas or varargs in the caller. If the "tail" marker is present, - the function call is eligible for tail call optimization. Note that calls - may be marked "tail" even if they do not occur before - a ret instruction.
    2. +
    3. The optional "tail" marker indicates that the callee function does not + access any allocas or varargs in the caller. Note that calls may be + marked "tail" even if they do not occur before + a ret instruction. If the "tail" marker is + present, the function call is eligible for tail call optimization, + but might not in fact be + optimized into a jump. As of this writing, the extra requirements for + a call to actually be optimized are: +
        +
      • Caller and callee both have the calling + convention fastcc.
      • +
      • The call is in tail position (ret immediately follows call and ret + uses value of call or is void).
      • +
      • Option -tailcallopt is enabled, + or llvm::PerformTailCallOpt is true.
      • +
      • Platform specific + constraints are met.
      • +
      +
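(To make the first two requirements concrete, a minimal C++ sketch of how a front end would mark a call; markForTCO is a hypothetical helper. Even with both bits set, the jump is only emitted if -tailcallopt and the platform-specific constraints are also satisfied.)

#include "llvm/CallingConv.h"
#include "llvm/Instructions.h"

void markForTCO(llvm::CallInst *CI) {
  CI->setCallingConv(llvm::CallingConv::Fast); // fastcc; must match the callee
  CI->setTailCall(true);                       // the "tail" marker
}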
    4. The optional "cconv" marker indicates which calling convention the call should use. If none is specified, the call - defaults to using C calling conventions.
    5. + defaults to using C calling conventions. The calling convention of the + call must match the calling convention of the target function, or else the + behavior is undefined.
    6. The optional Parameter Attributes list for return values. Only 'zeroext', 'signext', and @@ -7263,7 +7319,7 @@ LLVM.

      Overview:

      The llvm.objectsize intrinsic is designed to provide information - to the optimizers to either discover at compile time either a) when an + to the optimizers to discover at compile time either a) when an operation like memcpy will either overflow a buffer that corresponds to an object, or b) to determine that a runtime check for overflow isn't necessary. An object in this context means an allocation of a @@ -7294,7 +7350,7 @@ LLVM.

      Chris Lattner
      The LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-12-23 01:29:49 +0100 (Wed, 23 Dec 2009) $ + Last modified: $Date: 2010-01-11 20:35:55 +0100 (Mon, 11 Jan 2010) $ diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html index c5451b67b1af..80a5db030a99 100644 --- a/docs/ProgrammersManual.html +++ b/docs/ProgrammersManual.html @@ -94,6 +94,7 @@ option
    7. BitVector-like containers
    8. @@ -1584,7 +1585,7 @@ please don't use it.

      -

      The BitVector container provides a fixed size set of bits for manipulation. +

      The BitVector container provides a dynamic size set of bits for manipulation. It supports individual bit setting/testing, as well as set operations. The set operations take time O(size of bitvector), but operations are performed one word at a time, instead of one bit at a time. This makes the BitVector very fast for @@ -1593,6 +1594,25 @@ the number of set bits to be high (IE a dense set).
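(A brief usage sketch of llvm::BitVector, exercising the empty(), swap(), and std::swap additions this patch makes to include/llvm/ADT/BitVector.h; bitVectorDemo is a hypothetical function.)

#include "llvm/ADT/BitVector.h"
#include <cassert>

void bitVectorDemo() {
  llvm::BitVector Live(128);   // 128 bits, all initially clear
  Live.set(3);
  Live.set(64);
  assert(!Live.empty() && Live.count() == 2);

  llvm::BitVector Kill(128);
  Kill.set(64);
  Live &= Kill;                // word-at-a-time set intersection
  assert(Live.count() == 1);

  llvm::BitVector Scratch;
  std::swap(Live, Scratch);    // picks up the new std::swap overload
  assert(Live.empty());
}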

      + + + +
      +

      The SmallBitVector container provides the same interface as BitVector, but +it is optimized for the case where only a small number of bits, less than +25 or so, are needed. It also transparently supports larger bit counts, but +slightly less efficiently than a plain BitVector, so SmallBitVector should +only be used when larger counts are rare. +
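(A matching sketch for llvm::SmallBitVector, assuming it mirrors BitVector's interface as the text states; smallBitVectorDemo is hypothetical. It deliberately avoids the operations called out as unsupported below.)

#include "llvm/ADT/SmallBitVector.h"
#include <cassert>

void smallBitVectorDemo() {
  llvm::SmallBitVector Flags(8); // few bits: stored inline, no heap traffic
  Flags.set(2);
  assert(Flags.test(2) && Flags.count() == 1);

  Flags.resize(1000);            // transparently switches to heap storage
  Flags.set(999);
  assert(Flags.count() == 2);
}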

      + +

      +At this time, SmallBitVector does not support set operations (and, or, xor), +and its operator[] does not provide an assignable lvalue. +

      +
      +
      SparseBitVector @@ -3872,7 +3892,7 @@ arguments. An argument has a pointer to the parent Function.

      Dinakar Dhurjati and Chris Lattner
      The LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-10-23 00:11:22 +0200 (Fri, 23 Oct 2009) $ + Last modified: $Date: 2010-01-05 19:24:00 +0100 (Tue, 05 Jan 2010) $ diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 5a0936fcf4da..aff08367566a 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -58,6 +58,7 @@ Almost dead code. include/llvm/Analysis/LiveValues.h => Dan lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8. llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8. + ABCD, SCCVN, GEPSplitterPass --> @@ -1348,7 +1349,7 @@ lists.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-11-03 22:50:09 +0100 (Tue, 03 Nov 2009) $ + Last modified: $Date: 2010-01-09 23:30:40 +0100 (Sat, 09 Jan 2010) $ diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html index 4aca52cc2637..83c979b00fae 100644 --- a/docs/SourceLevelDebugging.html +++ b/docs/SourceLevelDebugging.html @@ -38,6 +38,7 @@
    9. Debugger intrinsic functions
  • Object lifetimes and scoping
  • @@ -774,6 +775,25 @@ DW_TAG_return_variable = 258 + + + +
    +
    +  void %llvm.dbg.value( metadata, i64, metadata )
    +
    + +

    This intrinsic provides information when a user source variable is set to a + new value. The first argument is the new value (wrapped as metadata). The + second argument is the offset in the user source variable where the new value + is written. The third argument is + the %llvm.dbg.variable containing + the description of the user source variable.
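(A hedged C++ sketch of emitting this intrinsic through DIFactory, whose InsertDbgValueIntrinsic helper arrives alongside llvm.dbg.value in this update; noteNewValue is hypothetical, and the exact signature should be checked against include/llvm/Analysis/DebugInfo.h in this tree.)

#include "llvm/Analysis/DebugInfo.h"

void noteNewValue(llvm::DIFactory &DIF, llvm::Value *NewVal,
                  llvm::DIVariable Var, llvm::Instruction *InsertBefore) {
  // Records that the user variable described by Var now holds NewVal,
  // written at offset 0 within the variable.
  DIF.InsertDbgValueIntrinsic(NewVal, /*Offset=*/0, Var, InsertBefore);
}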

    + +
    +
    Object lifetimes and scoping @@ -1718,7 +1738,7 @@ enum Trees { Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-12-01 01:59:58 +0100 (Tue, 01 Dec 2009) $ + Last modified: $Date: 2010-01-11 23:53:48 +0100 (Mon, 11 Jan 2010) $ diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html index 4ae6718bf45b..b96aeab1bef5 100644 --- a/docs/TableGenFundamentals.html +++ b/docs/TableGenFundamentals.html @@ -423,6 +423,10 @@ class. This operation is analogous to $(foreach) in GNU make.
    An integer {0,1} indicating whether list 'a' is empty.
    !if(a,b,c)
    'b' if the result of integer operator 'a' is nonzero, 'c' otherwise.
    +
    !eq(a,b)
    +
    Integer one if string a is equal to string b, zero otherwise. This + only operates on string objects. Use !cast to compare other + types of objects.

    Note that all of the values have rules specifying how they convert to values @@ -794,7 +798,7 @@ This should highlight the APIs in TableGen/Record.h.

    Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-10-29 19:10:34 +0100 (Thu, 29 Oct 2009) $ + Last modified: $Date: 2010-01-05 20:11:42 +0100 (Tue, 05 Jan 2010) $ diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h index 68d8e65db49e..e1e44872b162 100644 --- a/include/llvm-c/Analysis.h +++ b/include/llvm-c/Analysis.h @@ -36,12 +36,12 @@ typedef enum { /* Verifies that a module is valid, taking the specified action if not. Optionally returns a human-readable description of any invalid constructs. OutMessage must be disposed with LLVMDisposeMessage. */ -int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, - char **OutMessage); +LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, + char **OutMessage); /* Verifies that a single function is valid, taking the specified action. Useful for debugging. */ -int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action); +LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action); /* Open up a ghostview window that displays the CFG of the current function. Useful for debugging. */ diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h index a184f609d6ba..59269ce4aba3 100644 --- a/include/llvm-c/BitReader.h +++ b/include/llvm-c/BitReader.h @@ -29,24 +29,24 @@ extern "C" { /* Builds a module from the bitcode in the specified memory buffer, returning a reference to the module via the OutModule parameter. Returns 0 on success. Optionally returns a human-readable error message via OutMessage. */ -int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutModule, char **OutMessage); +LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, char **OutMessage); -int LLVMParseBitcodeInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutModule, char **OutMessage); +LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, char **OutMessage); /* Reads a module from the specified path, returning via the OutMP parameter a module provider which performs lazy deserialization. Returns 0 on success. Optionally returns a human-readable error message via OutMessage. */ -int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage); +LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage); -int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage); +LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage); #ifdef __cplusplus diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index c741d1c19a31..687920549c16 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -46,6 +46,8 @@ extern "C" { #endif +typedef int LLVMBool; + /* Opaque types. */ /** @@ -292,7 +294,7 @@ const char *LLVMGetTarget(LLVMModuleRef M); void LLVMSetTarget(LLVMModuleRef M, const char *Triple); /** See Module::addTypeName. 
*/ -int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty); +LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty); void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name); LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); @@ -355,20 +357,20 @@ LLVMTypeRef LLVMPPCFP128Type(void); /* Operations on function types */ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, - int IsVarArg); -int LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy); + LLVMBool IsVarArg); +LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy); LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy); unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy); void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest); /* Operations on struct types */ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed); + unsigned ElementCount, LLVMBool Packed); LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount, - int Packed); + LLVMBool Packed); unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy); void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest); -int LLVMIsPackedStruct(LLVMTypeRef StructTy); +LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy); /* Operations on array, pointer, and vector types (sequence types) */ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount); @@ -427,10 +429,6 @@ void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle); macro(IntrinsicInst) \ macro(DbgInfoIntrinsic) \ macro(DbgDeclareInst) \ - macro(DbgFuncStartInst) \ - macro(DbgRegionEndInst) \ - macro(DbgRegionStartInst) \ - macro(DbgStopPointInst) \ macro(EHSelectorInst) \ macro(MemIntrinsic) \ macro(MemCpyInst) \ @@ -499,14 +497,14 @@ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index); LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */ LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty); -int LLVMIsConstant(LLVMValueRef Val); -int LLVMIsNull(LLVMValueRef Val); -int LLVMIsUndef(LLVMValueRef Val); +LLVMBool LLVMIsConstant(LLVMValueRef Val); +LLVMBool LLVMIsNull(LLVMValueRef Val); +LLVMBool LLVMIsUndef(LLVMValueRef Val); LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty); /* Operations on scalar constants */ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, - int SignExtend); + LLVMBool SignExtend); LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text, uint8_t Radix); LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text, @@ -521,17 +519,17 @@ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal); /* Operations on composite constants */ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, - unsigned Length, int DontNullTerminate); + unsigned Length, LLVMBool DontNullTerminate); LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, - unsigned Count, int Packed); + unsigned Count, LLVMBool Packed); LLVMValueRef LLVMConstString(const char *Str, unsigned Length, - int DontNullTerminate); + LLVMBool DontNullTerminate); LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals, unsigned Length); LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, - int Packed); + LLVMBool Packed); LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size); /* Constant expressions */ @@ -591,7 
+589,7 @@ LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal, LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, - unsigned isSigned); + LLVMBool isSigned); LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition, LLVMValueRef ConstantIfTrue, @@ -609,13 +607,13 @@ LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef ElementValueConstant, unsigned *IdxList, unsigned NumIdx); -LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, +LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, const char *Constraints, - int HasSideEffects); + LLVMBool HasSideEffects, LLVMBool IsAlignStack); /* Operations on global variables, functions, and aliases (globals) */ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global); -int LLVMIsDeclaration(LLVMValueRef Global); +LLVMBool LLVMIsDeclaration(LLVMValueRef Global); LLVMLinkage LLVMGetLinkage(LLVMValueRef Global); void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage); const char *LLVMGetSection(LLVMValueRef Global); @@ -635,10 +633,10 @@ LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar); void LLVMDeleteGlobal(LLVMValueRef GlobalVar); LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar); void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal); -int LLVMIsThreadLocal(LLVMValueRef GlobalVar); -void LLVMSetThreadLocal(LLVMValueRef GlobalVar, int IsThreadLocal); -int LLVMIsGlobalConstant(LLVMValueRef GlobalVar); -void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, int IsConstant); +LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar); +void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal); +LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar); +void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant); /* Operations on aliases */ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, @@ -678,7 +676,7 @@ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align); /* Operations on basic blocks */ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB); -int LLVMValueIsBasicBlock(LLVMValueRef Val); +LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val); LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val); LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB); unsigned LLVMCountBasicBlocks(LLVMValueRef Fn); @@ -718,8 +716,8 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align); /* Operations on call instructions (only) */ -int LLVMIsTailCall(LLVMValueRef CallInst); -void LLVMSetTailCall(LLVMValueRef CallInst, int IsTailCall); +LLVMBool LLVMIsTailCall(LLVMValueRef CallInst); +void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall); /* Operations on phi nodes */ void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues, @@ -932,11 +930,11 @@ void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP); /*===-- Memory buffers ----------------------------------------------------===*/ -int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, - LLVMMemoryBufferRef *OutMemBuf, - char **OutMessage); -int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, - char **OutMessage); +LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, + LLVMMemoryBufferRef *OutMemBuf, + char **OutMessage); 
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, + char **OutMessage); void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); @@ -956,23 +954,23 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP); /** Initializes, executes on the provided module, and finalizes all of the passes scheduled in the pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. See llvm::PassManager::run(Module&). */ -int LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M); +LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M); /** Initializes all of the function passes scheduled in the function pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. See llvm::FunctionPassManager::doInitialization. */ -int LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM); +LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM); /** Executes all of the function passes scheduled in the function pass manager on the provided function. Returns 1 if any of the passes modified the function, false otherwise. See llvm::FunctionPassManager::run(Function&). */ -int LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F); +LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F); /** Finalizes all of the function passes scheduled in in the function pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. See llvm::FunctionPassManager::doFinalization. */ -int LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM); +LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM); /** Frees the memory of a pass pipeline. For function pipelines, does not free the module provider. diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index 05f2a892e223..151c935ed2a9 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -36,7 +36,7 @@ typedef struct LLVMOpaqueExecutionEngine *LLVMExecutionEngineRef; LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, unsigned long long N, - int IsSigned); + LLVMBool IsSigned); LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P); @@ -45,7 +45,7 @@ LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef Ty, double N); unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef); unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenVal, - int IsSigned); + LLVMBool IsSigned); void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal); @@ -55,18 +55,18 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal); /*===-- Operations on execution engines -----------------------------------===*/ -int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError); +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError); -int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError); +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError); -int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError); +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError); void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE); @@ -86,12 +86,12 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef 
EE, LLVMValueRef F); void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP); -int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError); +LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, + LLVMModuleProviderRef MP, + LLVMModuleRef *OutMod, char **OutError); -int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, - LLVMValueRef *OutFn); +LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, + LLVMValueRef *OutFn); LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE); diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h index 00571826487c..e705a9989694 100644 --- a/include/llvm-c/Target.h +++ b/include/llvm-c/Target.h @@ -26,8 +26,7 @@ extern "C" { #endif -enum { LLVMBigEndian, LLVMLittleEndian }; -typedef int LLVMByteOrdering; +enum LLVMByteOrdering { LLVMBigEndian, LLVMLittleEndian }; typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef; typedef struct LLVMStructLayout *LLVMStructLayoutRef; @@ -62,7 +61,7 @@ static inline void LLVMInitializeAllTargets() { /** LLVMInitializeNativeTarget - The main program should call this function to initialize the native target corresponding to the host. This is useful for JIT applications to ensure that the target gets linked in correctly. */ -static inline int LLVMInitializeNativeTarget() { +static inline LLVMBool LLVMInitializeNativeTarget() { /* If we have a native target, initialize it to ensure it is linked in. */ #ifdef LLVM_NATIVE_ARCH #define DoInit2(TARG) \ @@ -97,7 +96,7 @@ char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef); /** Returns the byte order of a target, either LLVMBigEndian or LLVMLittleEndian. See the method llvm::TargetData::isLittleEndian. */ -LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef); +enum LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef); /** Returns the pointer size in bytes for a target. See the method llvm::TargetData::getPointerSize. */ diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index 9c046efaaddd..45108c8cc519 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -95,6 +95,9 @@ class BitVector { delete[] Bits; } + /// empty - Tests whether there are no bits in this bitvector. + bool empty() const { return Size == 0; } + /// size - Returns the number of bits in this bitvector. unsigned size() const { return Size; } @@ -341,6 +344,12 @@ class BitVector { return *this; } + void swap(BitVector &RHS) { + std::swap(Bits, RHS.Bits); + std::swap(Size, RHS.Size); + std::swap(Capacity, RHS.Capacity); + } + private: unsigned NumBitWords(unsigned S) const { return (S + BITWORD_SIZE-1) / BITWORD_SIZE; @@ -406,4 +415,13 @@ inline BitVector operator^(const BitVector &LHS, const BitVector &RHS) { } } // End llvm namespace + +namespace std { + /// Implement std::swap in terms of BitVector swap. + inline void + swap(llvm::BitVector &LHS, llvm::BitVector &RHS) { + LHS.swap(RHS); + } +} + #endif diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h new file mode 100644 index 000000000000..346fb1ca43dc --- /dev/null +++ b/include/llvm/ADT/SmallBitVector.h @@ -0,0 +1,373 @@ +//===- llvm/ADT/SmallBitVector.h - 'Normally small' bit vectors -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SmallBitVector class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SMALLBITVECTOR_H +#define LLVM_ADT_SMALLBITVECTOR_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> + +namespace llvm { + +/// SmallBitVector - This is a 'bitvector' (really, a variable-sized bit array), +/// optimized for the case when the array is small. It contains one +/// pointer-sized field, which is directly used as a plain collection of bits +/// when possible, or as a pointer to a larger heap-allocated array when +/// necessary. This allows normal "small" cases to be fast without losing +/// generality for large inputs. +/// +class SmallBitVector { + // TODO: In "large" mode, a pointer to a BitVector is used, leading to an + // unnecessary level of indirection. It would be more efficient to use a + // pointer to memory containing size, allocation size, and the array of bits. + PointerIntPair<BitVector *, 1, uintptr_t> X; + + // The number of bits in this class. + static const size_t NumBaseBits = sizeof(uintptr_t) * CHAR_BIT; + + // One bit is used to discriminate between small and large mode. The + // remaining bits are used for the small-mode representation. + static const size_t SmallNumRawBits = NumBaseBits - 1; + + // A few more bits are used to store the size of the bit set in small mode. + // Theoretically this is a ceil-log2. These bits are encoded in the most + // significant bits of the raw bits. + static const size_t SmallNumSizeBits = (NumBaseBits == 32 ? 5 : + NumBaseBits == 64 ? 6 : + SmallNumRawBits); + + // The remaining bits are used to store the actual set in small mode. + static const size_t SmallNumDataBits = SmallNumRawBits - SmallNumSizeBits; + + bool isSmall() const { + return X.getInt(); + } + + void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) { + X.setInt(true); + setSmallSize(NewSize); + setSmallBits(NewSmallBits); + } + + void switchToLarge(BitVector *BV) { + X.setInt(false); + X.setPointer(BV); + } + + // Return all the bits used for the "small" representation; this includes + // bits for the size as well as the element bits. + uintptr_t getSmallRawBits() const { + return reinterpret_cast<uintptr_t>(X.getPointer()) >> 1; + } + + void setSmallRawBits(uintptr_t NewRawBits) { + return X.setPointer(reinterpret_cast<BitVector *>(NewRawBits << 1)); + } + + // Return the size. + size_t getSmallSize() const { + return getSmallRawBits() >> SmallNumDataBits; + } + + void setSmallSize(size_t Size) { + setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); + } + + // Return the element bits. + uintptr_t getSmallBits() const { + return getSmallRawBits() & ~(~uintptr_t(0) << SmallNumDataBits); + } + + void setSmallBits(uintptr_t NewBits) { + setSmallRawBits((getSmallRawBits() & (~uintptr_t(0) << SmallNumDataBits)) | + (NewBits & ~(~uintptr_t(0) << getSmallSize()))); + } + +public: + /// SmallBitVector default ctor - Creates an empty bitvector. + SmallBitVector() : X(0, 1) {} + + /// SmallBitVector ctor - Creates a bitvector of specified number of bits. All + /// bits are initialized to the specified value. + explicit SmallBitVector(unsigned s, bool t = false) : X(0, 1) { + if (s <= SmallNumRawBits) + switchToSmall(t ? ~uintptr_t(0) : 0, s); + else + switchToLarge(new BitVector(s, t)); + } + + /// SmallBitVector copy ctor.
+ SmallBitVector(const SmallBitVector &RHS) { + if (RHS.isSmall()) + X = RHS.X; + else + switchToLarge(new BitVector(*RHS.X.getPointer())); + } + + ~SmallBitVector() { + if (!isSmall()) + delete X.getPointer(); + } + + /// empty - Tests whether there are no bits in this bitvector. + bool empty() const { + return isSmall() ? getSmallSize() == 0 : X.getPointer()->empty(); + } + + /// size - Returns the number of bits in this bitvector. + size_t size() const { + return isSmall() ? getSmallSize() : X.getPointer()->size(); + } + + /// count - Returns the number of bits which are set. + unsigned count() const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + if (sizeof(uintptr_t) * CHAR_BIT == 32) + return CountPopulation_32(Bits); + if (sizeof(uintptr_t) * CHAR_BIT == 64) + return CountPopulation_64(Bits); + assert(0 && "Unsupported!"); + } + return X.getPointer()->count(); + } + + /// any - Returns true if any bit is set. + bool any() const { + if (isSmall()) + return getSmallBits() != 0; + return X.getPointer()->any(); + } + + /// none - Returns true if none of the bits are set. + bool none() const { + if (isSmall()) + return getSmallBits() == 0; + return X.getPointer()->none(); + } + + /// find_first - Returns the index of the first set bit, -1 if none + /// of the bits are set. + int find_first() const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + if (sizeof(uintptr_t) * CHAR_BIT == 32) + return CountTrailingZeros_32(Bits); + if (sizeof(uintptr_t) * CHAR_BIT == 64) + return CountTrailingZeros_64(Bits); + assert(0 && "Unsupported!"); + } + return X.getPointer()->find_first(); + } + + /// find_next - Returns the index of the next set bit following the + /// "Prev" bit. Returns -1 if the next set bit is not found. + int find_next(unsigned Prev) const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + // Mask off previous bits. + Bits &= ~uintptr_t(0) << Prev; + if (sizeof(uintptr_t) * CHAR_BIT == 32) + return CountTrailingZeros_32(Bits); + if (sizeof(uintptr_t) * CHAR_BIT == 64) + return CountTrailingZeros_64(Bits); + assert(0 && "Unsupported!"); + } + return X.getPointer()->find_next(Prev); + } + + /// clear - Clear all bits. + void clear() { + if (!isSmall()) + delete X.getPointer(); + switchToSmall(0, 0); + } + + /// resize - Grow or shrink the bitvector. 
+ void resize(unsigned N, bool t = false) { + if (!isSmall()) { + X.getPointer()->resize(N, t); + } else if (getSmallSize() >= N) { + setSmallSize(N); + setSmallBits(getSmallBits()); + } else { + BitVector *BV = new BitVector(N, t); + uintptr_t OldBits = getSmallBits(); + for (size_t i = 0, e = getSmallSize(); i != e; ++i) + (*BV)[i] = (OldBits >> i) & 1; + switchToLarge(BV); + } + } + + void reserve(unsigned N) { + if (isSmall()) { + if (N > SmallNumDataBits) { + uintptr_t OldBits = getSmallRawBits(); + size_t SmallSize = getSmallSize(); + BitVector *BV = new BitVector(SmallSize); + for (size_t i = 0; i < SmallSize; ++i) + if ((OldBits >> i) & 1) + BV->set(i); + BV->reserve(N); + switchToLarge(BV); + } + } else { + X.getPointer()->reserve(N); + } + } + + // Set, reset, flip + SmallBitVector &set() { + if (isSmall()) + setSmallBits(~uintptr_t(0)); + else + X.getPointer()->set(); + return *this; + } + + SmallBitVector &set(unsigned Idx) { + if (isSmall()) + setSmallBits(getSmallBits() | (uintptr_t(1) << Idx)); + else + X.getPointer()->set(Idx); + return *this; + } + + SmallBitVector &reset() { + if (isSmall()) + setSmallBits(0); + else + X.getPointer()->reset(); + return *this; + } + + SmallBitVector &reset(unsigned Idx) { + if (isSmall()) + setSmallBits(getSmallBits() & ~(uintptr_t(1) << Idx)); + else + X.getPointer()->reset(Idx); + return *this; + } + + SmallBitVector &flip() { + if (isSmall()) + setSmallBits(~getSmallBits()); + else + X.getPointer()->flip(); + return *this; + } + + SmallBitVector &flip(unsigned Idx) { + if (isSmall()) + setSmallBits(getSmallBits() ^ (uintptr_t(1) << Idx)); + else + X.getPointer()->flip(Idx); + return *this; + } + + // No argument flip. + SmallBitVector operator~() const { + return SmallBitVector(*this).flip(); + } + + // Indexing. + // TODO: Add an index operator which returns a "reference" (proxy class). + bool operator[](unsigned Idx) const { + assert(Idx < size() && "Out-of-bounds Bit access."); + if (isSmall()) + return ((getSmallBits() >> Idx) & 1) != 0; + return X.getPointer()->operator[](Idx); + } + + bool test(unsigned Idx) const { + return (*this)[Idx]; + } + + // Comparison operators. + bool operator==(const SmallBitVector &RHS) const { + if (size() != RHS.size()) + return false; + if (isSmall()) + return getSmallBits() == RHS.getSmallBits(); + else + return *X.getPointer() == *RHS.X.getPointer(); + } + + bool operator!=(const SmallBitVector &RHS) const { + return !(*this == RHS); + } + + // Intersection, union, disjoint union. + BitVector &operator&=(const SmallBitVector &RHS); // TODO: implement + + BitVector &operator|=(const SmallBitVector &RHS); // TODO: implement + + BitVector &operator^=(const SmallBitVector &RHS); // TODO: implement + + // Assignment operator. 
+ const SmallBitVector &operator=(const SmallBitVector &RHS) { + if (isSmall()) { + if (RHS.isSmall()) + X = RHS.X; + else + switchToLarge(new BitVector(*RHS.X.getPointer())); + } else { + if (!RHS.isSmall()) + *X.getPointer() = *RHS.X.getPointer(); + else { + delete X.getPointer(); + X = RHS.X; + } + } + return *this; + } + + void swap(SmallBitVector &RHS) { + std::swap(X, RHS.X); + } +}; + +inline SmallBitVector +operator&(const SmallBitVector &LHS, const SmallBitVector &RHS) { + SmallBitVector Result(LHS); + Result &= RHS; + return Result; +} + +inline SmallBitVector +operator|(const SmallBitVector &LHS, const SmallBitVector &RHS) { + SmallBitVector Result(LHS); + Result |= RHS; + return Result; +} + +inline SmallBitVector +operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) { + SmallBitVector Result(LHS); + Result ^= RHS; + return Result; +} + +} // End llvm namespace + +namespace std { + /// Implement std::swap in terms of BitVector swap. + inline void + swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) { + LHS.swap(RHS); + } +} + +#endif diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 85936c019d3b..1ea546f46f29 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -23,6 +23,7 @@ #include namespace llvm { +template class SmallVectorImpl; /// hexdigit - Return the (uppercase) hexadecimal character for the /// given number \arg X (which should be less than 16). @@ -136,86 +137,25 @@ static inline std::string UppercaseString(const std::string &S) { return result; } -/// StringsEqualNoCase - Return true if the two strings are equal, ignoring -/// case. -static inline bool StringsEqualNoCase(const std::string &LHS, - const std::string &RHS) { - if (LHS.size() != RHS.size()) return false; - for (unsigned i = 0, e = static_cast(LHS.size()); i != e; ++i) - if (tolower(LHS[i]) != tolower(RHS[i])) return false; - return true; -} - -/// StringsEqualNoCase - Return true if the two strings are equal, ignoring -/// case. -static inline bool StringsEqualNoCase(const std::string &LHS, - const char *RHS) { - for (unsigned i = 0, e = static_cast(LHS.size()); i != e; ++i) { - if (RHS[i] == 0) return false; // RHS too short. - if (tolower(LHS[i]) != tolower(RHS[i])) return false; - } - return RHS[LHS.size()] == 0; // Not too long? -} - -/// StringsEqualNoCase - Return true if the two null-terminated C strings are -/// equal, ignoring - -static inline bool StringsEqualNoCase(const char *LHS, const char *RHS, - unsigned len) { - - for (unsigned i = 0; i < len; ++i) { - if (tolower(LHS[i]) != tolower(RHS[i])) - return false; - - // If RHS[i] == 0 then LHS[i] == 0 or otherwise we would have returned - // at the previous branch as tolower('\0') == '\0'. - if (RHS[i] == 0) - return true; - } - - return true; -} - -/// CStrInCStrNoCase - Portable version of strcasestr. Locates the first -/// occurance of c-string 's2' in string 's1', ignoring case. Returns -/// NULL if 's2' cannot be found. -static inline const char* CStrInCStrNoCase(const char *s1, const char *s2) { - - // Are either strings NULL or empty? - if (!s1 || !s2 || s1[0] == '\0' || s2[0] == '\0') - return 0; - - if (s1 == s2) - return s1; - - const char *I1=s1, *I2=s2; - - while (*I1 != '\0' && *I2 != '\0' ) - if (tolower(*I1) != tolower(*I2)) { // No match. Start over. - ++s1; I1 = s1; I2 = s2; - } - else { // Character match. Advance to the next character. - ++I1; ++I2; - } - - // If we exhausted all of the characters in 's2', then 's2' appears in 's1'. 
- return *I2 == '\0' ? s1 : 0; -} +/// StrInStrNoCase - Portable version of strcasestr. Locates the first +/// occurrence of string 's1' in string 's2', ignoring case. Returns +/// the offset of s2 in s1 or npos if s2 cannot be found. +StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2); /// getToken - This function extracts one token from source, ignoring any /// leading characters that appear in the Delimiters string, and ending the /// token at any of the characters that appear in the Delimiters string. If /// there are no tokens in the source string, an empty string is returned. -/// The Source source string is updated in place to remove the returned string -/// and any delimiter prefix from it. -std::string getToken(std::string &Source, - const char *Delimiters = " \t\n\v\f\r"); +/// The function returns a pair containing the extracted token and the +/// remaining tail string. +std::pair<StringRef, StringRef> getToken(StringRef Source, + StringRef Delimiters = " \t\n\v\f\r"); /// SplitString - Split up the specified string according to the specified /// delimiters, appending the result fragments to the output list. -void SplitString(const std::string &Source, - std::vector<std::string> &OutFragments, - const char *Delimiters = " \t\n\v\f\r"); +void SplitString(StringRef Source, + SmallVectorImpl<StringRef> &OutFragments, + StringRef Delimiters = " \t\n\v\f\r"); /// HashString - Hash funtion for strings. /// diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 1c738369d75e..3064af3abbca 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -29,6 +29,7 @@ namespace llvm { class StringRef { public: typedef const char *iterator; + typedef const char *const_iterator; static const size_t npos = ~size_t(0); typedef size_t size_type; @@ -42,15 +43,8 @@ namespace llvm { // Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min() // Changing the arg of min to be an integer, instead of a reference to an // integer works around this bug. - size_t min(size_t a, size_t b) const - { - return a < b ? a : b; - } - - size_t max(size_t a, size_t b) const - { - return a > b ? a : b; - } + size_t min(size_t a, size_t b) const { return a < b ? a : b; } + size_t max(size_t a, size_t b) const { return a > b ? a : b; } public: /// @name Constructors @@ -191,7 +185,7 @@ namespace llvm { /// find - Search for the first character \arg C in the string. /// - /// \return - The index of the first occurence of \arg C, or npos if not + /// \return - The index of the first occurrence of \arg C, or npos if not /// found. size_t find(char C, size_t From = 0) const { for (size_t i = min(From, Length), e = Length; i != e; ++i) @@ -202,13 +196,13 @@ namespace llvm { /// find - Search for the first string \arg Str in the string. /// - /// \return - The index of the first occurence of \arg Str, or npos if not + /// \return - The index of the first occurrence of \arg Str, or npos if not /// found. size_t find(StringRef Str, size_t From = 0) const; /// rfind - Search for the last character \arg C in the string. /// - /// \return - The index of the last occurence of \arg C, or npos if not + /// \return - The index of the last occurrence of \arg C, or npos if not /// found. size_t rfind(char C, size_t From = npos) const { From = min(From, Length); @@ -223,7 +217,7 @@ namespace llvm { /// rfind - Search for the last string \arg Str in the string.
/// - /// \return - The index of the last occurence of \arg Str, or npos if not + /// \return - The index of the last occurrence of \arg Str, or npos if not /// found. size_t rfind(StringRef Str) const; @@ -313,7 +307,7 @@ namespace llvm { return StringRef(Data + Start, End - Start); } - /// split - Split into two substrings around the first occurence of a + /// split - Split into two substrings around the first occurrence of a /// separator character. /// /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) @@ -330,7 +324,7 @@ namespace llvm { return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); } - /// split - Split into two substrings around the first occurence of a + /// split - Split into two substrings around the first occurrence of a /// separator string. /// /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) @@ -347,7 +341,7 @@ namespace llvm { return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); } - /// split - Split into substrings around the occurences of a separator + /// split - Split into substrings around the occurrences of a separator /// string. /// /// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most @@ -366,7 +360,7 @@ namespace llvm { StringRef Separator, int MaxSplit = -1, bool KeepEmpty = true) const; - /// rsplit - Split into two substrings around the last occurence of a + /// rsplit - Split into two substrings around the last occurrence of a /// separator character. /// /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h index ca0be53d481d..97e9df445817 100644 --- a/include/llvm/ADT/Twine.h +++ b/include/llvm/ADT/Twine.h @@ -329,6 +329,22 @@ namespace llvm { bool isTriviallyEmpty() const { return isNullary(); } + + /// isSingleStringRef - Return true if this twine can be dynamically + /// accessed as a single StringRef value with getSingleStringRef(). + bool isSingleStringRef() const { + if (getRHSKind() != EmptyKind) return false; + + switch (getLHSKind()) { + case EmptyKind: + case CStringKind: + case StdStringKind: + case StringRefKind: + return true; + default: + return false; + } + } /// @} /// @name String Operations @@ -347,6 +363,24 @@ namespace llvm { /// SmallVector. void toVector(SmallVectorImpl &Out) const; + /// getSingleStringRef - This returns the twine as a single StringRef. This + /// method is only valid if isSingleStringRef() is true. + StringRef getSingleStringRef() const { + assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); + switch (getLHSKind()) { + default: assert(0 && "Out of sync with isSingleStringRef"); + case EmptyKind: return StringRef(); + case CStringKind: return StringRef((const char*)LHS); + case StdStringKind: return StringRef(*(const std::string*)LHS); + case StringRefKind: return *(const StringRef*)LHS; + } + } + + /// toStringRef - This returns the twine as a single StringRef if it can be + /// represented as such. Otherwise the twine is written into the given + /// SmallVector and a StringRef to the SmallVector's data is returned. + StringRef toStringRef(SmallVectorImpl &Out) const; + /// print - Write the concatenated string represented by this twine to the /// stream \arg OS. 
void print(raw_ostream &OS) const; diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 2d43bddf7e0b..9f411350a791 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -197,6 +197,10 @@ class AliasAnalysis { virtual ModRefBehavior getModRefBehavior(Function *F, std::vector *Info = 0); + /// getModRefBehavior - Return the modref behavior of the intrinsic with the + /// given id. + static ModRefBehavior getModRefBehavior(unsigned iid); + /// doesNotAccessMemory - If the specified call is known to never read or /// write memory, return true. If the call only reads from known-constant /// memory, it is also legal to return true. Calls that unwind the stack diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h index fdbd9c1130ca..cc9514cade16 100644 --- a/include/llvm/Analysis/DebugInfo.h +++ b/include/llvm/Analysis/DebugInfo.h @@ -30,11 +30,7 @@ namespace llvm { class Module; class Type; class Value; - struct DbgStopPointInst; - struct DbgDeclareInst; - struct DbgFuncStartInst; - struct DbgRegionStartInst; - struct DbgRegionEndInst; + class DbgDeclareInst; class DebugLoc; struct DebugLocTracker; class Instruction; @@ -495,7 +491,6 @@ namespace llvm { Module &M; LLVMContext& VMContext; - const Type *EmptyStructPtr; // "{}*". Function *DeclareFn; // llvm.dbg.declare Function *ValueFn; // llvm.dbg.value @@ -651,27 +646,19 @@ namespace llvm { Instruction *InsertBefore); /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. - Instruction *InsertDbgValueIntrinsic(llvm::Value *V, llvm::Value *Offset, + Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset, DIVariable D, BasicBlock *InsertAtEnd); /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. - Instruction *InsertDbgValueIntrinsic(llvm::Value *V, llvm::Value *Offset, + Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset, DIVariable D, Instruction *InsertBefore); private: Constant *GetTagConstant(unsigned TAG); }; - /// Finds the stoppoint coressponding to this instruction, that is the - /// stoppoint that dominates this instruction - const DbgStopPointInst *findStopPoint(const Instruction *Inst); - - /// Finds the stoppoint corresponding to first real (non-debug intrinsic) - /// instruction in this Basic Block, and returns the stoppoint for it. - const DbgStopPointInst *findBBStopPoint(const BasicBlock *BB); - /// Finds the dbg.declare intrinsic corresponding to this value if any. /// It looks through pointer casts too. - const DbgDeclareInst *findDbgDeclare(const Value *V, bool stripCasts = true); + const DbgDeclareInst *findDbgDeclare(const Value *V); /// Find the debug info descriptor corresponding to this global variable. Value *findDbgGlobalDeclare(GlobalVariable *V); @@ -680,21 +667,11 @@ namespace llvm { std::string &Type, unsigned &LineNo, std::string &File, std::string &Dir); - /// ExtractDebugLocation - Extract debug location information - /// from llvm.dbg.stoppoint intrinsic. - DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI, - DebugLocTracker &DebugLocInfo); - /// ExtractDebugLocation - Extract debug location information /// from DILocation. DebugLoc ExtractDebugLocation(DILocation &Loc, DebugLocTracker &DebugLocInfo); - /// ExtractDebugLocation - Extract debug location information - /// from llvm.dbg.func_start intrinsic. 
- DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI, - DebugLocTracker &DebugLocInfo); - /// getDISubprogram - Find subprogram that is enclosing this scope. DISubprogram getDISubprogram(MDNode *Scope); diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h index cca0d502b69c..5ecb34814459 100644 --- a/include/llvm/Analysis/DominatorInternals.h +++ b/include/llvm/Analysis/DominatorInternals.h @@ -347,15 +347,8 @@ void Calculate(DominatorTreeBase::NodeType>& DT, DT.IDoms.clear(); DT.Info.clear(); std::vector().swap(DT.Vertex); - - // FIXME: This does not work on PostDomTrees. It seems likely that this is - // due to an error in the algorithm for post-dominators. This really should - // be investigated and fixed at some point. - // DT.updateDFSNumbers(); - // Start out with the DFS numbers being invalid. Let them be computed if - // demanded. - DT.DFSInfoValid = false; + DT.updateDFSNumbers(); } } diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index 2e149d59e98f..31c19c49313a 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -390,6 +390,13 @@ class DominatorTreeBase : public DominatorBase { if (A == 0 || B == 0) return false; + // Compare the result of the tree walk and the dfs numbers, if expensive + // checks are enabled. +#ifdef XDEBUG + assert(!DFSInfoValid + || (dominatedBySlowTreeWalk(A, B) == B->DominatedBy(A))); +#endif + if (DFSInfoValid) return B->DominatedBy(A); @@ -585,29 +592,35 @@ class DominatorTreeBase : public DominatorBase { SmallVector*, typename DomTreeNodeBase::iterator>, 32> WorkStack; - for (unsigned i = 0, e = (unsigned)this->Roots.size(); i != e; ++i) { - DomTreeNodeBase *ThisRoot = getNode(this->Roots[i]); - WorkStack.push_back(std::make_pair(ThisRoot, ThisRoot->begin())); - ThisRoot->DFSNumIn = DFSNum++; + DomTreeNodeBase *ThisRoot = getRootNode(); - while (!WorkStack.empty()) { - DomTreeNodeBase *Node = WorkStack.back().first; - typename DomTreeNodeBase::iterator ChildIt = - WorkStack.back().second; + if (!ThisRoot) + return; - // If we visited all of the children of this node, "recurse" back up the - // stack setting the DFOutNum. - if (ChildIt == Node->end()) { - Node->DFSNumOut = DFSNum++; - WorkStack.pop_back(); - } else { - // Otherwise, recursively visit this child. - DomTreeNodeBase *Child = *ChildIt; - ++WorkStack.back().second; + // Even in the case of multiple exits that form the post dominator root + // nodes, do not iterate over all exits, but start from the virtual root + // node. Otherwise bbs, that are not post dominated by any exit but by the + // virtual root node, will never be assigned a DFS number. + WorkStack.push_back(std::make_pair(ThisRoot, ThisRoot->begin())); + ThisRoot->DFSNumIn = DFSNum++; - WorkStack.push_back(std::make_pair(Child, Child->begin())); - Child->DFSNumIn = DFSNum++; - } + while (!WorkStack.empty()) { + DomTreeNodeBase *Node = WorkStack.back().first; + typename DomTreeNodeBase::iterator ChildIt = + WorkStack.back().second; + + // If we visited all of the children of this node, "recurse" back up the + // stack setting the DFOutNum. + if (ChildIt == Node->end()) { + Node->DFSNumOut = DFSNum++; + WorkStack.pop_back(); + } else { + // Otherwise, recursively visit this child. 
+ DomTreeNodeBase *Child = *ChildIt; + ++WorkStack.back().second; + + WorkStack.push_back(std::make_pair(Child, Child->begin())); + Child->DFSNumIn = DFSNum++; } } @@ -646,21 +659,17 @@ class DominatorTreeBase : public DominatorBase { /// recalculate - compute a dominator tree for the given function template void recalculate(FT& F) { - if (!this->IsPostDominators) { - reset(); + reset(); + this->Vertex.push_back(0); - // Initialize roots + if (!this->IsPostDominators) { + // Initialize root this->Roots.push_back(&F.front()); this->IDoms[&F.front()] = 0; this->DomTreeNodes[&F.front()] = 0; - this->Vertex.push_back(0); Calculate(*this, F); - - updateDFSNumbers(); } else { - reset(); // Reset from the last time we were run... - // Initialize the roots list for (typename FT::iterator I = F.begin(), E = F.end(); I != E; ++I) { if (std::distance(GraphTraits::child_begin(I), @@ -672,8 +681,6 @@ class DominatorTreeBase : public DominatorBase { this->DomTreeNodes[I] = 0; } - this->Vertex.push_back(0); - Calculate >(*this, F); } } diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 060286fc5e23..33bf0b037488 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -478,7 +478,7 @@ class LoopBase { for (iterator I = begin(), E = end(); I != E; ++I) (*I)->print(OS, Depth+2); } - + protected: friend class LoopInfoBase; explicit LoopBase(BlockT *BB) : ParentLoop(0) { @@ -588,6 +588,8 @@ class Loop : public LoopBase { /// block, return that block. Otherwise return null. BasicBlock *getUniqueExitBlock() const; + void dump() const; + private: friend class LoopInfoBase; explicit Loop(BasicBlock *BB) : LoopBase(BB) {} diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index ea14b2da9ce9..3681cc01f3b7 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -36,19 +36,23 @@ struct PostDominatorTree : public FunctionPass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } - + inline const std::vector &getRoots() const { return DT->getRoots(); } - + inline DomTreeNode *getRootNode() const { return DT->getRootNode(); } - + inline DomTreeNode *operator[](BasicBlock *BB) const { return DT->getNode(BB); } - + + inline DomTreeNode *getNode(BasicBlock *BB) const { + return DT->getNode(BB); + } + inline bool dominates(DomTreeNode* A, DomTreeNode* B) const { return DT->dominates(A, B); } @@ -60,7 +64,7 @@ struct PostDominatorTree : public FunctionPass { inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const { return DT->properlyDominates(A, B); } - + inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const { return DT->properlyDominates(A, B); } @@ -97,7 +101,7 @@ template <> struct GraphTraits /// struct PostDominanceFrontier : public DominanceFrontierBase { static char ID; - PostDominanceFrontier() + PostDominanceFrontier() : DominanceFrontierBase(&ID, true) {} virtual bool runOnFunction(Function &) { diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index 0bbdc349b1b4..068f81fc7de2 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -1,4 +1,4 @@ -//===-- llvm/Attributes.h - Container for Attributes ---*---------- C++ -*-===// +//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -27,9 +27,9 @@ typedef unsigned Attributes; namespace Attribute { -/// Function parameters and results can have attributes 
to indicate how they -/// should be treated by optimizations and code generation. This enumeration -/// lists the attributes that can be associated with parameters, function +/// Function parameters and results can have attributes to indicate how they +/// should be treated by optimizations and code generation. This enumeration +/// lists the attributes that can be associated with parameters, function /// results or the function itself. /// @brief Function attributes. @@ -45,7 +45,7 @@ const Attributes ByVal = 1<<7; ///< Pass structure by value const Attributes Nest = 1<<8; ///< Nested function static chain const Attributes ReadNone = 1<<9; ///< Function does not access memory const Attributes ReadOnly = 1<<10; ///< Function only reads from memory -const Attributes NoInline = 1<<11; ///< inline=never +const Attributes NoInline = 1<<11; ///< inline=never const Attributes AlwaysInline = 1<<12; ///< inline=always const Attributes OptimizeForSize = 1<<13; ///< opt_size const Attributes StackProtect = 1<<14; ///< Stack protection. @@ -58,14 +58,15 @@ const Attributes NoRedZone = 1<<22; /// disable redzone const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point /// instructions. const Attributes Naked = 1<<24; ///< Naked function -const Attributes InlineHint = 1<<25; ///< source said inlining was desirable +const Attributes InlineHint = 1<<25; ///< source said inlining was + ///desirable /// @brief Attributes that only apply to function parameters. const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture; /// @brief Attributes that may be applied to the function itself. These cannot /// be used on return values or function parameters. -const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly | +const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly | NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq | NoRedZone | NoImplicitFloat | Naked | InlineHint; @@ -100,26 +101,26 @@ inline unsigned getAlignmentFromAttrs(Attributes A) { Attributes Align = A & Attribute::Alignment; if (Align == 0) return 0; - + return 1U << ((Align >> 16) - 1); } - - + + /// The set of Attributes set in Attributes is converted to a /// string of equivalent mnemonics. This is, presumably, for writing out -/// the mnemonics for the assembly writer. +/// the mnemonics for the assembly writer. /// @brief Convert attribute bits to text std::string getAsString(Attributes Attrs); } // end namespace Attribute /// This is just a pair of values to associate a set of attributes -/// with an index. +/// with an index. struct AttributeWithIndex { Attributes Attrs; ///< The attributes that are set, or'd together. unsigned Index; ///< Index of the parameter for which the attributes apply. ///< Index 0 is used for return value attributes. ///< Index ~0U is used for function attributes. - + static AttributeWithIndex get(unsigned Idx, Attributes Attrs) { AttributeWithIndex P; P.Index = Idx; @@ -127,14 +128,14 @@ struct AttributeWithIndex { return P; } }; - + //===----------------------------------------------------------------------===// // AttrListPtr Smart Pointer //===----------------------------------------------------------------------===// class AttributeListImpl; - -/// AttrListPtr - This class manages the ref count for the opaque + +/// AttrListPtr - This class manages the ref count for the opaque /// AttributeListImpl object and provides accessors for it. class AttrListPtr { /// AttrList - The attributes that we are managing. 
This can be null @@ -145,14 +146,14 @@ class AttrListPtr { AttrListPtr(const AttrListPtr &P); const AttrListPtr &operator=(const AttrListPtr &RHS); ~AttrListPtr(); - + //===--------------------------------------------------------------------===// // Attribute List Construction and Mutation //===--------------------------------------------------------------------===// - + /// get - Return a Attributes list with the specified parameter in it. static AttrListPtr get(const AttributeWithIndex *Attr, unsigned NumAttrs); - + /// get - Return a Attribute list with the parameters specified by the /// consecutive random access iterator range. template @@ -165,24 +166,24 @@ class AttrListPtr { /// attribute list. Since attribute lists are immutable, this /// returns the new list. AttrListPtr addAttr(unsigned Idx, Attributes Attrs) const; - + /// removeAttr - Remove the specified attribute at the specified index from /// this attribute list. Since attribute lists are immutable, this /// returns the new list. AttrListPtr removeAttr(unsigned Idx, Attributes Attrs) const; - + //===--------------------------------------------------------------------===// // Attribute List Accessors //===--------------------------------------------------------------------===// /// getParamAttributes - The attributes for the specified index are - /// returned. + /// returned. Attributes getParamAttributes(unsigned Idx) const { assert (Idx && Idx != ~0U && "Invalid parameter index!"); return getAttributes(Idx); } /// getRetAttributes - The attributes for the ret value are - /// returned. + /// returned. Attributes getRetAttributes() const { return getAttributes(0); } @@ -191,58 +192,60 @@ class AttrListPtr { Attributes getFnAttributes() const { return getAttributes(~0U); } - + /// paramHasAttr - Return true if the specified parameter index has the /// specified attribute set. bool paramHasAttr(unsigned Idx, Attributes Attr) const { return getAttributes(Idx) & Attr; } - + /// getParamAlignment - Return the alignment for the specified function /// parameter. unsigned getParamAlignment(unsigned Idx) const { return Attribute::getAlignmentFromAttrs(getAttributes(Idx)); } - + /// hasAttrSomewhere - Return true if the specified attribute is set for at /// least one parameter or for the return value. bool hasAttrSomewhere(Attributes Attr) const; /// operator==/!= - Provide equality predicates. - bool operator==(const AttrListPtr &RHS) const { return AttrList == RHS.AttrList; } - bool operator!=(const AttrListPtr &RHS) const { return AttrList != RHS.AttrList; } - + bool operator==(const AttrListPtr &RHS) const + { return AttrList == RHS.AttrList; } + bool operator!=(const AttrListPtr &RHS) const + { return AttrList != RHS.AttrList; } + void dump() const; //===--------------------------------------------------------------------===// // Attribute List Introspection //===--------------------------------------------------------------------===// - + /// getRawPointer - Return a raw pointer that uniquely identifies this - /// attribute list. + /// attribute list. void *getRawPointer() const { return AttrList; } - + // Attributes are stored as a dense set of slots, where there is one // slot for each argument that has an attribute. This allows walking over the // dense set instead of walking the sparse list of attributes. - + /// isEmpty - Return true if there are no attributes. /// bool isEmpty() const { return AttrList == 0; } - - /// getNumSlots - Return the number of slots used in this attribute list. 
+ + /// getNumSlots - Return the number of slots used in this attribute list. /// This is the number of arguments that have an attribute set on them /// (including the function itself). unsigned getNumSlots() const; - + /// getSlot - Return the AttributeWithIndex at the specified slot. This /// holds a index number plus a set of attributes. const AttributeWithIndex &getSlot(unsigned Slot) const; - + private: explicit AttrListPtr(AttributeListImpl *L); diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index c037399b96dc..9bb50d4b3bd5 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -111,10 +111,11 @@ namespace bitc { enum MetadataCodes { METADATA_STRING = 1, // MDSTRING: [values] METADATA_NODE = 2, // MDNODE: [n x (type num, value num)] - METADATA_NAME = 3, // STRING: [values] - METADATA_NAMED_NODE = 4, // NAMEDMDNODE: [n x mdnodes] - METADATA_KIND = 5, // [n x [id, name]] - METADATA_ATTACHMENT = 6 // [m x [value, [n x [id, mdnode]]] + METADATA_FN_NODE = 3, // FN_MDNODE: [n x (type num, value num)] + METADATA_NAME = 4, // STRING: [values] + METADATA_NAMED_NODE = 5, // NAMEDMDNODE: [n x mdnodes] + METADATA_KIND = 6, // [n x [id, name]] + METADATA_ATTACHMENT = 7 // [m x [value, [n x [id, mdnode]]] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each // constant and maintains an implicit current type value. diff --git a/include/llvm/CodeGen/DAGISelHeader.h b/include/llvm/CodeGen/DAGISelHeader.h index 7233f3f0d8d5..4d50879a1526 100644 --- a/include/llvm/CodeGen/DAGISelHeader.h +++ b/include/llvm/CodeGen/DAGISelHeader.h @@ -109,7 +109,7 @@ void SelectRoot(SelectionDAG &DAG) { #if 0 DAG.setSubgraphColor(Node, "red"); #endif - SDNode *ResNode = Select(SDValue(Node, 0)); + SDNode *ResNode = Select(Node); // If node should not be replaced, continue with the next one. if (ResNode == Node) continue; diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 806952a16881..9d0f0d9e57b9 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -139,7 +139,7 @@ class FastISel { /// be emitted. 
virtual unsigned FastEmit_(MVT VT, MVT RetVT, - ISD::NodeType Opcode); + unsigned Opcode); /// FastEmit_r - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -147,7 +147,7 @@ class FastISel { /// virtual unsigned FastEmit_r(MVT VT, MVT RetVT, - ISD::NodeType Opcode, unsigned Op0); + unsigned Opcode, unsigned Op0); /// FastEmit_rr - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -155,7 +155,7 @@ class FastISel { /// virtual unsigned FastEmit_rr(MVT VT, MVT RetVT, - ISD::NodeType Opcode, + unsigned Opcode, unsigned Op0, unsigned Op1); /// FastEmit_ri - This method is called by target-independent code @@ -164,7 +164,7 @@ class FastISel { /// virtual unsigned FastEmit_ri(MVT VT, MVT RetVT, - ISD::NodeType Opcode, + unsigned Opcode, unsigned Op0, uint64_t Imm); /// FastEmit_rf - This method is called by target-independent code @@ -173,7 +173,7 @@ class FastISel { /// virtual unsigned FastEmit_rf(MVT VT, MVT RetVT, - ISD::NodeType Opcode, + unsigned Opcode, unsigned Op0, ConstantFP *FPImm); /// FastEmit_rri - This method is called by target-independent code @@ -182,7 +182,7 @@ class FastISel { /// virtual unsigned FastEmit_rri(MVT VT, MVT RetVT, - ISD::NodeType Opcode, + unsigned Opcode, unsigned Op0, unsigned Op1, uint64_t Imm); /// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries @@ -190,7 +190,7 @@ class FastISel { /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. unsigned FastEmit_ri_(MVT VT, - ISD::NodeType Opcode, + unsigned Opcode, unsigned Op0, uint64_t Imm, MVT ImmType); @@ -199,7 +199,7 @@ class FastISel { /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. unsigned FastEmit_rf_(MVT VT, - ISD::NodeType Opcode, + unsigned Opcode, unsigned Op0, ConstantFP *FPImm, MVT ImmType); @@ -208,7 +208,7 @@ class FastISel { /// immediate operand be emitted. virtual unsigned FastEmit_i(MVT VT, MVT RetVT, - ISD::NodeType Opcode, + unsigned Opcode, uint64_t Imm); /// FastEmit_f - This method is called by target-independent code @@ -216,7 +216,7 @@ class FastISel { /// floating-point immediate operand be emitted. virtual unsigned FastEmit_f(MVT VT, MVT RetVT, - ISD::NodeType Opcode, + unsigned Opcode, ConstantFP *FPImm); /// FastEmitInst_ - Emit a MachineInstr with no operands and a @@ -298,7 +298,7 @@ class FastISel { } private: - bool SelectBinaryOp(User *I, ISD::NodeType ISDOpcode); + bool SelectBinaryOp(User *I, unsigned ISDOpcode); bool SelectFNeg(User *I); @@ -308,7 +308,7 @@ class FastISel { bool SelectBitCast(User *I); - bool SelectCast(User *I, ISD::NodeType Opcode); + bool SelectCast(User *I, unsigned Opcode); }; } diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index f1bfa014585e..a12a55aefcfc 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -315,6 +315,8 @@ class MachineFunction { /// 'Orig' instruction, identical in all ways except the the instruction /// has no parent, prev, or next. /// + /// See also TargetInstrInfo::duplicate() for target-specific fixes to cloned + /// instructions. MachineInstr *CloneMachineInstr(const MachineInstr *Orig); /// DeleteMachineInstr - Delete the given MachineInstr. 
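[The CloneMachineInstr/duplicate() note just above is easy to misread, so a short sketch may help: the clone starts out orphaned and only becomes live once the caller inserts it into a block. A minimal sketch under the assumption of pass code holding a MachineFunction; duplicateIntoBlock is a hypothetical helper, not an LLVM API:]

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // Hypothetical helper: CloneMachineInstr returns a copy with no parent,
  // prev, or next; it is the caller's job to insert it somewhere.
  static void duplicateIntoBlock(MachineFunction &MF, MachineBasicBlock &MBB,
                                 const MachineInstr *Orig) {
    MachineInstr *Copy = MF.CloneMachineInstr(Orig);
    MBB.push_back(Copy); // now parented; list links are set on insertion
  }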
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 87b67d6242d0..c2a057822ffd 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -288,7 +288,7 @@ class MachineInstr : public ilist_node { bool addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); - + /// addRegisterDead - We have determined MI defined a register without a use. /// Look for the operand that defines it and mark it as IsDead. If /// AddIfNotFound is true, add a implicit operand if it's not found. Returns @@ -296,6 +296,11 @@ class MachineInstr : public ilist_node { bool addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); + /// addRegisterDefined - We have determined MI defines a register. Make sure + /// there is an operand defining Reg. + void addRegisterDefined(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo); + /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 6ca63f01109e..8eb0add01920 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -22,6 +22,7 @@ namespace llvm { class TargetInstrDesc; +class MDNode; namespace RegState { enum { @@ -123,6 +124,11 @@ class MachineInstrBuilder { MI->addOperand(MO); return *this; } + + const MachineInstrBuilder &addMetadata(MDNode *MD) const { + MI->addOperand(MachineOperand::CreateMetadata(MD)); + return *this; + } }; /// BuildMI - Builder interface. Specify how to create the initial instruction diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index d3df805f642b..8459a8db9a30 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -49,6 +49,8 @@ class MachineLoop : public LoopBase { /// contiguous with the part the contains the header. MachineBasicBlock *getBottomBlock(); + void dump() const; + private: friend class LoopInfoBase; explicit MachineLoop(MachineBasicBlock *MBB) diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 8748afcba92e..907c25af7d47 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -26,6 +26,7 @@ class GlobalValue; class MachineInstr; class TargetMachine; class MachineRegisterInfo; +class MDNode; class raw_ostream; /// MachineOperand class - Representation of each machine instruction operand. @@ -42,7 +43,8 @@ class MachineOperand { MO_JumpTableIndex, ///< Address of indexed Jump Table for switch MO_ExternalSymbol, ///< Name of external global symbol MO_GlobalAddress, ///< Address of a global value - MO_BlockAddress ///< Address of a basic block + MO_BlockAddress, ///< Address of a basic block + MO_Metadata ///< Metadata reference (for debug info) }; private: @@ -94,6 +96,7 @@ class MachineOperand { MachineBasicBlock *MBB; // For MO_MachineBasicBlock. const ConstantFP *CFP; // For MO_FPImmediate. int64_t ImmVal; // For MO_Immediate. + MDNode *MD; // For MO_Metadata. struct { // For MO_Register. unsigned RegNo; @@ -158,6 +161,8 @@ class MachineOperand { bool isSymbol() const { return OpKind == MO_ExternalSymbol; } /// isBlockAddress - Tests if this is a MO_BlockAddress operand. 
bool isBlockAddress() const { return OpKind == MO_BlockAddress; } + /// isMetadata - Tests if this is a MO_Metadata operand. + bool isMetadata() const { return OpKind == MO_Metadata; } //===--------------------------------------------------------------------===// // Accessors for Register Operands @@ -311,6 +316,11 @@ class MachineOperand { assert(isSymbol() && "Wrong MachineOperand accessor"); return Contents.OffsetedInfo.Val.SymbolName; } + + const MDNode *getMetadata() const { + assert(isMetadata() && "Wrong MachineOperand accessor"); + return Contents.MD; + } //===--------------------------------------------------------------------===// // Mutators for various operand types. @@ -443,6 +453,13 @@ class MachineOperand { Op.setTargetFlags(TargetFlags); return Op; } + static MachineOperand CreateMetadata(MDNode *Meta, + unsigned char TargetFlags = 0) { + MachineOperand Op(MachineOperand::MO_Metadata); + Op.Contents.MD = Meta; + Op.setTargetFlags(TargetFlags); + return Op; + } friend class MachineInstr; friend class MachineRegisterInfo; diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 99f8c34cce85..2203f8c12024 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -170,6 +170,10 @@ namespace llvm { /// instructions. FunctionPass *createMachineSinkingPass(); + /// createOptimizeExtsPass - This pass performs sign / zero extension + /// optimization by increasing uses of extended values. + FunctionPass *createOptimizeExtsPass(); + /// createStackSlotColoringPass - This pass performs stack slot coloring. FunctionPass *createStackSlotColoringPass(bool); diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index bfd3492f210c..b33b21da42ad 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -111,11 +111,11 @@ class SelectionDAGISel : public MachineFunctionPass { int64_t DesiredMaskS) const; // Calls to these functions are generated by tblgen. - SDNode *Select_INLINEASM(SDValue N); - SDNode *Select_UNDEF(const SDValue &N); - SDNode *Select_EH_LABEL(const SDValue &N); - void CannotYetSelect(SDValue N); - void CannotYetSelectIntrinsic(SDValue N); + SDNode *Select_INLINEASM(SDNode *N); + SDNode *Select_UNDEF(SDNode *N); + SDNode *Select_EH_LABEL(SDNode *N); + void CannotYetSelect(SDNode *N); + void CannotYetSelectIntrinsic(SDNode *N); private: void SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, @@ -131,6 +131,7 @@ class SelectionDAGISel : public MachineFunctionPass { void CodeGenAndEmitDAG(); void LowerArguments(BasicBlock *BB); + void ShrinkDemandedOps(); void ComputeLiveOutVRegInfo(); void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB); diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 9dc4c7b7fa4a..0125190e37e5 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -149,7 +149,7 @@ namespace llvm { SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); } - /// isPow2VectorType - Retuns true if the given vector is a power of 2. + /// isPow2VectorType - Returns true if the given vector is a power of 2. bool isPow2VectorType() const { unsigned NElts = getVectorNumElements(); return !(NElts & (NElts - 1)); @@ -437,25 +437,17 @@ namespace llvm { /// isFloatingPoint - Return true if this is a FP, or a vector FP type. bool isFloatingPoint() const { - return isSimple() ? 
- ((V >= MVT::f32 && V <= MVT::ppcf128) || - (V >= MVT::v2f32 && V <= MVT::v4f64)) : isExtendedFloatingPoint(); + return isSimple() ? V.isFloatingPoint() : isExtendedFloatingPoint(); } /// isInteger - Return true if this is an integer, or a vector integer type. bool isInteger() const { - return isSimple() ? - ((V >= MVT::FIRST_INTEGER_VALUETYPE && - V <= MVT::LAST_INTEGER_VALUETYPE) || - (V >= MVT::v2i8 && V <= MVT::v4i64)) : isExtendedInteger(); + return isSimple() ? V.isInteger() : isExtendedInteger(); } /// isVector - Return true if this is a vector value type. bool isVector() const { - return isSimple() ? - (V >= MVT::FIRST_VECTOR_VALUETYPE && V <= - MVT::LAST_VECTOR_VALUETYPE) : - isExtendedVector(); + return isSimple() ? V.isVector() : isExtendedVector(); } /// is64BitVector - Return true if this is a 64-bit vector type. @@ -641,7 +633,7 @@ namespace llvm { static EVT getEVT(const Type *Ty, bool HandleUnknown = false); intptr_t getRawBits() { - if (V.SimpleTy <= MVT::LastSimpleValueType) + if (isSimple()) return V.SimpleTy; else return (intptr_t)(LLVMTy); diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h index 3c18de106740..f40e8cc76a54 100644 --- a/include/llvm/IntrinsicInst.h +++ b/include/llvm/IntrinsicInst.h @@ -25,6 +25,7 @@ #define LLVM_INTRINSICINST_H #include "llvm/Constants.h" +#include "llvm/Metadata.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" @@ -58,16 +59,13 @@ namespace llvm { /// DbgInfoIntrinsic - This is the common base class for debug info intrinsics /// - struct DbgInfoIntrinsic : public IntrinsicInst { + class DbgInfoIntrinsic : public IntrinsicInst { + public: // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgInfoIntrinsic *) { return true; } static inline bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { - case Intrinsic::dbg_stoppoint: - case Intrinsic::dbg_func_start: - case Intrinsic::dbg_region_start: - case Intrinsic::dbg_region_end: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: return true; @@ -81,84 +79,16 @@ namespace llvm { static Value *StripCast(Value *C); }; - /// DbgStopPointInst - This represents the llvm.dbg.stoppoint instruction. - /// - struct DbgStopPointInst : public DbgInfoIntrinsic { - Value *getLineValue() const { return const_cast<Value*>(getOperand(1)); } - Value *getColumnValue() const { return const_cast<Value*>(getOperand(2)); } - MDNode *getContext() const { - return cast<MDNode>(getOperand(3)); - } - - unsigned getLine() const { - return unsigned(cast<ConstantInt>(getOperand(1))->getZExtValue()); - } - unsigned getColumn() const { - return unsigned(cast<ConstantInt>(getOperand(2))->getZExtValue()); - } - - Value *getFileName() const; - Value *getDirectory() const; - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const DbgStopPointInst *) { return true; } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_stoppoint; - } - static inline bool classof(const Value *V) { - return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); - } - }; - - /// DbgFuncStartInst - This represents the llvm.dbg.func.start instruction.
- /// - struct DbgFuncStartInst : public DbgInfoIntrinsic { - MDNode *getSubprogram() const { return cast(getOperand(1)); } - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const DbgFuncStartInst *) { return true; } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_func_start; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } - }; - - /// DbgRegionStartInst - This represents the llvm.dbg.region.start - /// instruction. - struct DbgRegionStartInst : public DbgInfoIntrinsic { - MDNode *getContext() const { return cast(getOperand(1)); } - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const DbgRegionStartInst *) { return true; } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_region_start; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } - }; - - /// DbgRegionEndInst - This represents the llvm.dbg.region.end instruction. - /// - struct DbgRegionEndInst : public DbgInfoIntrinsic { - MDNode *getContext() const { return cast(getOperand(1)); } - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const DbgRegionEndInst *) { return true; } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_region_end; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } - }; - /// DbgDeclareInst - This represents the llvm.dbg.declare instruction. /// - struct DbgDeclareInst : public DbgInfoIntrinsic { - Value *getAddress() const { return getOperand(1); } + class DbgDeclareInst : public DbgInfoIntrinsic { + public: + Value *getAddress() const { + if (MDNode* MD = dyn_cast(getOperand(1))) + return MD->getOperand(0); + else + return NULL; + } MDNode *getVariable() const { return cast(getOperand(2)); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -173,10 +103,16 @@ namespace llvm { /// DbgValueInst - This represents the llvm.dbg.value instruction. /// - struct DbgValueInst : public DbgInfoIntrinsic { - Value *getValue() const; - Value *getOffset() const { return getOperand(2); } - MDNode *getVariable() const { return cast(getOperand(3)); } + class DbgValueInst : public DbgInfoIntrinsic { + public: + const Value *getValue() const; + Value *getValue(); + uint64_t getOffset() const { + return cast( + const_cast(getOperand(2)))->getZExtValue(); + } + const MDNode *getVariable() const { return cast(getOperand(3)); } + MDNode *getVariable() { return cast(getOperand(3)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgValueInst *) { return true; } @@ -190,7 +126,8 @@ namespace llvm { /// MemIntrinsic - This is the common base class for memset/memcpy/memmove. /// - struct MemIntrinsic : public IntrinsicInst { + class MemIntrinsic : public IntrinsicInst { + public: Value *getRawDest() const { return const_cast(getOperand(1)); } Value *getLength() const { return const_cast(getOperand(3)); } @@ -247,7 +184,8 @@ namespace llvm { /// MemSetInst - This class wraps the llvm.memset intrinsic. /// - struct MemSetInst : public MemIntrinsic { + class MemSetInst : public MemIntrinsic { + public: /// get* - Return the arguments to the instruction. 
/// Value *getValue() const { return const_cast(getOperand(2)); } @@ -270,7 +208,8 @@ namespace llvm { /// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics. /// - struct MemTransferInst : public MemIntrinsic { + class MemTransferInst : public MemIntrinsic { + public: /// get* - Return the arguments to the instruction. /// Value *getRawSource() const { return const_cast(getOperand(2)); } @@ -300,7 +239,8 @@ namespace llvm { /// MemCpyInst - This class wraps the llvm.memcpy intrinsic. /// - struct MemCpyInst : public MemTransferInst { + class MemCpyInst : public MemTransferInst { + public: // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MemCpyInst *) { return true; } static inline bool classof(const IntrinsicInst *I) { @@ -313,7 +253,8 @@ namespace llvm { /// MemMoveInst - This class wraps the llvm.memmove intrinsic. /// - struct MemMoveInst : public MemTransferInst { + class MemMoveInst : public MemTransferInst { + public: // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MemMoveInst *) { return true; } static inline bool classof(const IntrinsicInst *I) { @@ -326,7 +267,8 @@ namespace llvm { /// EHSelectorInst - This represents the llvm.eh.selector instruction. /// - struct EHSelectorInst : public IntrinsicInst { + class EHSelectorInst : public IntrinsicInst { + public: // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const EHSelectorInst *) { return true; } static inline bool classof(const IntrinsicInst *I) { @@ -340,7 +282,8 @@ namespace llvm { /// MemoryUseIntrinsic - This is the common base class for the memory use /// marker intrinsics. /// - struct MemoryUseIntrinsic : public IntrinsicInst { + class MemoryUseIntrinsic : public IntrinsicInst { + public: // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MemoryUseIntrinsic *) { return true; } diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index c472f2be2069..684f8724cf76 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -282,14 +282,8 @@ let Properties = [IntrNoMem] in { // optimizers can change them aggressively. Special handling needed in a few // places. 
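The Intrinsics.td hunk below deletes the four location intrinsics and leaves llvm.dbg.declare taking metadata for both operands. As a hedged sketch of what that means for clients, this is roughly how a frontend now emits a declare through DIFactory (Storage, VarDesc, and InsertBefore are placeholder values invented for this example; the MDNode wrapping happens inside InsertDeclare, as the DebugInfo.cpp hunk later in this patch shows):

  // Sketch only: emit llvm.dbg.declare for a local's storage.
  DIFactory DIF(M);                      // M is some llvm::Module
  Instruction *DbgDecl =
      DIF.InsertDeclare(Storage,         // e.g. an AllocaInst*
                        VarDesc,         // a DIVariable describing it
                        InsertBefore);   // insertion point instruction
  // The first call operand is MDNode::get(Ctx, &Storage, 1); the old
  // bitcast of Storage to {}* is gone.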
let Properties = [IntrNoMem] in { - def int_dbg_stoppoint : Intrinsic<[llvm_void_ty], - [llvm_i32_ty, llvm_i32_ty, - llvm_metadata_ty]>; - def int_dbg_region_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>; - def int_dbg_region_end : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>; - def int_dbg_func_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>; def int_dbg_declare : Intrinsic<[llvm_void_ty], - [llvm_descriptor_ty, llvm_metadata_ty]>; + [llvm_metadata_ty, llvm_metadata_ty]>; def int_dbg_value : Intrinsic<[llvm_void_ty], [llvm_metadata_ty, llvm_i64_ty, llvm_metadata_ty]>; diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 4aba210421dd..a7e2e05b9312 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -53,7 +53,6 @@ namespace { (void) llvm::createLibCallAliasAnalysisPass(0); (void) llvm::createScalarEvolutionAliasAnalysisPass(); (void) llvm::createBlockPlacementPass(); - (void) llvm::createBlockProfilerPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCFGSimplificationPass(); (void) llvm::createConstantMergePass(); @@ -71,7 +70,6 @@ namespace { (void) llvm::createOptimalEdgeProfilerPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); - (void) llvm::createFunctionProfilerPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); (void) llvm::createGlobalsModRefPass(); @@ -120,8 +118,6 @@ namespace { (void) llvm::createTailDuplicationPass(); (void) llvm::createJumpThreadingPass(); (void) llvm::createUnifyFunctionExitNodesPass(); - (void) llvm::createNullProfilerRSPass(); - (void) llvm::createRSProfilingPass(); (void) llvm::createInstCountPass(); (void) llvm::createCodeGenPreparePass(); (void) llvm::createGVNPass(); diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h index da471d284ccf..e9a6e3fda4ae 100644 --- a/include/llvm/MC/MCAsmLexer.h +++ b/include/llvm/MC/MCAsmLexer.h @@ -20,7 +20,8 @@ class SMLoc; class Target; /// AsmToken - Target independent representation for an assembler token. -struct AsmToken { +class AsmToken { +public: enum TokenKind { // Markers Eof, Error, diff --git a/include/llvm/MC/MCParsedAsmOperand.h b/include/llvm/MC/MCParsedAsmOperand.h new file mode 100644 index 000000000000..7c2f5beb7473 --- /dev/null +++ b/include/llvm/MC/MCParsedAsmOperand.h @@ -0,0 +1,33 @@ +//===-- llvm/MC/MCParsedAsmOperand.h - Asm Parser Operand -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCASMOPERAND_H +#define LLVM_MC_MCASMOPERAND_H + +namespace llvm { +class SMLoc; + +/// MCParsedAsmOperand - This abstract class represents a source-level assembly +/// instruction operand. It should be subclassed by target-specific code. This +/// base class is used by target-independent clients and is the interface +/// between parsing an asm instruction and recognizing it. +class MCParsedAsmOperand { +public: + MCParsedAsmOperand() {} + virtual ~MCParsedAsmOperand() {} + + /// getStartLoc - Get the location of the first token of this operand. + virtual SMLoc getStartLoc() const; + /// getEndLoc - Get the location of the last token of this operand. + virtual SMLoc getEndLoc() const; +}; + +} // end namespace llvm. 
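Since MCParsedAsmOperand above is a brand-new interface, a hypothetical minimal subclass may help show the contract; only the two location accessors exist at this point, and everything else (registers, immediates, memory operands) is left to the target:

  // Sketch only; 'MyTargetOperand' is an invented name, not part of the patch.
  #include "llvm/MC/MCParsedAsmOperand.h"
  #include "llvm/Support/SourceMgr.h"    // SMLoc lives here in this era

  class MyTargetOperand : public MCParsedAsmOperand {
    llvm::SMLoc StartLoc, EndLoc;
  public:
    MyTargetOperand(llvm::SMLoc S, llvm::SMLoc E) : StartLoc(S), EndLoc(E) {}
    llvm::SMLoc getStartLoc() const { return StartLoc; }  // first token
    llvm::SMLoc getEndLoc() const { return EndLoc; }      // last token
  };

A real target would also record the operand kind (register, immediate, ...) so that instruction matching can map it onto an MCInst.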
+ +#endif diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index cfe04d8855e1..eb594532fe0b 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -136,6 +136,11 @@ namespace llvm { /// dump - Print the value to stderr. void dump() const; + + /// printMangledName - Print the specified string in mangled form if it uses + /// any unusual characters. + static void printMangledName(StringRef Str, raw_ostream &OS, + const MCAsmInfo *MAI); }; } // end namespace llvm diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h index ec6ba1b63d5e..179010b16570 100644 --- a/include/llvm/Metadata.h +++ b/include/llvm/Metadata.h @@ -31,7 +31,7 @@ template //===----------------------------------------------------------------------===// -// MetadataBase - A base class for MDNode, MDString and NamedMDNode. +// MetadataBase - A base class for MDNode and MDString. class MetadataBase : public Value { protected: MetadataBase(const Type *Ty, unsigned scid) @@ -42,8 +42,7 @@ class MetadataBase : public Value { /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MetadataBase *) { return true; } static bool classof(const Value *V) { - return V->getValueID() == MDStringVal || V->getValueID() == MDNodeVal - || V->getValueID() == NamedMDNodeVal; + return V->getValueID() == MDStringVal || V->getValueID() == MDNodeVal; } }; @@ -113,6 +112,13 @@ class MDNode : public MetadataBase, public FoldingSetNode { DestroyFlag = 1 << 2 }; + // FunctionLocal enums. + enum FunctionLocalness { + FL_Unknown = -1, + FL_No = 0, + FL_Yes = 1 + }; + // Replace each instance of F from the operand list of this node with T. void replaceOperand(MDNodeOperand *Op, Value *NewVal); ~MDNode(); @@ -120,10 +126,17 @@ class MDNode : public MetadataBase, public FoldingSetNode { protected: explicit MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, bool isFunctionLocal); + + static MDNode *getMDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, + FunctionLocalness FL); public: // Constructors and destructors. - static MDNode *get(LLVMContext &Context, Value *const *Vals, unsigned NumVals, - bool isFunctionLocal = false); + static MDNode *get(LLVMContext &Context, Value *const *Vals, + unsigned NumVals); + // getWhenValsUnresolved - Construct MDNode determining function-localness + // from isFunctionLocal argument, not by analyzing Vals. + static MDNode *getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, + unsigned NumVals, bool isFunctionLocal); /// getOperand - Return specified operand. Value *getOperand(unsigned i) const; @@ -138,6 +151,11 @@ class MDNode : public MetadataBase, public FoldingSetNode { bool isFunctionLocal() const { return (getSubclassDataFromValue() & FunctionLocalBit) != 0; } + + // getFunction - If this metadata is function-local and recursively has a + // function-local operand, return the first such operand's parent function. + // Otherwise, return null. + Function *getFunction() const; // destroy - Delete this node. Only when there are no uses. void destroy(); @@ -167,24 +185,25 @@ class MDNode : public MetadataBase, public FoldingSetNode { }; //===----------------------------------------------------------------------===// -/// NamedMDNode - a tuple of other metadata. +/// NamedMDNode - a tuple of MDNodes. /// NamedMDNode is always named. All NamedMDNode operand has a type of metadata. 
-class NamedMDNode : public MetadataBase, public ilist_node { +class NamedMDNode : public Value, public ilist_node { friend class SymbolTableListTraits; + friend struct ilist_traits; friend class LLVMContextImpl; - NamedMDNode(const NamedMDNode &); // DO NOT IMPLEMENT + std::string Name; Module *Parent; - void *Operands; // SmallVector, 4> + void *Operands; // SmallVector, 4> void setParent(Module *M) { Parent = M; } protected: - explicit NamedMDNode(LLVMContext &C, const Twine &N, MetadataBase*const *Vals, + explicit NamedMDNode(LLVMContext &C, const Twine &N, MDNode*const *Vals, unsigned NumVals, Module *M = 0); public: - static NamedMDNode *Create(LLVMContext &C, const Twine &N, - MetadataBase *const *MDs, + static NamedMDNode *Create(LLVMContext &C, const Twine &N, + MDNode *const *MDs, unsigned NumMDs, Module *M = 0) { return new NamedMDNode(C, N, MDs, NumMDs, M); } @@ -206,14 +225,20 @@ class NamedMDNode : public MetadataBase, public ilist_node { inline const Module *getParent() const { return Parent; } /// getOperand - Return specified operand. - MetadataBase *getOperand(unsigned i) const; + MDNode *getOperand(unsigned i) const; /// getNumOperands - Return the number of NamedMDNode operands. unsigned getNumOperands() const; /// addOperand - Add metadata operand. - void addOperand(MetadataBase *M); - + void addOperand(MDNode *M); + + /// setName - Set the name of this named metadata. + void setName(const Twine &NewName); + + /// getName - Return a constant reference to this named metadata's name. + StringRef getName() const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const NamedMDNode *) { return true; } static bool classof(const Value *V) { diff --git a/include/llvm/Module.h b/include/llvm/Module.h index 9a8b53ac586a..3c8055d09fd7 100644 --- a/include/llvm/Module.h +++ b/include/llvm/Module.h @@ -26,6 +26,7 @@ namespace llvm { class FunctionType; class LLVMContext; +class MDSymbolTable; template<> struct ilist_traits : public SymbolTableListTraits { @@ -56,6 +57,7 @@ template<> struct ilist_traits static GlobalAlias *createSentinel(); static void destroySentinel(GlobalAlias *GA) { delete GA; } }; + template<> struct ilist_traits : public SymbolTableListTraits { // createSentinel is used to get hold of a node that marks the end of @@ -68,6 +70,8 @@ template<> struct ilist_traits NamedMDNode *provideInitialHead() const { return createSentinel(); } NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); } static void noteHead(NamedMDNode*, NamedMDNode*) {} + void addNodeToList(NamedMDNode *N); + void removeNodeFromList(NamedMDNode *N); private: mutable ilist_node Sentinel; }; @@ -131,19 +135,20 @@ class Module { /// @name Member Variables /// @{ private: - LLVMContext &Context; ///< The LLVMContext from which types and - ///< constants are allocated. - GlobalListType GlobalList; ///< The Global Variables in the module - FunctionListType FunctionList; ///< The Functions in the module - AliasListType AliasList; ///< The Aliases in the module - LibraryListType LibraryList; ///< The Libraries needed by the module - NamedMDListType NamedMDList; ///< The named metadata in the module - std::string GlobalScopeAsm; ///< Inline Asm at global scope. 
-  ValueSymbolTable *ValSymTab;    ///< Symbol table for values
-  TypeSymbolTable *TypeSymTab;    ///< Symbol table for types
-  std::string ModuleID;           ///< Human readable identifier for the module
-  std::string TargetTriple;       ///< Platform target triple Module compiled on
-  std::string DataLayout;         ///< Target data description
+  LLVMContext &Context;           ///< The LLVMContext from which types and
+                                  ///< constants are allocated.
+  GlobalListType GlobalList;      ///< The Global Variables in the module
+  FunctionListType FunctionList;  ///< The Functions in the module
+  AliasListType AliasList;        ///< The Aliases in the module
+  LibraryListType LibraryList;    ///< The Libraries needed by the module
+  NamedMDListType NamedMDList;    ///< The named metadata in the module
+  std::string GlobalScopeAsm;     ///< Inline Asm at global scope.
+  ValueSymbolTable *ValSymTab;    ///< Symbol table for values
+  TypeSymbolTable *TypeSymTab;    ///< Symbol table for types
+  std::string ModuleID;           ///< Human readable identifier for the module
+  std::string TargetTriple;       ///< Platform target triple Module compiled on
+  std::string DataLayout;         ///< Target data description
+  MDSymbolTable *NamedMDSymTab;   ///< NamedMDNode names.

   friend class Constant;

@@ -379,6 +384,10 @@ class Module {
   const TypeSymbolTable   &getTypeSymbolTable() const { return *TypeSymTab; }
   /// Get the Module's symbol table of types
   TypeSymbolTable         &getTypeSymbolTable()       { return *TypeSymTab; }
+  /// Get the symbol table of named metadata
+  const MDSymbolTable &getMDSymbolTable() const { return *NamedMDSymTab; }
+  /// Get the Module's symbol table of named metadata
+  MDSymbolTable &getMDSymbolTable() { return *NamedMDSymTab; }

 /// @}
 /// @name Global Variable Iteration
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index 3a20696f05aa..90b95bf7cfd7 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -93,7 +93,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
   typedef SuccIterator<Term_, BB_> _Self;
   typedef typename super::pointer pointer;

-  // TODO: This can be random access iterator, need operator+ and stuff tho
+  // TODO: This can be random access iterator, only operator[] missing.

   inline SuccIterator(Term_ T) : Term(T), idx(0) {   // begin iterator
     assert(T && "getTerminator returned null!");
@@ -109,6 +109,10 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
     idx = I.idx;
     return *this;
   }

+  inline bool index_is_valid(int idx) {
+    return idx >= 0 && (unsigned) idx < Term->getNumSuccessors();
+  }
+
   /// getSuccessorIndex - This is used to interface between code that wants to
   /// operate on terminator instructions directly.
   unsigned getSuccessorIndex() const { return idx; }
@@ -120,6 +124,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
   inline pointer operator->() const { return operator*(); }

   inline _Self& operator++() { ++idx; return *this; } // Preincrement
+
   inline _Self operator++(int) { // Postincrement
     _Self tmp = *this; ++*this; return tmp;
   }
@@ -128,6 +133,67 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
   inline _Self operator--(int) { // Postdecrement
     _Self tmp = *this; --*this; return tmp;
   }

+  inline bool operator<(const _Self& x) const {
+    assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+    return idx < x.idx;
+  }
+
+  inline bool operator<=(const _Self& x) const {
+    assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+    return idx <= x.idx;
+  }
+  inline bool operator>=(const _Self& x) const {
+    assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+    return idx >= x.idx;
+  }
+
+  inline bool operator>(const _Self& x) const {
+    assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+    return idx > x.idx;
+  }
+
+  inline _Self& operator+=(int Right) {
+    unsigned new_idx = idx + Right;
+    assert(index_is_valid(new_idx) && "Iterator index out of bound");
+    idx = new_idx;
+    return *this;
+  }
+
+  inline _Self operator+(int Right) {
+    _Self tmp = *this;
+    tmp += Right;
+    return tmp;
+  }
+
+  inline _Self& operator-=(int Right) {
+    return operator+=(-Right);
+  }
+
+  inline _Self operator-(int Right) {
+    return operator+(-Right);
+  }
+
+  inline int operator-(const _Self& x) {
+    assert(Term == x.Term && "Cannot work on iterators of different blocks!");
+    int distance = idx - x.idx;
+    return distance;
+  }
+
+  // This works for read access, however write access is difficult as changes
+  // to Term are only possible with Term->setSuccessor(idx). Pointers that can
+  // be modified are not available.
+  //
+  // inline pointer operator[](int offset) {
+  //   _Self tmp = *this;
+  //   tmp += offset;
+  //   return tmp.operator*();
+  // }
+
+  /// Get the source BB of this iterator.
+  inline BB_ *getSource() {
+    return Term->getParent();
+  }
 };

 typedef SuccIterator<TerminatorInst*, BasicBlock> succ_iterator;
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 09ab17c446e0..af546f0535dc 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -144,6 +144,10 @@ formatted_raw_ostream &fouts();
 /// standard error.  Use it like: ferrs() << "foo" << "bar";
 formatted_raw_ostream &ferrs();

+/// fdbgs() - This returns a reference to a formatted_raw_ostream for
+/// debug output.  Use it like: fdbgs() << "foo" << "bar";
+formatted_raw_ostream &fdbgs();
+
 } // end llvm namespace

diff --git a/include/llvm/Support/Mangler.h b/include/llvm/Support/Mangler.h
index 03c564897bb8..aa230d48e86d 100644
--- a/include/llvm/Support/Mangler.h
+++ b/include/llvm/Support/Mangler.h
@@ -19,6 +19,7 @@
 #include <string>

 namespace llvm {
+class Twine;
 class Type;
 class Module;
 class Value;
@@ -101,9 +102,25 @@ class Mangler {
   /// specified suffix. If 'ForcePrivate' is specified, the label is specified
   /// to have a private label prefix.
   ///
+  /// FIXME: This is deprecated, new code should use getNameWithPrefix and use
+  /// MCSymbol printing to handle quotes or not etc.
+  ///
   std::string getMangledName(const GlobalValue *V, const char *Suffix = "",
                              bool ForcePrivate = false);

+  /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+  /// and the specified global variable's name. If the global variable doesn't
+  /// have a name, this fills in a unique name for the global.
+  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+                         bool isImplicitlyPrivate);
+
+  /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+  /// and the specified name as the global variable name. GVName must not be
+  /// empty.
+  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
+                         ManglerPrefixTy PrefixTy = Mangler::Default);
+
+private:
   /// makeNameProper - We don't want identifier names with ., space, or
   /// - in them, so we mangle these characters into the strings "d_",
   /// "s_", and "D_", respectively. This is a very simple mangling that
   /// does this for you, so there's no point calling it on the result
   /// from getValueName.
   ///
-  std::string makeNameProper(const std::string &x,
-                             ManglerPrefixTy PrefixTy = Mangler::Default);
+  /// FIXME: This is deprecated, new code should use getNameWithPrefix and use
+  /// MCSymbol printing to handle quotes or not etc.
+  ///
+  void makeNameProper(SmallVectorImpl<char> &OutName,
+                      const Twine &Name,
+                      ManglerPrefixTy PrefixTy = Mangler::Default);

-  /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
-  /// and the specified global variable's name. If the global variable doesn't
-  /// have a name, this fills in a unique name for the global.
-  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
-                         bool isImplicitlyPrivate);
 };

 } // End llvm namespace
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 438b021e46b4..fa12416aeac7 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -160,7 +160,7 @@ inline unsigned CountLeadingZeros_32(uint32_t Value) {
 #else
   if (!Value) return 32;
   Count = 0;
-  // bisecton method for count leading zeros
+  // bisection method for count leading zeros
   for (unsigned Shift = 32 >> 1; Shift; Shift >>= 1) {
     uint32_t Tmp = Value >> Shift;
     if (Tmp) {
@@ -197,7 +197,7 @@ inline unsigned CountLeadingZeros_64(uint64_t Value) {
   if (sizeof(long) == sizeof(int64_t)) {
     if (!Value) return 64;
     Count = 0;
-    // bisecton method for count leading zeros
+    // bisection method for count leading zeros
     for (unsigned Shift = 64 >> 1; Shift; Shift >>= 1) {
       uint64_t Tmp = Value >> Shift;
       if (Tmp) {
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index c0b6a6b98c09..23daad923929 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -437,7 +437,7 @@ m_SelectCst(const Cond &C) {
 // Matchers for CastInst classes
 //

-template<typename Op_t, typename Class>
+template<typename Op_t, unsigned Opcode>
 struct CastClass_match {
   Op_t Op;
@@ -445,17 +445,28 @@ struct CastClass_match {

   template<typename OpTy>
   bool match(OpTy *V) {
-    if (Class *I = dyn_cast<Class>(V))
-      return Op.match(I->getOperand(0));
+    if (CastInst *I = dyn_cast<CastInst>(V))
+      return I->getOpcode() == Opcode && Op.match(I->getOperand(0));
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      return CE->getOpcode() == Opcode && Op.match(CE->getOperand(0));
     return false;
   }
 };

-template<typename Class, typename OpTy>
-inline CastClass_match<OpTy, Class> m_Cast(const OpTy &Op) {
-  return CastClass_match<OpTy, Class>(Op);
+/// m_PtrToInt
+template<typename OpTy>
+inline CastClass_match<OpTy, Instruction::PtrToInt>
+m_PtrToInt(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
 }

+/// m_Trunc
+template<typename OpTy>
+inline CastClass_match<OpTy, Instruction::Trunc>
+m_Trunc(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::Trunc>(Op);
+}
+
 //===----------------------------------------------------------------------===//
 // Matchers for unary operators
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 6f1e066551a2..206e42e7d93e 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -477,6 +477,13 @@ def COPY_TO_REGCLASS : Instruction {
   let neverHasSideEffects = 1;
   let isAsCheapAsAMove = 1;
 }
+def DEBUG_VALUE : Instruction {
+  let OutOperandList = (ops);
+  let InOperandList = (ops unknown:$value, i64imm:$offset, unknown:$meta);
+  let AsmString = "DEBUG_VALUE";
+  let Namespace = "TargetInstrInfo";
+  let isAsCheapAsAMove = 1;
+}
 }

 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
index ef1fc49cefee..1d3da8b2c62c 100644
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -10,13 +10,15 @@
 #ifndef LLVM_TARGET_TARGETPARSER_H
 #define LLVM_TARGET_TARGETPARSER_H

-#include "llvm/MC/MCAsmLexer.h"
-
 namespace llvm {
 class MCAsmParser;
 class MCInst;
 class StringRef;
 class Target;
+class SMLoc;
+class AsmToken;
+class MCParsedAsmOperand;
+template <typename T> class SmallVectorImpl;

 /// TargetAsmParser - Generic interface to target specific assembly parsers.
 class TargetAsmParser {
@@ -43,9 +45,11 @@ class TargetAsmParser {
   //
   /// \param AP - The current parser object.
   /// \param Name - The instruction name.
-  /// \param Inst [out] - On success, the parsed instruction.
+  /// \param Operands [out] - The list of parsed operands, this returns
+  ///        ownership of them to the caller.
   /// \return True on failure.
-  virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst) = 0;
+  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+                     SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;

   /// ParseDirective - Parse a target specific assembler directive
   ///
@@ -58,6 +62,14 @@ class TargetAsmParser {
   ///
   /// \param ID - the identifier token of the directive.
   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
+
+  /// MatchInstruction - Recognize a series of operands of a parsed instruction
+  /// as an actual MCInst.  This returns false and fills in Inst on success and
+  /// returns true on failure to match.
+  virtual bool
+  MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                   MCInst &Inst) = 0;
+
 };

 } // End llvm namespace
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 1bcd6fd8366e..8e2157e05fef 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -88,7 +88,10 @@ class TargetInstrInfo {
     /// only needed in cases where the register classes implied by the
     /// instructions are insufficient. The actual MachineInstrs to perform
     /// the copy are emitted with the TargetInstrInfo::copyRegToReg hook.
-    COPY_TO_REGCLASS = 10
+    COPY_TO_REGCLASS = 10,
+
+    // DEBUG_VALUE - a mapping of the llvm.dbg.value intrinsic
+    DEBUG_VALUE = 11
   };

   unsigned getNumOpcodes() const { return NumOpcodes; }
@@ -143,6 +146,18 @@ class TargetInstrInfo {
     return false;
   }

+  /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+  /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+  /// true, then it's expected the pre-extension value is available as a subreg
+  /// of the result register. This also returns the sub-register index in
+  /// SubIdx.
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+                                     unsigned &SrcReg, unsigned &DstReg,
+                                     unsigned &SubIdx) const {
+    return false;
+  }
+
   /// isIdentityCopy - Return true if the instruction is a copy (or
   /// extract_subreg, insert_subreg, subreg_to_reg) where the source and
   /// destination registers are the same.
@@ -232,6 +247,14 @@ class TargetInstrInfo {
                              const MachineInstr *Orig,
                              const TargetRegisterInfo *TRI) const = 0;

+  /// duplicate - Create a duplicate of the Orig instruction in MF.
This is like + /// MachineFunction::CloneMachineInstr(), but the target may update operands + /// that are required to be unique. + /// + /// The instruction must be duplicable as indicated by isNotDuplicable(). + virtual MachineInstr *duplicate(MachineInstr *Orig, + MachineFunction &MF) const = 0; + /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target /// may be able to convert a two-address instruction into one or more true @@ -560,6 +583,8 @@ class TargetInstrInfoImpl : public TargetInstrInfo { unsigned DestReg, unsigned SubReg, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const; + virtual MachineInstr *duplicate(MachineInstr *Orig, + MachineFunction &MF) const; virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, const MachineRegisterInfo *MRI) const; diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index dd28a87938ff..15da8456f172 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -774,10 +774,12 @@ class TargetLowering { /// that want to combine struct TargetLoweringOpt { SelectionDAG &DAG; + bool ShrinkOps; SDValue Old; SDValue New; - explicit TargetLoweringOpt(SelectionDAG &InDAG) : DAG(InDAG) {} + explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) : + DAG(InDAG), ShrinkOps(Shrink) {} bool CombineTo(SDValue O, SDValue N) { Old = O; @@ -1478,7 +1480,7 @@ class TargetLowering { } /// isZExtFree - Return true if any actual instruction that defines a - /// value of type Ty1 implicit zero-extends the value to Ty2 in the result + /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result /// register. This does not necessarily include registers defined in /// unknown ways, such as incoming arguments, or copies from unknown /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 9a641914af6f..3dd747175b41 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -352,7 +352,7 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { /// getCOFFSection - Return the MCSection for the specified COFF section. /// FIXME: Switch this to a semantic view eventually. - const MCSection *getCOFFSection(const char *Name, bool isDirective, + const MCSection *getCOFFSection(StringRef Name, bool isDirective, SectionKind K) const; }; diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index dec0b1dcd4d3..f93eadb3690d 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -664,7 +664,7 @@ class TargetRegisterInfo { /// frame indices from instructions which may use them. The instruction /// referenced by the iterator contains an MO_FrameIndex operand which must be /// eliminated by this method. This method may modify or replace the - /// specified instruction, as long as it keeps the iterator pointing the the + /// specified instruction, as long as it keeps the iterator pointing at the /// finished product. SPAdj is the SP adjustment due to call frame setup /// instruction. 
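Putting the TargetAsmParser changes above together: parsing and matching are now two separate virtual hooks, with the operand list owned by the caller in between. A hedged sketch of the intended driver flow (TAP, Name, and NameLoc are assumed to come from the generic assembler parsing loop; none of this driver code is in the patch itself):

  // Sketch only: parse into operands, then match operands into an MCInst.
  SmallVector<MCParsedAsmOperand*, 8> Operands;
  MCInst Inst;
  if (!TAP.ParseInstruction(Name, NameLoc, Operands) &&
      !TAP.MatchInstruction(Operands, Inst)) {
    // Inst now holds the recognized instruction.
  }
  // ParseInstruction returned ownership of the operands to us.
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    delete Operands[i];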
 ///
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 7f54f819af23..4b72f81eb1e8 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -843,11 +843,6 @@ class Pat<dag pattern, dag result> : Pattern<pattern, [result]>;
 // Complex pattern definitions.
 //

-class CPAttribute;
-// Pass the parent Operand as root to CP function rather
-// than the root of the sub-DAG
-def CPAttrParentAsRoot : CPAttribute;
-
 // Complex patterns, e.g. X86 addressing mode, requires pattern matching code
 // in C++. NumOperands is the number of operands returned by the select function;
 // SelectFunc is the name of the function used to pattern match the max. pattern;
@@ -855,12 +850,10 @@ def CPAttrParentAsRoot : CPAttribute;
 // e.g. X86 addressing mode - def addr : ComplexPattern<4, "SelectAddr", [add]>;
 //
 class ComplexPattern<ValueType ty, int numops, string fn,
-                     list<SDNode> roots = [], list<SDNodeProperty> props = [],
-                     list<CPAttribute> attrs = []> {
+                     list<SDNode> roots = [], list<SDNodeProperty> props = []> {
   ValueType Ty = ty;
   int NumOperands = numops;
   string SelectFunc = fn;
   list<SDNode> RootNodes = roots;
   list<SDNodeProperty> Properties = props;
-  list<CPAttribute> Attributes = attrs;
 }
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 9794ffd42998..9c579ac76105 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -19,22 +19,12 @@ namespace llvm {
 class ModulePass;
 class FunctionPass;

-// Insert function profiling instrumentation
-ModulePass *createFunctionProfilerPass();
-
-// Insert block profiling instrumentation
-ModulePass *createBlockProfilerPass();
-
 // Insert edge profiling instrumentation
 ModulePass *createEdgeProfilerPass();

 // Insert optimal edge profiling instrumentation
 ModulePass *createOptimalEdgeProfilerPass();

-// Random Sampling Profiling Framework
-ModulePass* createNullProfilerRSPass();
-FunctionPass* createRSProfilingPass();
-
 } // End llvm namespace

 #endif
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 8172114d8b6c..3f4571ebdd70 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -40,8 +40,9 @@ void FoldSingleEntryPHINodes(BasicBlock *BB);
 /// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
 /// is dead. Also recursively delete any operands that become dead as
 /// a result. This includes tracing the def-use list from the PHI to see if
-/// it is ultimately unused or if it reaches an unused cycle.
-void DeleteDeadPHIs(BasicBlock *BB);
+/// it is ultimately unused or if it reaches an unused cycle. Return true
+/// if any PHIs were deleted.
+bool DeleteDeadPHIs(BasicBlock *BB);

 /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
 /// if possible. The return value indicates success or failure.
@@ -65,11 +66,6 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
 //
 void ReplaceInstWithInst(Instruction *From, Instruction *To);

-/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint,
-/// make a copy of the stoppoint before InsertPos (presumably before copying
-/// or moving I).
-void CopyPrecedingStopPoint(Instruction *I, BasicBlock::iterator InsertPos);
-
 /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
 /// instruction before ScanFrom) checking to see if we have the value at the
 /// memory address *Ptr locally available within a small number of instructions.
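The BasicBlockUtils.h hunk above and the Local.h hunk just below make the deletion helpers report whether they changed anything. A small sketch of the usage this enables inside a pass (BB, V, and TD are placeholders for this example):

  // Sketch only: fold the helpers' results into a pass's Changed flag.
  bool Changed = false;
  Changed |= DeleteDeadPHIs(BB);
  Changed |= RecursivelyDeleteTriviallyDeadInstructions(V);
  Changed |= SimplifyInstructionsInBlock(BB, TD);
  return Changed;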
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 2cdd31fb14d0..0b8147e76dfd 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -27,7 +27,7 @@ class PHINode; class AllocaInst; class ConstantExpr; class TargetData; -struct DbgInfoIntrinsic; +class DbgInfoIntrinsic; template class SmallVectorImpl; @@ -63,16 +63,25 @@ bool isInstructionTriviallyDead(Instruction *I); /// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a /// trivially dead instruction, delete it. If that makes any of its operands -/// trivially dead, delete them too, recursively. -void RecursivelyDeleteTriviallyDeadInstructions(Value *V); +/// trivially dead, delete them too, recursively. Return true if any +/// instructions were deleted. +bool RecursivelyDeleteTriviallyDeadInstructions(Value *V); /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively /// dead PHI node, due to being a def-use chain of single-use nodes that /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. -void RecursivelyDeleteDeadPHINode(PHINode *PN); +/// too, recursively. Return true if the PHI node is actually deleted. +bool RecursivelyDeleteDeadPHINode(PHINode *PN); + +/// SimplifyInstructionsInBlock - Scan the specified basic block and try to +/// simplify any instructions in it and recursively delete dead instructions. +/// +/// This returns true if it changed the code, note that it can delete +/// instructions in other blocks as well in this block. +bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0); + //===----------------------------------------------------------------------===// // Control Flow Graph Restructuring. // diff --git a/include/llvm/Type.h b/include/llvm/Type.h index e5169824f805..2c37a6890e90 100644 --- a/include/llvm/Type.h +++ b/include/llvm/Type.h @@ -217,6 +217,9 @@ class Type : public AbstractTypeUser { /// bool isInteger() const { return ID == IntegerTyID; } + /// isInteger - Return true if this is an IntegerType of the specified width. + bool isInteger(unsigned Bitwidth) const; + /// isIntOrIntVector - Return true if this is an integer type or a vector of /// integer types. /// diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h index e05fdbd08ddc..53815ba7a4e6 100644 --- a/include/llvm/ValueSymbolTable.h +++ b/include/llvm/ValueSymbolTable.h @@ -17,6 +17,7 @@ #include "llvm/Value.h" #include "llvm/ADT/StringMap.h" #include "llvm/System/DataTypes.h" +#include "llvm/ADT/ilist_node.h" namespace llvm { template @@ -26,7 +27,7 @@ namespace llvm { class NamedMDNode; class Module; class StringRef; - + /// This class provides a symbol table of name/value pairs. It is essentially /// a std::map but has a controlled interface provided by /// LLVM as well as ensuring uniqueness of names. @@ -39,7 +40,6 @@ class ValueSymbolTable { friend class SymbolTableListTraits; friend class SymbolTableListTraits; friend class SymbolTableListTraits; - friend class SymbolTableListTraits; /// @name Types /// @{ public: @@ -129,6 +129,88 @@ class ValueSymbolTable { /// @} }; +/// This class provides a symbol table of name/NamedMDNode pairs. It is +/// essentially a StringMap wrapper. 
+
+class MDSymbolTable {
+  friend class SymbolTableListTraits<NamedMDNode, Module>;
+/// @name Types
+/// @{
+private:
+  /// @brief A mapping of names to metadata
+  typedef StringMap<NamedMDNode*> MDMap;
+
+public:
+  /// @brief An iterator over the MDMap.
+  typedef MDMap::iterator iterator;
+
+  /// @brief A const_iterator over the MDMap.
+  typedef MDMap::const_iterator const_iterator;
+
+/// @}
+/// @name Constructors
+/// @{
+public:
+
+  MDSymbolTable(const MDSymbolTable &);   // DO NOT IMPLEMENT
+  void operator=(const MDSymbolTable &);  // DO NOT IMPLEMENT
+  MDSymbolTable() : mmap(0) {}
+  ~MDSymbolTable();
+
+/// @}
+/// @name Accessors
+/// @{
+public:
+
+  /// This method finds the value with the given \p Name in the
+  /// symbol table.
+  /// @returns the NamedMDNode associated with the \p Name
+  /// @brief Lookup a named Value.
+  NamedMDNode *lookup(StringRef Name) const { return mmap.lookup(Name); }
+
+  /// @returns true iff the symbol table is empty
+  /// @brief Determine if the symbol table is empty
+  inline bool empty() const { return mmap.empty(); }
+
+  /// @brief The number of name/type pairs is returned.
+  inline unsigned size() const { return unsigned(mmap.size()); }
+
+/// @}
+/// @name Iteration
+/// @{
+public:
+  /// @brief Get an iterator pointing to the beginning of the symbol table.
+  inline iterator begin() { return mmap.begin(); }
+
+  /// @brief Get a const_iterator pointing to the beginning of the symbol table.
+  inline const_iterator begin() const { return mmap.begin(); }
+
+  /// @brief Get an iterator to the end of the symbol table.
+  inline iterator end() { return mmap.end(); }
+
+  /// @brief Get a const_iterator to the end of the symbol table.
+  inline const_iterator end() const { return mmap.end(); }
+
+/// @}
+/// @name Mutators
+/// @{
+public:
+  /// insert - The method inserts a new entry into the stringmap.
+  void insert(StringRef Name, NamedMDNode *Node) {
+    (void) mmap.GetOrCreateValue(Name, Node);
+  }
+
+  /// This method removes a NamedMDNode from the symbol table.
+  void remove(StringRef Name) { mmap.erase(Name); }
+
+/// @}
+/// @name Internal Data
+/// @{
+private:
+  MDMap mmap;   ///< The map that holds the symbol table.
+/// @}
+};
+
 } // End llvm namespace

 #endif
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index dee9b535871a..371dcafa9f31 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -116,13 +116,16 @@ AliasAnalysis::getModRefBehavior(Function *F,
     return DoesNotAccessMemory;
   if (F->onlyReadsMemory())
     return OnlyReadsMemory;
-  if (unsigned id = F->getIntrinsicID()) {
+  if (unsigned id = F->getIntrinsicID())
+    return getModRefBehavior(id);
+  }
+  return UnknownModRefBehavior;
+}
+
+AliasAnalysis::ModRefBehavior AliasAnalysis::getModRefBehavior(unsigned iid) {
 #define GET_INTRINSIC_MODREF_BEHAVIOR
 #include "llvm/Intrinsics.gen"
 #undef GET_INTRINSIC_MODREF_BEHAVIOR
-  }
-  }
-  return UnknownModRefBehavior;
 }

 AliasAnalysis::ModRefResult
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index f8cb32321b00..398dec7dd0a1 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -13,11 +13,11 @@

 using namespace llvm;

-int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
-                     char **OutMessages) {
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+                          char **OutMessages) {
   std::string Messages;

-  int Result = verifyModule(*unwrap(M),
+  LLVMBool Result = verifyModule(*unwrap(M),
                             static_cast<VerifierFailureAction>(Action),
                             OutMessages?
&Messages : 0); @@ -27,7 +27,7 @@ int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, return Result; } -int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { +LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { return verifyFunction(*unwrap(Fn), static_cast(Action)); } diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index eaf90d014ffe..4ae8859a257b 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -398,8 +398,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, BytesLoaded, TD)) return 0; - APInt ResultVal(IntType->getBitWidth(), 0); - for (unsigned i = 0; i != BytesLoaded; ++i) { + APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); + for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]); } @@ -718,14 +718,13 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, switch (Opcode) { default: return 0; + case Instruction::ICmp: + case Instruction::FCmp: assert(0 && "Invalid for compares"); case Instruction::Call: if (Function *F = dyn_cast(Ops[0])) if (canConstantFoldCallTo(F)) return ConstantFoldCall(F, Ops+1, NumOps-1); return 0; - case Instruction::ICmp: - case Instruction::FCmp: - llvm_unreachable("This function is invalid for compares: no predicate specified"); case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. @@ -877,6 +876,20 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, CE1->getOperand(0), TD); } } + + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) + // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) + if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && + CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { + Constant *LHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD); + Constant *RHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD); + unsigned OpC = + Predicate == ICmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; + Constant *Ops[] = { LHS, RHS }; + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD); + } } return ConstantExpr::getCompare(Predicate, Ops0, Ops1); diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index 7d72b383a5e0..3532b052dc55 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -37,8 +37,6 @@ PrintDirectory("print-fullpath", namespace { class PrintDbgInfo : public FunctionPass { raw_ostream &Out; - void printStopPoint(const DbgStopPointInst *DSI); - void printFuncStart(const DbgFuncStartInst *FS); void printVariableDeclaration(const Value *V); public: static char ID; // Pass identification @@ -74,27 +72,6 @@ void PrintDbgInfo::printVariableDeclaration(const Value *V) { Out << File << ":" << LineNo << "\n"; } -void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) { - if (PrintDirectory) - if (MDString *Str = dyn_cast(DSI->getDirectory())) - Out << Str->getString() << '/'; - - if (MDString *Str = dyn_cast(DSI->getFileName())) - Out << Str->getString(); - Out << ':' << DSI->getLine(); - - if (unsigned Col = DSI->getColumn()) - Out << ':' << Col; -} - -void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) { - DISubprogram Subprogram(FS->getSubprogram()); - Out << "; fully qualified function name: " << Subprogram.getDisplayName() - << " return type: " << Subprogram.getReturnTypeName() - << " at line " << Subprogram.getLineNumber() - << "\n\n"; -} - bool PrintDbgInfo::runOnFunction(Function &F) { if (F.isDeclaration()) return false; @@ -108,57 +85,21 @@ bool PrintDbgInfo::runOnFunction(Function &F) { // Skip dead blocks. continue; - const DbgStopPointInst *DSI = findBBStopPoint(BB); Out << BB->getName(); Out << ":"; - if (DSI) { - Out << "; ("; - printStopPoint(DSI); - Out << ")"; - } - Out << "\n"; - // A dbgstoppoint's information is valid until we encounter a new one. - const DbgStopPointInst *LastDSP = DSI; - bool Printed = DSI != 0; for (BasicBlock::const_iterator i = BB->begin(), e = BB->end(); i != e; ++i) { - if (isa(i)) { - if ((DSI = dyn_cast(i))) { - if (DSI->getContext() == LastDSP->getContext() && - DSI->getLineValue() == LastDSP->getLineValue() && - DSI->getColumnValue() == LastDSP->getColumnValue()) - // Don't print same location twice. - continue; - LastDSP = cast(i); - - // Don't print consecutive stoppoints, use a flag to know which one we - // printed. 
-          Printed = false;
-      } else if (const DbgFuncStartInst *FS = dyn_cast<DbgFuncStartInst>(i)) {
-        printFuncStart(FS);
-      }
-    } else {
-      if (!Printed && LastDSP) {
-        Out << "; ";
-        printStopPoint(LastDSP);
-        Out << "\n";
-        Printed = true;
-      }
-
-      Out << *i << '\n';
       printVariableDeclaration(i);

       if (const User *U = dyn_cast<User>(i)) {
        for(unsigned i=0;i<U->getNumOperands();i++)
          printVariableDeclaration(U->getOperand(i));
       }
-    }
     }
   }
-
   return false;
 }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index de2d839f6d61..59ba807dd027 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -599,9 +599,7 @@ void DIVariable::dump() const {
 //===----------------------------------------------------------------------===//

 DIFactory::DIFactory(Module &m)
-  : M(m), VMContext(M.getContext()), DeclareFn(0) {
-  EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
-}
+  : M(m), VMContext(M.getContext()), DeclareFn(0) {}

 Constant *DIFactory::GetTagConstant(unsigned TAG) {
   assert((TAG & LLVMDebugVersionMask) == 0 &&
@@ -1033,58 +1031,52 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,

 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                       Instruction *InsertBefore) {
-  // Cast the storage to a {}* for the call to llvm.dbg.declare.
-  Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
-
+                                      Instruction *InsertBefore) {
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);

-  Value *Args[] = { Storage, D.getNode() };
+  Value *Elts[] = { Storage };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Elts, 1), D.getNode() };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
 }

 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                       BasicBlock *InsertAtEnd) {
-  // Cast the storage to a {}* for the call to llvm.dbg.declare.
-  Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
-
+                                      BasicBlock *InsertAtEnd) {
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);

-  Value *Args[] = { Storage, D.getNode() };
+  Value *Elts[] = { Storage };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Elts, 1), D.getNode() };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
 }

 /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 Instruction *InsertBefore) {
   assert(V && "no value passed to dbg.value");
-  assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
-         "offset must be i64");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);

   Value *Elts[] = { V };
-  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     D.getNode() };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
 }

 /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 BasicBlock *InsertAtEnd) {
   assert(V && "no value passed to dbg.value");
-  assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
-         "offset must be i64");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);

   Value *Elts[] = { V };
-  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     D.getNode() };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
 }
@@ -1242,52 +1234,6 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
   return true;
 }

-/// findStopPoint - Find the stoppoint coressponding to this instruction, that
-/// is the stoppoint that dominates this instruction.
-const DbgStopPointInst *llvm::findStopPoint(const Instruction *Inst) {
-  if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(Inst))
-    return DSI;
-
-  const BasicBlock *BB = Inst->getParent();
-  BasicBlock::const_iterator I = Inst, B;
-  while (BB) {
-    B = BB->begin();
-
-    // A BB consisting only of a terminator can't have a stoppoint.
-    while (I != B) {
-      --I;
-      if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
-        return DSI;
-    }
-
-    // This BB didn't have a stoppoint: if there is only one predecessor, look
-    // for a stoppoint there. We could use getIDom(), but that would require
-    // dominator info.
-    BB = I->getParent()->getUniquePredecessor();
-    if (BB)
-      I = BB->getTerminator();
-  }
-
-  return 0;
-}
-
-/// findBBStopPoint - Find the stoppoint corresponding to first real
-/// (non-debug intrinsic) instruction in this Basic Block, and return the
-/// stoppoint for it.
-const DbgStopPointInst *llvm::findBBStopPoint(const BasicBlock *BB) {
-  for(BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-    if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
-      return DSI;
-
-  // Fallback to looking for stoppoint of unique predecessor. Useful if this
-  // BB contains no stoppoints, but unique predecessor does.
-  BB = BB->getUniquePredecessor();
-  if (BB)
-    return findStopPoint(BB->getTerminator());
-
-  return 0;
-}
-
 Value *llvm::findDbgGlobalDeclare(GlobalVariable *V) {
   const Module *M = V->getParent();
   NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
@@ -1306,25 +1252,24 @@ Value *llvm::findDbgGlobalDeclare(GlobalVariable *V) {

 /// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
 /// It looks through pointer casts too.
-const DbgDeclareInst *llvm::findDbgDeclare(const Value *V, bool stripCasts) {
-  if (stripCasts) {
-    V = V->stripPointerCasts();
-
-    // Look for the bitcast.
-    for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
-          I != E; ++I)
-      if (isa<BitCastInst>(I)) {
-        const DbgDeclareInst *DDI = findDbgDeclare(*I, false);
-        if (DDI) return DDI;
-      }
+const DbgDeclareInst *llvm::findDbgDeclare(const Value *V) {
+  V = V->stripPointerCasts();
+
+  if (!isa<Instruction>(V) && !isa<Argument>(V))
     return 0;
-  }
-
-  // Find llvm.dbg.declare among uses of the instruction.
-  for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
-        I != E; ++I)
-    if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I))
-      return DDI;
+
+  const Function *F = NULL;
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    F = I->getParent()->getParent();
+  else if (const Argument *A = dyn_cast<Argument>(V))
+    F = A->getParent();
+
+  for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+    for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+         BI != BE; ++BI)
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        if (DDI->getAddress() == V)
+          return DDI;
   return 0;
 }

@@ -1371,29 +1316,6 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
   return true;
 }

-/// ExtractDebugLocation - Extract debug location information
-/// from llvm.dbg.stoppoint intrinsic.
-DebugLoc llvm::ExtractDebugLocation(DbgStopPointInst &SPI,
-                                    DebugLocTracker &DebugLocInfo) {
-  DebugLoc DL;
-  Value *Context = SPI.getContext();
-
-  // If this location is already tracked then use it.
-  DebugLocTuple Tuple(cast<MDNode>(Context), NULL, SPI.getLine(),
-                      SPI.getColumn());
-  DenseMap<DebugLocTuple, unsigned, DebugLocDenseMapInfo>::iterator II
-    = DebugLocInfo.DebugIdMap.find(Tuple);
-  if (II != DebugLocInfo.DebugIdMap.end())
-    return DebugLoc::get(II->second);
-
-  // Add a new location entry.
-  unsigned Id = DebugLocInfo.DebugLocations.size();
-  DebugLocInfo.DebugLocations.push_back(Tuple);
-  DebugLocInfo.DebugIdMap[Tuple] = Id;
-
-  return DebugLoc::get(Id);
-}
-
 /// ExtractDebugLocation - Extract debug location information
 /// from DILocation.
 DebugLoc llvm::ExtractDebugLocation(DILocation &Loc,
@@ -1419,32 +1341,6 @@ DebugLoc llvm::ExtractDebugLocation(DILocation &Loc,
   return DebugLoc::get(Id);
 }

-/// ExtractDebugLocation - Extract debug location information
-/// from llvm.dbg.func_start intrinsic.
-DebugLoc llvm::ExtractDebugLocation(DbgFuncStartInst &FSI,
-                                    DebugLocTracker &DebugLocInfo) {
-  DebugLoc DL;
-  Value *SP = FSI.getSubprogram();
-
-  DISubprogram Subprogram(cast<MDNode>(SP));
-  unsigned Line = Subprogram.getLineNumber();
-  DICompileUnit CU(Subprogram.getCompileUnit());
-
-  // If this location is already tracked then use it.
-  DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0);
-  DenseMap<DebugLocTuple, unsigned, DebugLocDenseMapInfo>::iterator II
-    = DebugLocInfo.DebugIdMap.find(Tuple);
-  if (II != DebugLocInfo.DebugIdMap.end())
-    return DebugLoc::get(II->second);
-
-  // Add a new location entry.
-  unsigned Id = DebugLocInfo.DebugLocations.size();
-  DebugLocInfo.DebugLocations.push_back(Tuple);
-  DebugLocInfo.DebugIdMap[Tuple] = Id;
-
-  return DebugLoc::get(Id);
-}
-
 /// getDISubprogram - Find subprogram that is enclosing this scope.
 DISubprogram llvm::getDISubprogram(MDNode *Scope) {
   DIDescriptor D(Scope);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index df9e31c1d253..26c0c9e4ba8f 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -128,8 +128,9 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
     if (!AddRecStride->properlyDominates(Header, DT))
       return false;

-    DEBUG(dbgs() << "[" << L->getHeader()->getName()
-                 << "] Variable stride: " << *AddRec << "\n");
+    DEBUG(dbgs() << "[";
+          WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+          dbgs() << "] Variable stride: " << *AddRec << "\n");
   }

   Stride = AddRecStride;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index bd9377bf87fb..651c918a37fe 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -102,6 +102,37 @@ unsigned InlineCostAnalyzer::FunctionInfo::
   return Reduction;
 }

+// callIsSmall - If a call is likely to lower to a single target instruction, or
+// is otherwise deemed small return true.
+// TODO: Perhaps calls like memcpy, strcpy, etc?
+static bool callIsSmall(const Function *F) {
+  if (!F) return false;
+
+  if (F->hasLocalLinkage()) return false;
+
+  if (!F->hasName()) return false;
+
+  StringRef Name = F->getName();
+
+  // These will all likely lower to a single selection DAG node.
+  if (Name == "copysign" || Name == "copysignf" ||
+      Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+      Name == "sin" || Name == "sinf" || Name == "sinl" ||
+      Name == "cos" || Name == "cosf" || Name == "cosl" ||
+      Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+    return true;
+
+  // These are all likely to be optimized into something smaller.
+  if (Name == "pow" || Name == "powf" || Name == "powl" ||
+      Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+      Name == "floor" || Name == "floorf" || Name == "ceil" ||
+      Name == "round" || Name == "ffs" || Name == "ffsl" ||
+      Name == "abs" || Name == "labs" || Name == "llabs")
+    return true;
+
+  return false;
+}
+
 /// analyzeBasicBlock - Fill in the current structure with information gleaned
 /// from the specified block.
 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
@@ -129,7 +160,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {

       // Calls often compile into many machine instructions.  Bump up their
       // cost to reflect this.
-      if (!isa<IntrinsicInst>(II))
+      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction()))
         NumInsts += InlineConstants::CallPenalty;
     }

@@ -141,11 +172,16 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
     if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
       ++NumVectorInsts;

-    // Noop casts, including ptr <-> int, don't count.
     if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+      // Noop casts, including ptr <-> int, don't count.
       if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
         continue;
+      // Result of a cmp instruction is often extended (to be used by other
+      // cmp instructions, logical or return instructions). These are usually
+      // nop on most sane targets.
+      if (isa<CmpInst>(CI->getOperand(0)))
+        continue;
     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
       // If a GEP has all constant indices, it will probably be folded with
       // a load/store.
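The IVUsers hunk above and the LoopInfo/ScalarEvolution hunks below replace getName() with WriteAsOperand when printing blocks and functions, presumably because getName() yields an empty string for unnamed values. A minimal sketch of the difference (F is assumed to be a Function whose entry block is unnamed; this snippet is not part of the patch):

  // Sketch only: printing an unnamed basic block.
  errs() << F.getEntryBlock().getName();        // prints nothing
  WriteAsOperand(errs(), &F.getEntryBlock(),
                 /*PrintType=*/false);          // prints a slot, e.g. %0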
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 5d31c1157e11..453af5a5555a 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include @@ -385,6 +386,10 @@ BasicBlock *Loop::getUniqueExitBlock() const { return 0; } +void Loop::dump() const { + print(dbgs()); +} + //===----------------------------------------------------------------------===// // LoopInfo implementation // diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 17dc686a4259..4d85ce43d201 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -316,7 +316,9 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "{" << *Operands[0]; for (unsigned i = 1, e = Operands.size(); i != e; ++i) OS << ",+," << *Operands[i]; - OS << "}<" << L->getHeader()->getName() + ">"; + OS << "}<"; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ">"; } void SCEVFieldOffsetExpr::print(raw_ostream &OS) const { @@ -5193,7 +5195,9 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) PrintLoopInfo(OS, SE, *I); - OS << "Loop " << L->getHeader()->getName() << ": "; + OS << "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); @@ -5206,8 +5210,10 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, OS << "Unpredictable backedge-taken count. "; } - OS << "\n"; - OS << "Loop " << L->getHeader()->getName() << ": "; + OS << "\n" + "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; if (!isa(SE->getMaxBackedgeTakenCount(L))) { OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); @@ -5227,7 +5233,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { // const isn't dangerous. ScalarEvolution &SE = *const_cast(this); - OS << "Classifying expressions for: " << F->getName() << "\n"; + OS << "Classifying expressions for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isSCEVable(I->getType())) { OS << *I << '\n'; @@ -5256,7 +5264,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { OS << "\n"; } - OS << "Determining loop execution counts for: " << F->getName() << "\n"; + OS << "Determining loop execution counts for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) PrintLoopInfo(OS, &SE, *I); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index acd3119abea8..91e5bc3876b4 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -726,8 +726,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case Instruction::Sub: Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); @@ -757,8 +756,24 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, // is, at worst, one more bit than the inputs. 
     Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
     if (Tmp == 1) return 1;  // Early out.
-    return std::min(Tmp, Tmp2)-1;
-    break;
+    return std::min(Tmp, Tmp2)-1;
+
+  case Instruction::PHI: {
+    PHINode *PN = cast<PHINode>(U);
+    // Don't analyze large in-degree PHIs.
+    if (PN->getNumIncomingValues() > 4) break;
+
+    // Take the minimum of all incoming values.  This can't infinitely loop
+    // because of our depth threshold.
+    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (Tmp == 1) return Tmp;
+      Tmp = std::min(Tmp,
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+    }
+    return Tmp;
+  }
+
   case Instruction::Trunc:
     // FIXME: it's tricky to do anything useful for this, but it is an important
     // case for targets like X86.
@@ -1348,7 +1363,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
     // Make sure the index-ee is a pointer to array of i8.
     const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
     const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
-    if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
+    if (AT == 0 || !AT->getElementType()->isInteger(8))
       return false;
 
     // Check to make sure that the first operand of the GEP is an integer and
@@ -1387,8 +1402,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
 
   // Must be a Constant Array
   ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (Array == 0 ||
-      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
+  if (Array == 0 || !Array->getType()->getElementType()->isInteger(8))
     return false;
 
   // Get the number of elements in the array
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 15a9832731f2..e4039ab16809 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -510,12 +510,17 @@ bool LLParser::ParseNamedMetadata() {
       ParseToken(lltok::lbrace, "Expected '{' here"))
     return true;
 
-  SmallVector<MetadataBase *, 8> Elts;
+  SmallVector<MDNode *, 8> Elts;
   do {
+    // Null is a special case since it is typeless.
+    if (EatIfPresent(lltok::kw_null)) {
+      Elts.push_back(0);
+      continue;
+    }
+
     if (ParseToken(lltok::exclaim, "Expected '!' here"))
       return true;
 
-    // FIXME: This rejects MDStrings.  Are they legal in a named MDNode or not?
     MDNode *N = 0;
     if (ParseMDNodeID(N)) return true;
     Elts.push_back(N);
@@ -543,7 +548,7 @@ bool LLParser::ParseStandaloneMetadata() {
       ParseType(Ty, TyLoc) ||
       ParseToken(lltok::exclaim, "Expected '!' here") ||
       ParseToken(lltok::lbrace, "Expected '{' here") ||
-      ParseMDNodeVector(Elts) ||
+      ParseMDNodeVector(Elts, NULL) ||
       ParseToken(lltok::rbrace, "expected end of metadata node"))
     return true;
 
@@ -1715,8 +1720,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
   }
 
   // Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !isa(Ty) && - Ty != Type::getLabelTy(F.getContext())) { + if (!Ty->isFirstClassType() && !isa(Ty) && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1757,8 +1761,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty, return 0; } - if (!Ty->isFirstClassType() && !isa(Ty) && - Ty != Type::getLabelTy(F.getContext())) { + if (!Ty->isFirstClassType() && !isa(Ty) && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1881,8 +1884,10 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name, /// ParseValID - Parse an abstract value that doesn't necessarily have a /// type implied. For example, if we parse "4" we don't know what integer type /// it has. The value will later be combined with its type and checked for -/// sanity. -bool LLParser::ParseValID(ValID &ID) { +/// sanity. PFS is used to convert function-local operands of metadata (since +/// metadata operands are not just parsed here but also converted to values). +/// PFS can be null when we are not parsing metadata values inside a function. +bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ID.Loc = Lex.getLoc(); switch (Lex.getKind()) { default: return TokError("expected value token"); @@ -1908,7 +1913,7 @@ bool LLParser::ParseValID(ValID &ID) { if (EatIfPresent(lltok::lbrace)) { SmallVector Elts; - if (ParseMDNodeVector(Elts) || + if (ParseMDNodeVector(Elts, PFS) || ParseToken(lltok::rbrace, "expected end of metadata node")) return true; @@ -2353,30 +2358,85 @@ bool LLParser::ParseValID(ValID &ID) { } /// ParseGlobalValue - Parse a global value with the specified type. -bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&V) { - V = 0; +bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) { + C = 0; ValID ID; - return ParseValID(ID) || - ConvertGlobalValIDToValue(Ty, ID, V); + Value *V = NULL; + bool Parsed = ParseValID(ID) || + ConvertValIDToValue(Ty, ID, V, NULL); + if (V && !(C = dyn_cast(V))) + return Error(ID.Loc, "global values must be constants"); + return Parsed; } -/// ConvertGlobalValIDToValue - Apply a type to a ValID to get a fully resolved -/// constant. -bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, - Constant *&V) { +bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { + PATypeHolder Type(Type::getVoidTy(Context)); + return ParseType(Type) || + ParseGlobalValue(Type, V); +} + +/// ParseGlobalValueVector +/// ::= /*empty*/ +/// ::= TypeAndValue (',' TypeAndValue)* +bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { + // Empty list. + if (Lex.getKind() == lltok::rbrace || + Lex.getKind() == lltok::rsquare || + Lex.getKind() == lltok::greater || + Lex.getKind() == lltok::rparen) + return false; + + Constant *C; + if (ParseGlobalTypeAndValue(C)) return true; + Elts.push_back(C); + + while (EatIfPresent(lltok::comma)) { + if (ParseGlobalTypeAndValue(C)) return true; + Elts.push_back(C); + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Function Parsing. 
+//===----------------------------------------------------------------------===// + +bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, + PerFunctionState *PFS) { if (isa(Ty)) return Error(ID.Loc, "functions are not values, refer to them as pointers"); switch (ID.Kind) { default: llvm_unreachable("Unknown ValID!"); - case ValID::t_MDNode: - case ValID::t_MDString: - return Error(ID.Loc, "invalid use of metadata"); case ValID::t_LocalID: + if (!PFS) return Error(ID.Loc, "invalid use of function-local name"); + V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc); + return (V == 0); case ValID::t_LocalName: - return Error(ID.Loc, "invalid use of function-local name"); - case ValID::t_InlineAsm: - return Error(ID.Loc, "inline asm can only be an operand of call/invoke"); + if (!PFS) return Error(ID.Loc, "invalid use of function-local name"); + V = PFS->GetVal(ID.StrVal, Ty, ID.Loc); + return (V == 0); + case ValID::t_InlineAsm: { + const PointerType *PTy = dyn_cast(Ty); + const FunctionType *FTy = + PTy ? dyn_cast(PTy->getElementType()) : 0; + if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) + return Error(ID.Loc, "invalid type for inline asm constraint string"); + V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1); + return false; + } + case ValID::t_MDNode: + if (!Ty->isMetadataTy()) + return Error(ID.Loc, "metadata value must have metadata type"); + V = ID.MDNodeVal; + return false; + case ValID::t_MDString: + if (!Ty->isMetadataTy()) + return Error(ID.Loc, "metadata value must have metadata type"); + V = ID.MDStringVal; + return false; case ValID::t_GlobalName: V = GetGlobalVal(ID.StrVal, Ty, ID.Loc); return V == 0; @@ -2440,90 +2500,11 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, } } -/// ConvertGlobalOrMetadataValIDToValue - Apply a type to a ValID to get a fully -/// resolved constant or metadata value. -bool LLParser::ConvertGlobalOrMetadataValIDToValue(const Type *Ty, ValID &ID, - Value *&V) { - switch (ID.Kind) { - case ValID::t_MDNode: - if (!Ty->isMetadataTy()) - return Error(ID.Loc, "metadata value must have metadata type"); - V = ID.MDNodeVal; - return false; - case ValID::t_MDString: - if (!Ty->isMetadataTy()) - return Error(ID.Loc, "metadata value must have metadata type"); - V = ID.MDStringVal; - return false; - default: - Constant *C; - if (ConvertGlobalValIDToValue(Ty, ID, C)) return true; - V = C; - return false; - } -} - - -bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { - PATypeHolder Type(Type::getVoidTy(Context)); - return ParseType(Type) || - ParseGlobalValue(Type, V); -} - -/// ParseGlobalValueVector -/// ::= /*empty*/ -/// ::= TypeAndValue (',' TypeAndValue)* -bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { - // Empty list. - if (Lex.getKind() == lltok::rbrace || - Lex.getKind() == lltok::rsquare || - Lex.getKind() == lltok::greater || - Lex.getKind() == lltok::rparen) - return false; - - Constant *C; - if (ParseGlobalTypeAndValue(C)) return true; - Elts.push_back(C); - - while (EatIfPresent(lltok::comma)) { - if (ParseGlobalTypeAndValue(C)) return true; - Elts.push_back(C); - } - - return false; -} - - -//===----------------------------------------------------------------------===// -// Function Parsing. 
-//===----------------------------------------------------------------------===// - -bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, - PerFunctionState &PFS) { - switch (ID.Kind) { - case ValID::t_LocalID: V = PFS.GetVal(ID.UIntVal, Ty, ID.Loc); break; - case ValID::t_LocalName: V = PFS.GetVal(ID.StrVal, Ty, ID.Loc); break; - case ValID::t_InlineAsm: { - const PointerType *PTy = dyn_cast(Ty); - const FunctionType *FTy = - PTy ? dyn_cast(PTy->getElementType()) : 0; - if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) - return Error(ID.Loc, "invalid type for inline asm constraint string"); - V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1); - return false; - } - default: - return ConvertGlobalOrMetadataValIDToValue(Ty, ID, V); - } - - return V == 0; -} - bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) { V = 0; ValID ID; - return ParseValID(ID) || - ConvertValIDToValue(Ty, ID, V, PFS); + return ParseValID(ID, &PFS) || + ConvertValIDToValue(Ty, ID, V, &PFS); } bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) { @@ -2663,8 +2644,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); - if (PAL.paramHasAttr(1, Attribute::StructRet) && - RetType != Type::getVoidTy(Context)) + if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy()) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); const FunctionType *FT = @@ -2766,6 +2746,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) { PerFunctionState PFS(*this, Fn, FunctionNumber); + // We need at least one basic block. + if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end) + return TokError("function body requires at least one basic block"); + while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end) if (ParseBasicBlock(PFS)) return true; @@ -3232,7 +3216,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { // Look up the callee. Value *Callee; - if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true; + if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional // function attributes. @@ -3578,7 +3562,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // Look up the callee. Value *Callee; - if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true; + if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional // function attributes. @@ -3660,7 +3644,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && Size->getType() != Type::getInt32Ty(Context)) + if (Size && !Size->getType()->isInteger(32)) return Error(SizeLoc, "element count must be i32"); if (isAlloca) { @@ -3840,7 +3824,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { /// ::= Element (',' Element)* /// Element /// ::= 'null' | TypeAndValue -bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts) { +bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts, + PerFunctionState *PFS) { do { // Null is a special case since it is typeless. 
if (EatIfPresent(lltok::kw_null)) { @@ -3851,8 +3836,8 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts) { Value *V = 0; PATypeHolder Ty(Type::getVoidTy(Context)); ValID ID; - if (ParseType(Ty) || ParseValID(ID) || - ConvertGlobalOrMetadataValIDToValue(Ty, ID, V)) + if (ParseType(Ty) || ParseValID(ID, PFS) || + ConvertValIDToValue(Ty, ID, V, PFS)) return true; Elts.push_back(V); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 803832f93d59..bea0593faa67 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -216,17 +216,6 @@ namespace llvm { bool ParseFunctionType(PATypeHolder &Result); PATypeHolder HandleUpRefs(const Type *Ty); - // Constants. - bool ParseValID(ValID &ID); - bool ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, Constant *&V); - bool ConvertGlobalOrMetadataValIDToValue(const Type *Ty, ValID &ID, - Value *&V); - bool ParseGlobalValue(const Type *Ty, Constant *&V); - bool ParseGlobalTypeAndValue(Constant *&V); - bool ParseGlobalValueVector(SmallVectorImpl &Elts); - bool ParseMDNodeVector(SmallVectorImpl &); - - // Function Semantic Analysis. class PerFunctionState { LLParser &P; @@ -270,7 +259,7 @@ namespace llvm { }; bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, - PerFunctionState &PFS); + PerFunctionState *PFS); bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS); bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc, @@ -301,6 +290,13 @@ namespace llvm { bool ParseParameterList(SmallVectorImpl &ArgList, PerFunctionState &PFS); + // Constant Parsing. + bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); + bool ParseGlobalValue(const Type *Ty, Constant *&V); + bool ParseGlobalTypeAndValue(Constant *&V); + bool ParseGlobalValueVector(SmallVectorImpl &Elts); + bool ParseMDNodeVector(SmallVectorImpl &, PerFunctionState *PFS); + // Function Parsing. struct ArgInfo { LocTy Loc; diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index f513d41ce3b4..32b97e89f215 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -18,9 +18,9 @@ using namespace llvm; /* Builds a module from the bitcode in the specified memory buffer, returning a reference to the module via the OutModule parameter. Returns 0 on success. - Optionally returns a human-readable error message via OutMessage. */ -int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutModule, char **OutMessage) { + Optionally returns a human-readable error message via OutMessage. */ +LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, char **OutMessage) { std::string Message; *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(), @@ -34,9 +34,10 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, return 0; } -int LLVMParseBitcodeInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutModule, char **OutMessage) { +LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, + char **OutMessage) { std::string Message; *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef), @@ -53,9 +54,9 @@ int LLVMParseBitcodeInContext(LLVMContextRef ContextRef, /* Reads a module from the specified path, returning via the OutModule parameter a module provider which performs lazy deserialization. Returns 0 on success. Optionally returns a human-readable error message via OutMessage. 
*/ -int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { +LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { std::string Message; *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), getGlobalContext(), @@ -70,10 +71,10 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, return 0; } -int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { +LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { std::string Message; *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef), diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 7dffafa8314e..aabbc90c8be8 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -737,7 +737,7 @@ bool BitcodeReader::ParseValueSymbolTable() { } bool BitcodeReader::ParseMetadata() { - unsigned NextValueNo = MDValueList.size(); + unsigned NextMDValueNo = MDValueList.size(); if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) return Error("Malformed block record"); @@ -766,6 +766,7 @@ bool BitcodeReader::ParseMetadata() { continue; } + bool IsFunctionLocal = false; // Read a record. Record.clear(); switch (Stream.ReadRecord(Code, Record)) { @@ -787,17 +788,25 @@ bool BitcodeReader::ParseMetadata() { // Read named metadata elements. unsigned Size = Record.size(); - SmallVector Elts; + SmallVector Elts; for (unsigned i = 0; i != Size; ++i) { - Value *MD = MDValueList.getValueFwdRef(Record[i]); - if (MetadataBase *B = dyn_cast(MD)) - Elts.push_back(B); + if (Record[i] == ~0U) { + Elts.push_back(NULL); + continue; + } + MDNode *MD = dyn_cast(MDValueList.getValueFwdRef(Record[i])); + if (MD == 0) + return Error("Malformed metadata record"); + Elts.push_back(MD); } Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(), Elts.size(), TheModule); - MDValueList.AssignValue(V, NextValueNo++); + MDValueList.AssignValue(V, NextMDValueNo++); break; } + case bitc::METADATA_FN_NODE: + IsFunctionLocal = true; + // fall-through case bitc::METADATA_NODE: { if (Record.empty() || Record.size() % 2 == 1) return Error("Invalid METADATA_NODE record"); @@ -808,13 +817,15 @@ bool BitcodeReader::ParseMetadata() { const Type *Ty = getTypeByID(Record[i], false); if (Ty->isMetadataTy()) Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); - else if (Ty != Type::getVoidTy(Context)) + else if (!Ty->isVoidTy()) Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); else Elts.push_back(NULL); } - Value *V = MDNode::get(Context, &Elts[0], Elts.size()); - MDValueList.AssignValue(V, NextValueNo++); + Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(), + IsFunctionLocal); + IsFunctionLocal = false; + MDValueList.AssignValue(V, NextMDValueNo++); break; } case bitc::METADATA_STRING: { @@ -825,7 +836,7 @@ bool BitcodeReader::ParseMetadata() { String[i] = Record[i]; Value *V = MDString::get(Context, StringRef(String.data(), String.size())); - MDValueList.AssignValue(V, NextValueNo++); + MDValueList.AssignValue(V, NextMDValueNo++); break; } case bitc::METADATA_KIND: { @@ -1646,6 +1657,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::METADATA_ATTACHMENT_ID: if (ParseMetadataAttachment()) return true; break; + case 
bitc::METADATA_BLOCK_ID: + if (ParseMetadata()) return true; + break; } continue; } @@ -2238,7 +2252,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } // Non-void values get registered in the value table for future use. - if (I && I->getType() != Type::getVoidTy(Context)) + if (I && !I->getType()->isVoidTy()) ValueList.AssignValue(I, NextValueNo++); } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index c78a30e8690c..5a4a1b2920d8 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -484,7 +484,9 @@ static void WriteMDNode(const MDNode *N, Record.push_back(0); } } - Stream.EmitRecord(bitc::METADATA_NODE, Record, 0); + unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE : + bitc::METADATA_NODE; + Stream.EmitRecord(MDCode, Record, 0); Record.clear(); } @@ -497,11 +499,13 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, for (unsigned i = 0, e = Vals.size(); i != e; ++i) { if (const MDNode *N = dyn_cast(Vals[i].first)) { - if (!StartedMetadataBlock) { - Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); - StartedMetadataBlock = true; + if (!N->isFunctionLocal()) { + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + StartedMetadataBlock = true; + } + WriteMDNode(N, VE, Stream, Record); } - WriteMDNode(N, VE, Stream, Record); } else if (const MDString *MDS = dyn_cast(Vals[i].first)) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); @@ -528,10 +532,9 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, } // Write name. - std::string Str = NMD->getNameStr(); - const char *StrBegin = Str.c_str(); - for (unsigned i = 0, e = Str.length(); i != e; ++i) - Record.push_back(StrBegin[i]); + StringRef Str = NMD->getName(); + for (unsigned i = 0, e = Str.size(); i != e; ++i) + Record.push_back(Str[i]); Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/); Record.clear(); @@ -540,7 +543,7 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, if (NMD->getOperand(i)) Record.push_back(VE.getValueID(NMD->getOperand(i))); else - Record.push_back(0); + Record.push_back(~0U); } Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); Record.clear(); @@ -551,6 +554,27 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, Stream.ExitBlock(); } +static void WriteFunctionLocalMetadata(const Function &F, + const ValueEnumerator &VE, + BitstreamWriter &Stream) { + bool StartedMetadataBlock = false; + SmallVector Record; + const ValueEnumerator::ValueList &Vals = VE.getMDValues(); + + for (unsigned i = 0, e = Vals.size(); i != e; ++i) + if (const MDNode *N = dyn_cast(Vals[i].first)) + if (N->getFunction() == &F) { + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + StartedMetadataBlock = true; + } + WriteMDNode(N, VE, Stream, Record); + } + + if (StartedMetadataBlock) + Stream.ExitBlock(); +} + static void WriteMetadataAttachment(const Function &F, const ValueEnumerator &VE, BitstreamWriter &Stream) { @@ -1194,6 +1218,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, VE.getFunctionConstantRange(CstStart, CstEnd); WriteConstants(CstStart, CstEnd, VE, Stream, false); + // If there is function-local metadata, emit it now. + WriteFunctionLocalMetadata(F, VE, Stream); + // Keep a running idea of what the instruction ID is. 
unsigned InstID = CstEnd; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index d8128dba1a11..cb139e5e41fb 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -74,9 +74,10 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Enumerate types used by the type symbol table. EnumerateTypeSymbolTable(M->getTypeSymbolTable()); - // Insert constants that are named at module level into the slot pool so that - // the module symbol table can refer to them... + // Insert constants and metadata that are named at module level into the slot + // pool so that the module symbol table can refer to them... EnumerateValueSymbolTable(M->getValueSymbolTable()); + EnumerateMDSymbolTable(M->getMDSymbolTable()); SmallVector, 8> MDs; @@ -90,8 +91,13 @@ ValueEnumerator::ValueEnumerator(const Module *M) { for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); - OI != E; ++OI) + OI != E; ++OI) { + if (MDNode *MD = dyn_cast(*OI)) + if (MD->isFunctionLocal()) + // These will get enumerated during function-incorporation. + continue; EnumerateOperandType(*OI); + } EnumerateType(I->getType()); if (const CallInst *CI = dyn_cast(I)) EnumerateAttributes(CI->getAttributes()); @@ -196,6 +202,33 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { EnumerateValue(VI->getValue()); } +/// EnumerateMDSymbolTable - Insert all of the values in the specified metadata +/// table. +void ValueEnumerator::EnumerateMDSymbolTable(const MDSymbolTable &MST) { + for (MDSymbolTable::const_iterator MI = MST.begin(), ME = MST.end(); + MI != ME; ++MI) + EnumerateValue(MI->getValue()); +} + +void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { + // Check to see if it's already in! + unsigned &MDValueID = MDValueMap[MD]; + if (MDValueID) { + // Increment use count. + MDValues[MDValueID-1].second++; + return; + } + + // Enumerate the type of this value. + EnumerateType(MD->getType()); + + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) + if (MDNode *E = MD->getOperand(i)) + EnumerateValue(E); + MDValues.push_back(std::make_pair(MD, 1U)); + MDValueMap[MD] = Values.size(); +} + void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) { // Check to see if it's already in! unsigned &MDValueID = MDValueMap[MD]; @@ -212,7 +245,7 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) { MDValues.push_back(std::make_pair(MD, 1U)); MDValueMap[MD] = MDValues.size(); MDValueID = MDValues.size(); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (Value *V = N->getOperand(i)) EnumerateValue(V); else @@ -221,14 +254,6 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) { return; } - if (const NamedMDNode *N = dyn_cast(MD)) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - EnumerateValue(N->getOperand(i)); - MDValues.push_back(std::make_pair(MD, 1U)); - MDValueMap[MD] = Values.size(); - return; - } - // Add the value. 
assert(isa(MD) && "Unknown metadata kind"); MDValues.push_back(std::make_pair(MD, 1U)); @@ -239,6 +264,8 @@ void ValueEnumerator::EnumerateValue(const Value *V) { assert(!V->getType()->isVoidTy() && "Can't insert void values!"); if (const MetadataBase *MB = dyn_cast(V)) return EnumerateMetadata(MB); + else if (const NamedMDNode *NMD = dyn_cast(V)) + return EnumerateNamedMDNode(NMD); // Check to see if it's already in! unsigned &ValueID = ValueMap[V]; @@ -309,6 +336,7 @@ void ValueEnumerator::EnumerateType(const Type *Ty) { // walk through it, enumerating the types of the constant. void ValueEnumerator::EnumerateOperandType(const Value *V) { EnumerateType(V->getType()); + if (const Constant *C = dyn_cast(V)) { // If this constant is already enumerated, ignore it, we know its type must // be enumerated. @@ -382,7 +410,15 @@ void ValueEnumerator::incorporateFunction(const Function &F) { // Add all of the instructions. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { - if (I->getType() != Type::getVoidTy(F.getContext())) + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + if (MDNode *MD = dyn_cast(*OI)) + if (!MD->isFunctionLocal()) + // These were already enumerated during ValueEnumerator creation. + continue; + EnumerateOperandType(*OI); + } + if (!I->getType()->isVoidTy()) EnumerateValue(I); } } diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 3c83e3569560..c50fe9ce7672 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -27,9 +27,11 @@ class BasicBlock; class Function; class Module; class MetadataBase; +class NamedMDNode; class AttrListPtr; class TypeSymbolTable; class ValueSymbolTable; +class MDSymbolTable; class ValueEnumerator { public: @@ -126,6 +128,7 @@ class ValueEnumerator { void OptimizeConstants(unsigned CstStart, unsigned CstEnd); void EnumerateMetadata(const MetadataBase *MD); + void EnumerateNamedMDNode(const NamedMDNode *NMD); void EnumerateValue(const Value *V); void EnumerateType(const Type *T); void EnumerateOperandType(const Value *V); @@ -133,6 +136,7 @@ class ValueEnumerator { void EnumerateTypeSymbolTable(const TypeSymbolTable &ST); void EnumerateValueSymbolTable(const ValueSymbolTable &ST); + void EnumerateMDSymbolTable(const MDSymbolTable &ST); }; } // End llvm namespace diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 761fbc6870f0..ca1f4a3e0147 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -1,4 +1,4 @@ -//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===// // // The LLVM Compiler Infrastructure // @@ -77,18 +77,18 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) { assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!"); assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!"); - + // find group for each register unsigned Group1 = GetGroup(Reg1); unsigned Group2 = GetGroup(Reg2); - + // if either group is 0, then that must become the parent unsigned Parent = (Group1 == 0) ? Group1 : Group2; unsigned Other = (Parent == Group1) ? Group2 : Group1; GroupNodes.at(Other) = Parent; return Parent; } - + unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) { // Create a new GroupNode for Reg. 
Reg's existing GroupNode must @@ -111,7 +111,7 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) AggressiveAntiDepBreaker:: AggressiveAntiDepBreaker(MachineFunction& MFi, - TargetSubtarget::RegClassVector& CriticalPathRCs) : + TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TRI(MF.getTarget().getRegisterInfo()), @@ -126,9 +126,9 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, else CriticalPathSet |= CPSet; } - + DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); - DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; r = CriticalPathSet.find_next(r)) dbgs() << " " << TRI->getName(r)); DEBUG(dbgs() << '\n'); @@ -232,10 +232,11 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, // schedule region). if (State->IsLive(Reg)) { DEBUG(if (State->GetGroup(Reg) != 0) - dbgs() << " " << TRI->getName(Reg) << "=g" << + dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg) << "->g0(region live-out)"); State->UnionGroups(Reg, 0); - } else if ((DefIndices[Reg] < InsertPosIndex) && (DefIndices[Reg] >= Count)) { + } else if ((DefIndices[Reg] < InsertPosIndex) + && (DefIndices[Reg] >= Count)) { DefIndices[Reg] = Count; } } @@ -266,7 +267,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || + if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); PassthruRegs.insert(Reg); @@ -320,11 +321,12 @@ static SUnit *CriticalPathStep(SUnit *SU) { } void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, - const char *tag, const char *header, + const char *tag, + const char *header, const char *footer) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); - std::multimap& + std::multimap& RegRefs = State->GetRegRefs(); if (!State->IsLive(Reg)) { @@ -355,10 +357,12 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer); } -void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count, - std::set& PassthruRegs) { +void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, + unsigned Count, + std::set& PassthruRegs) +{ unsigned *DefIndices = State->GetDefIndices(); - std::multimap& + std::multimap& RegRefs = State->GetRegRefs(); // Handle dead defs by simulating a last-use of the register just @@ -371,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - + HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); } @@ -382,7 +386,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. 
Also assume all registers @@ -398,11 +402,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou unsigned AliasReg = *Alias; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); - DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << TRI->getName(AliasReg) << ")"); } } - + // Note register reference... const TargetRegisterClass *RC = NULL; if (i < MI->getDesc().getNumOperands()) @@ -438,7 +442,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned Count) { DEBUG(dbgs() << "\tUse Groups:"); - std::multimap& + std::multimap& RegRefs = State->GetRegRefs(); // Scan the register uses for this instruction and update @@ -448,9 +452,9 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << - State->GetGroup(Reg)); + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg)); // It wasn't previously live but now it is, this is a kill. Forget // the previous live-range information and start a new live-range @@ -472,7 +476,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } - + DEBUG(dbgs() << '\n'); // Form a group of all defs and uses of a KILL instruction to ensure @@ -486,7 +490,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - + if (FirstReg != 0) { DEBUG(dbgs() << "=" << TRI->getName(Reg)); State->UnionGroups(FirstReg, Reg); @@ -495,7 +499,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, FirstReg = Reg; } } - + DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); } } @@ -507,13 +511,14 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { // Check all references that need rewriting for Reg. For each, use // the corresponding register class to narrow the set of registers // that are appropriate for renaming. - std::pair::iterator, std::multimap::iterator> Range = State->GetRegRefs().equal_range(Reg); - for (std::multimap::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) { + for (std::multimap::iterator Q = Range.first, + QE = Range.second; Q != QE; ++Q) { const TargetRegisterClass *RC = Q->second.RC; if (RC == NULL) continue; @@ -527,9 +532,9 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { DEBUG(dbgs() << " " << RC->getName()); } - + return BV; -} +} bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned AntiDepGroupIndex, @@ -537,7 +542,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( std::map &RenameMap) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); - std::multimap& + std::multimap& RegRefs = State->GetRegRefs(); // Collect all referenced registers in the same group as @@ -552,7 +557,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // Find the "superest" register in the group. At the same time, // collect the BitVector of registers that can be used to rename // each register. 
- DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex << ":\n"); + DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); std::map RenameRegisterMap; unsigned SuperReg = 0; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -563,7 +569,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // If Reg has any references, then collect possible rename regs if (RegRefs.count(Reg) > 0) { DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); - + BitVector BV = GetRenameRegisters(Reg); RenameRegisterMap.insert(std::pair(Reg, BV)); @@ -590,7 +596,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( static int renamecnt = 0; if (renamecnt++ % DebugDiv != DebugMod) return false; - + dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << " for debug ***\n"; } @@ -600,9 +606,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // order. If that register is available, and the corresponding // registers are available for the other group subregisters, then we // can use those registers to rename. - const TargetRegisterClass *SuperRC = + const TargetRegisterClass *SuperRC = TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other); - + const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); if (RB == RE) { @@ -624,7 +630,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( const unsigned NewSuperReg = *R; // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; - + DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); RenameMap.clear(); @@ -643,7 +649,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } DEBUG(dbgs() << " " << TRI->getName(NewReg)); - + // Check if Reg can be renamed to NewReg. BitVector BV = RenameRegisterMap[Reg]; if (!BV.test(NewReg)) { @@ -663,7 +669,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( for (const unsigned *Alias = TRI->getAliasSet(NewReg); *Alias; ++Alias) { unsigned AliasReg = *Alias; - if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { + if (State->IsLive(AliasReg) || + (KillIndices[Reg] > DefIndices[AliasReg])) { DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); found = true; break; @@ -672,11 +679,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( if (found) goto next_super_reg; } - + // Record that 'Reg' can be renamed to 'NewReg'. RenameMap.insert(std::pair(Reg, NewReg)); } - + // If we fall-out here, then every register in the group can be // renamed, as recorded in RenameMap. RenameOrder.erase(SuperRC); @@ -704,13 +711,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( unsigned InsertPosIndex) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); - std::multimap& + std::multimap& RegRefs = State->GetRegRefs(); // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; - + // For each regclass the next register to use for renaming. 
RenameOrderType RenameOrder; @@ -729,17 +736,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; - if (!CriticalPathSU || - ((SU->getDepth() + SU->Latency) > + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { CriticalPathSU = SU; } } - + CriticalPathMI = CriticalPathSU->getInstr(); } -#ifndef NDEBUG +#ifndef NDEBUG DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { @@ -766,7 +773,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Process the defs in MI... PrescanInstruction(MI, Count, PassthruRegs); - + // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. std::vector Edges; @@ -779,7 +786,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; - } else { + } else { ExcludeRegs = &CriticalPathSet; } @@ -790,14 +797,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( for (unsigned i = 0, e = Edges.size(); i != e; ++i) { SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); - + if ((Edge->getKind() != SDep::Anti) && (Edge->getKind() != SDep::Output)) continue; - + unsigned AntiDepReg = Edge->getReg(); DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - + if (!AllocatableSet.test(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); @@ -816,12 +823,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } else { // No anti-dep breaking for implicit deps MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); - assert(AntiDepOp != NULL && "Can't find index for defined register operand"); + assert(AntiDepOp != NULL && + "Can't find index for defined register operand"); if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) { DEBUG(dbgs() << " (implicit)\n"); continue; } - + // If the SUnit has other dependencies on the SUnit that // it anti-depends on, don't bother breaking the // anti-dependency since those edges would prevent such @@ -847,58 +855,59 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << " (real dependency)\n"); AntiDepReg = 0; break; - } else if ((P->getSUnit() != NextSU) && - (P->getKind() == SDep::Data) && + } else if ((P->getSUnit() != NextSU) && + (P->getKind() == SDep::Data) && (P->getReg() == AntiDepReg)) { DEBUG(dbgs() << " (other dependency)\n"); AntiDepReg = 0; break; } } - + if (AntiDepReg == 0) continue; } - + assert(AntiDepReg != 0); if (AntiDepReg == 0) continue; - + // Determine AntiDepReg's register group. const unsigned GroupIndex = State->GetGroup(AntiDepReg); if (GroupIndex == 0) { DEBUG(dbgs() << " (zero group)\n"); continue; } - + DEBUG(dbgs() << '\n'); - + // Look for a suitable register to use to break the anti-dependence. std::map RenameMap; if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { DEBUG(dbgs() << "\tBreaking anti-dependence edge on " << TRI->getName(AntiDepReg) << ":"); - + // Handle each group register... 
for (std::map::iterator S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) { unsigned CurrReg = S->first; unsigned NewReg = S->second; - - DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << - TRI->getName(NewReg) << "(" << + + DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << + TRI->getName(NewReg) << "(" << RegRefs.count(CurrReg) << " refs)"); - + // Update the references to the old register CurrReg to // refer to the new register NewReg. - std::pair::iterator, + std::pair::iterator, std::multimap::iterator> + AggressiveAntiDepState::RegisterReference>::iterator> Range = RegRefs.equal_range(CurrReg); - for (std::multimap::iterator + for (std::multimap::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second.Operand->setReg(NewReg); } - + // We just went back in time and modified history; the // liveness information for CurrReg is now inconsistent. Set // the state as if it were dead. @@ -906,7 +915,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( RegRefs.erase(NewReg); DefIndices[NewReg] = DefIndices[CurrReg]; KillIndices[NewReg] = KillIndices[CurrReg]; - + State->UnionGroups(CurrReg, 0); RegRefs.erase(CurrReg); DefIndices[CurrReg] = KillIndices[CurrReg]; @@ -915,7 +924,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( (DefIndices[CurrReg] == ~0u)) && "Kill and Def maps aren't consistent for AntiDepReg!"); } - + ++Broken; DEBUG(dbgs() << '\n'); } @@ -924,6 +933,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( ScanInstruction(MI, Count); } - + return Broken; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index d385a212c7ef..a62d68c2a834 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -30,7 +30,7 @@ #include namespace llvm { - /// Class AggressiveAntiDepState + /// Class AggressiveAntiDepState /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { public: @@ -54,27 +54,27 @@ namespace llvm { /// is the parent of a group, or point to another node to indicate /// that it is a member of the same group as that node. std::vector GroupNodes; - + /// GroupNodeIndices - For each register, the index of the GroupNode /// currently representing the group that the register belongs to. /// Register 0 is always represented by the 0 group, a group /// composed of registers that are not eligible for anti-aliasing. unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister]; - + /// RegRefs - Map registers to all their references within a live range. std::multimap RegRefs; - + /// KillIndices - The index of the most recent kill (proceding bottom-up), /// or ~0u if the register is not live. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; - + /// DefIndices - The index of the most recent complete def (proceding bottom /// up), or ~0u if the register is live. unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; public: AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); - + /// GetKillIndices - Return the kill indices. unsigned *GetKillIndices() { return KillIndices; } @@ -87,13 +87,14 @@ namespace llvm { // GetGroup - Get the group for a register. The returned value is // the index of the GroupNode representing the group. unsigned GetGroup(unsigned Reg); - + // GetGroupRegs - Return a vector of the registers belonging to a // group. If RegRefs is non-NULL then only included referenced registers. 
void GetGroupRegs( unsigned Group, std::vector &Regs, - std::multimap *RegRefs); + std::multimap *RegRefs); // UnionGroups - Union Reg1's and Reg2's groups to form a new // group. Return the index of the GroupNode representing the @@ -110,7 +111,7 @@ namespace llvm { }; - /// Class AggressiveAntiDepBreaker + /// Class AggressiveAntiDepBreaker class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; @@ -130,14 +131,15 @@ namespace llvm { AggressiveAntiDepState *State; public: - AggressiveAntiDepBreaker(MachineFunction& MFi, + AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); - + /// Start - Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB); - /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical + /// path /// of the ScheduleDAG and break them by renaming registers. /// unsigned BreakAntiDependencies(std::vector& SUnits, @@ -160,7 +162,7 @@ namespace llvm { /// IsImplicitDefUse - Return true if MO represents a register /// that is both implicitly used and defined in MI bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO); - + /// GetPassthruRegs - If MI implicitly def/uses a register, then /// return that register and all subregisters. void GetPassthruRegs(MachineInstr *MI, std::set& PassthruRegs); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6b24e24d3716..876f628336fa 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -807,124 +807,145 @@ void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const { // Print out the specified constant, without a storage class. Only the // constants valid in constant expressions can occur here. void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { - if (CV->isNullValue() || isa(CV)) + if (CV->isNullValue() || isa(CV)) { O << '0'; - else if (const ConstantInt *CI = dyn_cast(CV)) { + return; + } + + if (const ConstantInt *CI = dyn_cast(CV)) { O << CI->getZExtValue(); - } else if (const GlobalValue *GV = dyn_cast(CV)) { + return; + } + + if (const GlobalValue *GV = dyn_cast(CV)) { // This is a constant address for a global variable or function. Use the // name of the variable or function as the address value. O << Mang->getMangledName(GV); - } else if (const ConstantExpr *CE = dyn_cast(CV)) { - const TargetData *TD = TM.getTargetData(); - unsigned Opcode = CE->getOpcode(); - switch (Opcode) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - llvm_unreachable("FIXME: Don't support this constant cast expr"); - case Instruction::GetElementPtr: { - // generate a symbolic expression for the byte address - const Constant *ptrVal = CE->getOperand(0); - SmallVector idxVec(CE->op_begin()+1, CE->op_end()); - if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], - idxVec.size())) { - // Truncate/sext the offset to the pointer size. 
- if (TD->getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD->getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } - - if (Offset) - O << '('; - EmitConstantValueOnly(ptrVal); - if (Offset > 0) - O << ") + " << Offset; - else if (Offset < 0) - O << ") - " << -Offset; - } else { - EmitConstantValueOnly(ptrVal); - } - break; - } - case Instruction::BitCast: - return EmitConstantValueOnly(CE->getOperand(0)); - - case Instruction::IntToPtr: { - // Handle casts to pointers by changing them into casts to the appropriate - // integer type. This promotes constant folding and simplifies this code. - Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), - false/*ZExt*/); - return EmitConstantValueOnly(Op); - } - - - case Instruction::PtrToInt: { - // Support only foldable casts to/from pointers that can be eliminated by - // changing the pointer to the appropriately sized integer type. - Constant *Op = CE->getOperand(0); - const Type *Ty = CE->getType(); - - // We can emit the pointer value into this slot if the slot is an - // integer slot greater or equal to the size of the pointer. - if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) - return EmitConstantValueOnly(Op); - - O << "(("; - EmitConstantValueOnly(Op); - APInt ptrMask = - APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); - - SmallString<40> S; - ptrMask.toStringUnsigned(S); - O << ") & " << S.str() << ')'; - break; - } - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - O << '('; - EmitConstantValueOnly(CE->getOperand(0)); - O << ')'; - switch (Opcode) { - case Instruction::Add: - O << " + "; - break; - case Instruction::Sub: - O << " - "; - break; - case Instruction::And: - O << " & "; - break; - case Instruction::Or: - O << " | "; - break; - case Instruction::Xor: - O << " ^ "; - break; - default: - break; - } - O << '('; - EmitConstantValueOnly(CE->getOperand(1)); - O << ')'; - break; - default: - llvm_unreachable("Unsupported operator!"); - } - } else if (const BlockAddress *BA = dyn_cast(CV)) { + return; + } + + if (const BlockAddress *BA = dyn_cast(CV)) { GetBlockAddressSymbol(BA)->print(O, MAI); - } else { + return; + } + + const ConstantExpr *CE = dyn_cast(CV); + if (CE == 0) { llvm_unreachable("Unknown constant value!"); + O << '0'; + return; + } + + switch (CE->getOpcode()) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + default: + llvm_unreachable("FIXME: Don't support this constant cast expr"); + case Instruction::GetElementPtr: { + // generate a symbolic expression for the byte address + const TargetData *TD = TM.getTargetData(); + const Constant *ptrVal = CE->getOperand(0); + SmallVector idxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size()); + if (Offset == 0) + return EmitConstantValueOnly(ptrVal); + + // Truncate/sext the offset to the pointer size. 
+ if (TD->getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD->getPointerSizeInBits(); + Offset = (Offset << SExtAmount) >> SExtAmount; + } + + if (Offset) + O << '('; + EmitConstantValueOnly(ptrVal); + if (Offset > 0) + O << ") + " << Offset; + else + O << ") - " << -Offset; + return; + } + case Instruction::BitCast: + return EmitConstantValueOnly(CE->getOperand(0)); + + case Instruction::IntToPtr: { + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + const TargetData *TD = TM.getTargetData(); + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); + return EmitConstantValueOnly(Op); + } + + case Instruction::PtrToInt: { + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + const TargetData *TD = TM.getTargetData(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater or equal to the size of the pointer. + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) + return EmitConstantValueOnly(Op); + + O << "(("; + EmitConstantValueOnly(Op); + APInt ptrMask = + APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); + + SmallString<40> S; + ptrMask.toStringUnsigned(S); + O << ") & " << S.str() << ')'; + return; + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. 
+
+  case Instruction::Trunc:
+    // We emit the value and depend on the assembler to truncate the generated
+    // expression properly.  This is important for differences between
+    // blockaddress labels.  Since the two labels are in the same function, it
+    // is reasonable to treat their delta as a 32-bit value.
+    return EmitConstantValueOnly(CE->getOperand(0));
+
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    O << '(';
+    EmitConstantValueOnly(CE->getOperand(0));
+    O << ')';
+    switch (CE->getOpcode()) {
+    case Instruction::Add:
+      O << " + ";
+      break;
+    case Instruction::Sub:
+      O << " - ";
+      break;
+    case Instruction::And:
+      O << " & ";
+      break;
+    case Instruction::Or:
+      O << " | ";
+      break;
+    case Instruction::Xor:
+      O << " ^ ";
+      break;
+    default:
+      break;
+    }
+    O << '(';
+    EmitConstantValueOnly(CE->getOperand(1));
+    O << ')';
+    break;
   }
 }
@@ -1225,8 +1246,7 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
                                             unsigned AddrSpace) {
   const TargetData *TD = TM.getTargetData();
   unsigned BitWidth = CI->getBitWidth();
-  assert(isPowerOf2_32(BitWidth) &&
-         "Non-power-of-2-sized integers not handled!");
+  assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
 
   // We don't expect assemblers to support integer data directives
   // for more than 64 bits, so we emit the data in at most 64-bit
@@ -1239,39 +1259,34 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
     else
       Val = RawData[i];
 
-    if (MAI->getData64bitsDirective(AddrSpace))
+    if (MAI->getData64bitsDirective(AddrSpace)) {
       O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n';
-    else if (TD->isBigEndian()) {
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " most significant half of i64 " << Val;
-      }
-      O << '\n';
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " least significant half of i64 " << Val;
-      }
-      O << '\n';
-    } else {
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " least significant half of i64 " << Val;
-      }
-      O << '\n';
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " most significant half of i64 " << Val;
-      }
-      O << '\n';
+      continue;
     }
+
+    // Emit two 32-bit chunks, order depends on endianness.
+    unsigned FirstChunk = unsigned(Val), SecondChunk = unsigned(Val >> 32);
+    const char *FirstName = " least", *SecondName = " most";
+    if (TD->isBigEndian()) {
+      std::swap(FirstChunk, SecondChunk);
+      std::swap(FirstName, SecondName);
+    }
+
+    O << MAI->getData32bitsDirective(AddrSpace) << FirstChunk;
+    if (VerboseAsm) {
+      O.PadToColumn(MAI->getCommentColumn());
+      O << MAI->getCommentString()
+        << FirstName << " significant half of i64 " << Val;
+    }
+    O << '\n';
+
+    O << MAI->getData32bitsDirective(AddrSpace) << SecondChunk;
+    if (VerboseAsm) {
+      O.PadToColumn(MAI->getCommentColumn());
+      O << MAI->getCommentString()
+        << SecondName << " significant half of i64 " << Val;
    }
+    O << '\n';
   }
 }
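
(The rewritten loop computes both 32-bit halves once and swaps them for big-endian targets, instead of duplicating the whole emission sequence per endianness. A self-contained sketch of the same idea -- an assumed helper, not the AsmPrinter API:

    #include <cstdint>
    #include <utility>

    // Order the two halves of Val for emission: least significant half first
    // on little-endian targets, most significant half first on big-endian ones.
    void orderChunks(uint64_t Val, bool IsBigEndian,
                     uint32_t &First, uint32_t &Second) {
      First  = static_cast<uint32_t>(Val);        // low half
      Second = static_cast<uint32_t>(Val >> 32);  // high half
      if (IsBigEndian)
        std::swap(First, Second);
    }
)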
@@ -1284,22 +1299,39 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
   if (CV->isNullValue() || isa<UndefValue>(CV)) {
     EmitZeros(Size, AddrSpace);
     return;
-  } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+  }
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
     EmitGlobalConstantArray(CVA , AddrSpace);
     return;
-  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+  }
+
+  if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
     EmitGlobalConstantStruct(CVS, AddrSpace);
     return;
-  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+  }
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
     EmitGlobalConstantFP(CFP, AddrSpace);
     return;
-  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+  }
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    // If we can directly emit an 8-byte constant, do it.
+    if (Size == 8)
+      if (const char *Data64Dir = MAI->getData64bitsDirective(AddrSpace)) {
+        O << Data64Dir << CI->getZExtValue() << '\n';
+        return;
+      }
+
+    // Small integers are handled below; large integers are handled here.
     if (Size > 4) {
       EmitGlobalConstantLargeInt(CI, AddrSpace);
       return;
     }
-  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+  }
+
+  if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
     EmitGlobalConstantVector(CP);
     return;
   }
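
(The new ConstantInt fast path only fires when the target actually advertises a 64-bit data directive; otherwise control falls through to the Size > 4 chunked path. Sketch of that guard's shape -- the helper names here are illustrative stand-ins, not AsmPrinter members:

    #include <cstdint>

    // Hypothetical free functions standing in for the AsmPrinter's members.
    const char *getData64Directive();                 // may return null
    void emitDirective(const char *Dir, uint64_t V);  // one .quad-style line
    void emitChunked(uint64_t V);                     // two 32-bit directives

    void emitInt(uint64_t Value, unsigned Size) {
      if (Size == 8)
        if (const char *Dir = getData64Directive())   // null on 32-bit-only targets
          return emitDirective(Dir, Value);
      if (Size > 4)
        emitChunked(Value);                           // fall back to 32-bit halves
    }
)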
@@ -1617,7 +1649,7 @@ void AsmPrinter::printLabel(unsigned Id) const {
 
 /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
 /// instruction, using the specified assembler variant.  Targets should
-/// overried this to format as appropriate.
+/// override this to format as appropriate.
 bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                  unsigned AsmVariant, const char *ExtraCode) {
   // Target doesn't support this yet!
@@ -1645,15 +1677,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
   // This code must use the function name itself, and not the function number,
   // since it must be possible to generate the label name from within other
   // functions.
-  std::string FuncName = Mang->getMangledName(F);
+  SmallString<60> FnName;
+  Mang->getNameWithPrefix(FnName, F, false);
 
-  SmallString<60> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
-    << FuncName.size() << '_' << FuncName << '_'
-    << Mang->makeNameProper(BB->getName())
-    << Suffix;
+  // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME!
+  SmallString<60> NameResult;
+  Mang->getNameWithPrefix(NameResult,
+                          StringRef("BA") + Twine((unsigned)FnName.size()) +
+                          "_" + FnName.str() + "_" + BB->getName() + Suffix,
+                          Mangler::Private);
 
-  return OutContext.GetOrCreateSymbol(Name.str());
+  return OutContext.GetOrCreateSymbol(NameResult.str());
 }
 
 MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8a3ceb631d03..15f37aec148a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -212,19 +212,30 @@ class DbgScope {
   ///
   void addVariable(DbgVariable *V) { Variables.push_back(V); }
 
-  void fixInstructionMarkers() {
+  void fixInstructionMarkers(DenseMap<const MachineInstr *, unsigned> &MIIndexMap) {
     assert (getFirstInsn() && "First instruction is missing!");
-    if (getLastInsn())
-      return;
-
-    // If a scope does not have an instruction to mark an end then use
-    // the end of last child scope.
+
+    // Use the end of last child scope as end of this scope.
     SmallVector<DbgScope *, 4> &Scopes = getScopes();
-    assert (!Scopes.empty() && "Inner most scope does not have last insn!");
-    DbgScope *L = Scopes.back();
-    if (!L->getLastInsn())
-      L->fixInstructionMarkers();
-    setLastInsn(L->getLastInsn());
+    const MachineInstr *LastInsn = getFirstInsn();
+    unsigned LIndex = 0;
+    if (Scopes.empty()) {
+      assert (getLastInsn() && "Inner most scope does not have last insn!");
+      return;
+    }
+    for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(),
+           SE = Scopes.end(); SI != SE; ++SI) {
+      DbgScope *DS = *SI;
+      DS->fixInstructionMarkers(MIIndexMap);
+      const MachineInstr *DSLastInsn = DS->getLastInsn();
+      unsigned DSI = MIIndexMap[DSLastInsn];
+      if (DSI > LIndex) {
+        LastInsn = DSLastInsn;
+        LIndex = DSI;
+      }
+    }
+    setLastInsn(LastInsn);
   }
 
 #ifndef NDEBUG
@@ -1021,6 +1032,16 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
   return Enumerator;
 }
 
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// printer to not emit usual symbol prefix before the symbol name is used then
+/// return linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+  char One = '\1';
+  if (LinkageName.startswith(StringRef(&One, 1)))
+    return LinkageName.substr(1);
+  return LinkageName;
+}
+
 /// createGlobalVariableDIE - Create new DIE using GV.
 DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
   // If the global variable was optmized out then no need to create debug info
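
(getRealLinkageName centralizes the '\1' marker handling that was previously duplicated at each use site; the marker tells the asm printer to emit the name without the usual symbol prefix. A plain-string analogue, for illustration:

    #include <string>

    // Strip the leading '\1' escape, if present, before recording the name.
    std::string realLinkageName(const std::string &Name) {
      if (!Name.empty() && Name[0] == '\1')
        return Name.substr(1);
      return Name;
    }
    // realLinkageName("\1-[NSObject init]") == "-[NSObject init]"
)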
@@ -1033,16 +1054,10 @@ DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
                 GV.getDisplayName());
 
   StringRef LinkageName = GV.getLinkageName();
-  if (!LinkageName.empty()) {
-    // Skip special LLVM prefix that is used to inform the asm printer to not
-    // emit usual symbol prefix before the symbol name. This happens for
-    // Objective-C symbol names and symbol whose name is replaced using GCC's
-    // __asm__ attribute.
-    if (LinkageName[0] == 1)
-      LinkageName = LinkageName.substr(1);
+  if (!LinkageName.empty())
     addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
-              LinkageName);
-  }
+              getRealLinkageName(LinkageName));
+
   addType(GVDie, GV.getType());
   if (!GV.isLocalToUnit())
     addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
@@ -1074,10 +1089,9 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
     addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
 
     uint64_t Offset = DT.getOffsetInBits();
-    uint64_t FieldOffset = Offset;
     uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
     uint64_t HiMark = (Offset + FieldSize) & AlignMask;
-    FieldOffset = (HiMark - FieldSize);
+    uint64_t FieldOffset = (HiMark - FieldSize);
     Offset -= FieldOffset;
 
     // Maybe we need to work from the other end.
@@ -1119,16 +1133,10 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
   addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
 
   StringRef LinkageName = SP.getLinkageName();
-  if (!LinkageName.empty()) {
-    // Skip special LLVM prefix that is used to inform the asm printer to not
-    // emit usual symbol prefix before the symbol name. This happens for
-    // Objective-C symbol names and symbol whose name is replaced using GCC's
-    // __asm__ attribute.
-    if (LinkageName[0] == 1)
-      LinkageName = LinkageName.substr(1);
+  if (!LinkageName.empty())
     addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
-              LinkageName);
-  }
+              getRealLinkageName(LinkageName));
+
   addSourceLine(SPDie, &SP);
 
   // Add prototyped tag, if C or ObjC.
@@ -1382,7 +1390,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
     I->second.push_back(std::make_pair(StartID, ScopeDIE));
 
   StringPool.insert(InlinedSP.getName());
-  StringPool.insert(InlinedSP.getLinkageName());
+  StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName()));
+
   DILocation DL(Scope->getInlinedAt());
   addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
   addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
@@ -1644,8 +1653,11 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   ModuleCU->insertDIE(N, VariableDie);
 
   // Add to context owner.
-  if (DI_GV.isDefinition()
-      && !DI_GV.getContext().isCompileUnit()) {
+  DIDescriptor GVContext = DI_GV.getContext();
+  // Do not create specification DIE if context is either compile unit
+  // or a subprogram.
+  if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
+      && !GVContext.isSubprogram()) {
     // Create specification DIE.
     DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
     addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1663,7 +1675,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
                    Asm->Mang->getMangledName(DI_GV.getGlobal()));
     addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
   }
-  addToContextOwner(VariableDie, DI_GV.getContext());
+  addToContextOwner(VariableDie, GVContext);
 
   // Expose as global. FIXME - need to check external flag.
   ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
@@ -1804,7 +1816,8 @@ void DwarfDebug::endModule() {
     DIE *NDie = ModuleCU->getDIE(N);
     if (!NDie) continue;
     addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
-    addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+    // FIXME - This is not the correct approach.
+    // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
   }
 
   // Standard sections final addresses.
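
(The createMemberDIE hunk above only narrows FieldOffset's scope; the bitfield arithmetic itself is unchanged. Since the masking is easy to misread, here is a worked instance with made-up numbers:

    #include <cassert>
    #include <cstdint>

    int main() {
      // A 5-bit field ending at bit 42 inside 32-bit-aligned storage.
      uint64_t Offset = 37, FieldSize = 5, AlignInBits = 32;
      uint64_t AlignMask = ~(AlignInBits - 1);            // clears the low 5 bits
      uint64_t HiMark = (Offset + FieldSize) & AlignMask; // 42 & mask == 32
      uint64_t FieldOffset = HiMark - FieldSize;          // 27
      Offset -= FieldOffset;                              // 10, offset inside unit
      assert(FieldOffset == 27 && Offset == 10);
      return 0;
    }
)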
@@ -1976,12 +1989,15 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
   if (!DbgScopeMap.empty())
     return false;
 
+  DenseMap<const MachineInstr *, unsigned> MIIndexMap;
+  unsigned MIIndex = 0;
   // Scan each instruction and create scopes. First build working set of scopes.
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
+      MIIndexMap[MInsn] = MIIndex++;
       DebugLoc DL = MInsn->getDebugLoc();
       if (DL.isUnknown()) continue;
       DebugLocTuple DLT = MF->getDebugLocTuple(DL);
@@ -2014,16 +2030,10 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
     }
   }
 
-  // If a scope's last instruction is not set then use its child scope's
-  // last instruction as this scope's last instrunction.
-  for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
-         DE = DbgScopeMap.end(); DI != DE; ++DI) {
-    if (DI->second->isAbstractScope())
-      continue;
-    assert (DI->second->getFirstInsn() && "Invalid first instruction!");
-    DI->second->fixInstructionMarkers();
-    assert (DI->second->getLastInsn() && "Invalid last instruction!");
-  }
+  if (!CurrentFnDbgScope)
+    return false;
+
+  CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
 
   // Each scope has first instruction and last instruction to mark beginning
   // and end of a scope respectively. Create an inverse map that list scopes
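
(The new MIIndexMap assigns every MachineInstr a monotonically increasing index in program order, so fixInstructionMarkers can pick the child scope whose last instruction comes latest by comparing integers instead of walking the instruction stream. A generic sketch of the numbering step, with std::map standing in for llvm::DenseMap and a placeholder Instr type:

    #include <map>
    #include <vector>

    struct Instr {};  // stand-in for MachineInstr

    std::map<const Instr *, unsigned>
    numberInstructions(const std::vector<const Instr *> &Program) {
      std::map<const Instr *, unsigned> IndexMap;
      unsigned Index = 0;
      for (std::vector<const Instr *>::const_iterator I = Program.begin(),
             E = Program.end(); I != E; ++I)
        IndexMap[*I] = Index++;   // program order == index order
      return IndexMap;
    }
)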
@@ -2105,38 +2115,41 @@ void DwarfDebug::endFunction(MachineFunction *MF) {
   if (DbgScopeMap.empty())
     return;
 
-  // Define end label for subprogram.
-  EmitLabel("func_end", SubprogramCount);
-
-  // Get function line info.
-  if (!Lines.empty()) {
-    // Get section line info.
-    unsigned ID = SectionMap.insert(Asm->getCurrentSection());
-    if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
-    std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
-    // Append the function info to section info.
-    SectionLineInfos.insert(SectionLineInfos.end(),
-                            Lines.begin(), Lines.end());
+  if (CurrentFnDbgScope) {
+    // Define end label for subprogram.
+    EmitLabel("func_end", SubprogramCount);
+
+    // Get function line info.
+    if (!Lines.empty()) {
+      // Get section line info.
+      unsigned ID = SectionMap.insert(Asm->getCurrentSection());
+      if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+      std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+      // Append the function info to section info.
+      SectionLineInfos.insert(SectionLineInfos.end(),
+                              Lines.begin(), Lines.end());
+    }
+
+    // Construct abstract scopes.
+    for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+           AE = AbstractScopesList.end(); AI != AE; ++AI)
+      constructScopeDIE(*AI);
+
+    constructScopeDIE(CurrentFnDbgScope);
+
+    DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+                                                 MMI->getFrameMoves()));
   }
 
-  // Construct abstract scopes.
-  for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
-         AE = AbstractScopesList.end(); AI != AE; ++AI)
-    constructScopeDIE(*AI);
-
-  constructScopeDIE(CurrentFnDbgScope);
-
-  DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
-                                               MMI->getFrameMoves()));
-
   // Clear debug info
-  CurrentFnDbgScope = NULL;
-  DbgScopeMap.clear();
-  DbgScopeBeginMap.clear();
-  DbgScopeEndMap.clear();
-  ConcreteScopes.clear();
-  AbstractScopesList.clear();
-
+  if (CurrentFnDbgScope) {
+    CurrentFnDbgScope = NULL;
+    DbgScopeMap.clear();
+    DbgScopeBeginMap.clear();
+    DbgScopeEndMap.clear();
+    ConcreteScopes.clear();
+    AbstractScopesList.clear();
+  }
   Lines.clear();
 
   if (TimePassesIsEnabled)
@@ -2908,8 +2921,6 @@ void DwarfDebug::emitDebugInlineInfo() {
 
   for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
 
-//  for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
-  //        I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
     MDNode *Node = *I;
     ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
       = InlineInfo.find(Node);
@@ -2920,20 +2931,11 @@ void DwarfDebug::emitDebugInlineInfo() {
 
     if (LName.empty())
       Asm->EmitString(Name);
-    else {
-      // Skip special LLVM prefix that is used to inform the asm printer to not
-      // emit usual symbol prefix before the symbol name. This happens for
-      // Objective-C symbol names and symbol whose name is replaced using GCC's
-      // __asm__ attribute.
-      if (LName[0] == 1)
-        LName = LName.substr(1);
-//      Asm->EmitString(LName);
+    else
       EmitSectionOffset("string", "section_str",
-                        StringPool.idFor(LName), false, true);
+                        StringPool.idFor(getRealLinkageName(LName)), false, true);
 
-    }
     Asm->EOL("MIPS linkage name");
-//    Asm->EmitString(Name);
     EmitSectionOffset("string", "section_str",
                       StringPool.idFor(Name), false, true);
     Asm->EOL("Function name");
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 7a969f0ad8f4..6bc808c3c562 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -35,12 +35,13 @@ add_llvm_library(LLVMCodeGen
   MachineModuleInfoImpls.cpp
   MachinePassRegistry.cpp
   MachineRegisterInfo.cpp
-  MachineSink.cpp
   MachineSSAUpdater.cpp
+  MachineSink.cpp
   MachineVerifier.cpp
   MaxStackAlignment.cpp
   ObjectCodeEmitter.cpp
   OcamlGC.cpp
+  OptimizeExts.cpp
   PHIElimination.cpp
   Passes.cpp
   PostRASchedulerList.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 3c7961c2c33c..056e2d5b01e9 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -288,9 +288,11 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 }
 
 unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
+CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+                                                 unsigned AntiDepReg,
                                                  unsigned LastNewReg,
-                                                 const TargetRegisterClass *RC) {
+                                                 const TargetRegisterClass *RC)
+{
   for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
        RE = RC->allocation_order_end(MF); R != RE; ++R) {
     unsigned NewReg = *R;
@@ -300,12 +302,16 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
     // an anti-dependence with this AntiDepReg, because that would
     // re-introduce that anti-dependence.
     if (NewReg == LastNewReg) continue;
+    // If the instruction already has a def of the NewReg, it's not suitable.
+    // For example, Instruction with multiple definitions can result in this
+    // condition.
+    if (MI->modifiesRegister(NewReg, TRI)) continue;
     // If NewReg is dead and NewReg's most recent def is not before
     // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
-    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
-           "Kill and Def maps aren't consistent for AntiDepReg!");
-    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
-           "Kill and Def maps aren't consistent for NewReg!");
+    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+           && "Kill and Def maps aren't consistent for AntiDepReg!");
+    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+           && "Kill and Def maps aren't consistent for NewReg!");
     if (KillIndices[NewReg] != ~0u ||
         Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
        KillIndices[AntiDepReg] > DefIndices[NewReg])
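
(The new modifiesRegister check guards against picking a rename target that the instruction itself already defines. Roughly, for an instruction with multiple defs such as

    r1, r2 = divmod r3, r4    ; anti-dependence to be broken on r1

renaming r1 to a free register r5 is fine, but renaming r1 to r2 would make the instruction define r2 twice. The mnemonic here is invented; only the shape of the hazard matters.)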
@@ -336,14 +342,14 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
 
 #ifndef NDEBUG
   {
-    DEBUG(errs() << "Critical path has total latency "
+    DEBUG(dbgs() << "Critical path has total latency "
           << (Max->getDepth() + Max->Latency) << "\n");
-    DEBUG(errs() << "Available regs:");
+    DEBUG(dbgs() << "Available regs:");
     for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
       if (KillIndices[Reg] == ~0u)
-        DEBUG(errs() << " " << TRI->getName(Reg));
+        DEBUG(dbgs() << " " << TRI->getName(Reg));
     }
-    DEBUG(errs() << '\n');
+    DEBUG(dbgs() << '\n');
   }
 #endif
@@ -495,10 +501,10 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
       // TODO: Instead of picking the first free register, consider which might
       // be the best.
       if (AntiDepReg != 0) {
-        if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
+        if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
                                                        LastNewReg[AntiDepReg],
                                                        RC)) {
-          DEBUG(errs() << "Breaking anti-dependence edge on "
+          DEBUG(dbgs() << "Breaking anti-dependence edge on "
                 << TRI->getName(AntiDepReg) << " with "
                 << RegRefs.count(AntiDepReg) << " references"
                 << " using " << TRI->getName(NewReg) << "!\n");
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 496888d45f9d..9e8db022621a 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -64,11 +64,12 @@ namespace llvm {
   public:
     CriticalAntiDepBreaker(MachineFunction& MFi);
     ~CriticalAntiDepBreaker();
-    
+
     /// Start - Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB);
 
-    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+    /// path
     /// of the ScheduleDAG and break them by renaming registers.
     ///
     unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
@@ -87,7 +88,8 @@ namespace llvm {
   private:
     void PrescanInstruction(MachineInstr *MI);
     void ScanInstruction(MachineInstr *MI, unsigned Count);
-    unsigned findSuitableFreeRegister(unsigned AntiDepReg,
+    unsigned findSuitableFreeRegister(MachineInstr *MI,
+                                      unsigned AntiDepReg,
                                       unsigned LastNewReg,
                                       const TargetRegisterClass *);
   };
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 07a5d38db03f..0982eabff123 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -109,7 +109,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
       // If the instruction is dead, delete it!
       if (isDead(MI)) {
-        DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
         AnyChanges = true;
         MI->eraseFromParent();
         MIE = MBB->rend();
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index a6429f70001a..11a85a0ba79d 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -37,7 +37,7 @@ namespace llvm {
 /// startFunction - This callback is invoked when a new machine function is
 /// about to be emitted.
 void ELFCodeEmitter::startFunction(MachineFunction &MF) {
-  DEBUG(errs() << "processing function: "
+  DEBUG(dbgs() << "processing function: "
         << MF.getFunction()->getName() << "\n");
 
   // Get the ELF Section that this function belongs in.
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 3e1ee11b2166..5e5f58970ea3 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -1076,7 +1076,7 @@ void ELFWriter::OutputSectionsAndSectionTable() {
   // Emit all of sections to the file and build the section header table.
   for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
     ELFSection &S = *(*I);
-    DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+    DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
           << ", Size: " << S.Size << ", Offset: " << S.Offset
           << ", SectionData Size: " << S.size() << "\n");
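
(The errs()-to-dbgs() conversions in this and the following files route debug output through the stream from llvm/Support/Debug.h, which is only active under -debug and can be redirected or buffered, unlike the raw stderr stream. Typical usage pattern in a pass; the DEBUG_TYPE value is illustrative:

    #define DEBUG_TYPE "mypass"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    void reportReg(unsigned Reg) {
      // Compiled away in NDEBUG builds; gated on -debug / -debug-only=mypass.
      DEBUG(llvm::dbgs() << "considering reg " << Reg << '\n');
    }
)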
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 36925b1ff375..266c74c9995e 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -48,7 +48,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
   Scoreboard = new unsigned[ScoreboardDepth];
   ScoreboardHead = 0;
 
-  DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = "
+  DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = "
         << ScoreboardDepth << '\n');
 }
@@ -66,7 +66,7 @@ unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
 }
 
 void ExactHazardRecognizer::dumpScoreboard() {
-  errs() << "Scoreboard:\n";
+  dbgs() << "Scoreboard:\n";
 
   unsigned last = ScoreboardDepth - 1;
   while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
@@ -74,10 +74,10 @@ void ExactHazardRecognizer::dumpScoreboard() {
 
   for (unsigned i = 0; i <= last; i++) {
     unsigned FUs = Scoreboard[getFutureIndex(i)];
-    errs() << "\t";
+    dbgs() << "\t";
     for (int j = 31; j >= 0; j--)
-      errs() << ((FUs & (1 << j)) ? '1' : '0');
-    errs() << '\n';
+      dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+    dbgs() << '\n';
   }
 }
@@ -102,8 +102,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
       unsigned index = getFutureIndex(cycle + i);
       unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
       if (!freeUnits) {
-        DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
-        DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
+        DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+        DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
         DEBUG(SU->getInstr()->dump());
         return Hazard;
       }
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 4d25dccff575..055172b4e0e9 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Pass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -92,7 +93,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
     }
   }
 
-  errs() << "unsupported GC: " << Name << "\n";
+  dbgs() << "unsupported GC: " << Name << "\n";
   llvm_unreachable(0);
 }
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6e0bde6364d1..79b2986608dd 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -109,7 +110,7 @@ GCStrategy::~GCStrategy() {
 bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
 
 bool GCStrategy::performCustomLowering(Function &F) {
-  errs() << "gc " << getName() << " must override performCustomLowering.\n";
+  dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
   llvm_unreachable(0);
   return 0;
 }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c23d7070a34e..c61fd17e7911 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -229,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
   if (!TII) return false;
 
-  DEBUG(errs() << "\nIfcvt: function (" << ++FnNum <<  ") \'"
+  DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum <<  ") \'"
         << MF.getFunction()->getName() << "\'");
 
   if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
-    DEBUG(errs() << " skipped\n");
+    DEBUG(dbgs() << " skipped\n");
     return false;
   }
-  DEBUG(errs() << "\n");
+  DEBUG(dbgs() << "\n");
 
   MF.RenumberBlocks();
   BBAnalysis.resize(MF.getNumBlockIDs());
@@ -281,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
       case ICSimpleFalse: {
         bool isFalse = Kind == ICSimpleFalse;
         if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
-        DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+        DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
               << "): BB#" << BBI.BB->getNumber() << " ("
              << ((Kind == ICSimpleFalse)
                  ? BBI.FalseBB->getNumber() : BBI.TrueBB->getNumber()) << ") ");
         RetVal = IfConvertSimple(BBI, Kind);
-        DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
: "failed!") << "\n"); if (RetVal) { if (isFalse) NumSimpleFalse++; else NumSimple++; @@ -304,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (DisableTriangleR && !isFalse && isRev) break; if (DisableTriangleF && isFalse && !isRev) break; if (DisableTriangleFR && isFalse && isRev) break; - DEBUG(errs() << "Ifcvt (Triangle"); + DEBUG(dbgs() << "Ifcvt (Triangle"); if (isFalse) - DEBUG(errs() << " false"); + DEBUG(dbgs() << " false"); if (isRev) - DEBUG(errs() << " rev"); - DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:" + DEBUG(dbgs() << " rev"); + DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); - DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { if (isRev) NumTriangleFRev++; @@ -327,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: { if (DisableDiamond) break; - DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); - DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) NumDiamonds++; break; } @@ -1141,7 +1141,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, continue; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG - errs() << "Unable to predicate " << *I << "!\n"; + dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } @@ -1177,7 +1177,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, if (!isPredicated) if (!TII->PredicateInstruction(MI, Cond)) { #ifndef NDEBUG - errs() << "Unable to predicate " << *I << "!\n"; + dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 8a3bd0bf4e00..9997a4844251 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -349,12 +349,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::setjmp: { Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), Type::getInt32Ty(Context)); - if (CI->getType() != Type::getVoidTy(Context)) + if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(V); break; } case Intrinsic::sigsetjmp: - if (CI->getType() != Type::getVoidTy(Context)) + if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; @@ -427,10 +427,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } - case Intrinsic::dbg_stoppoint: - case Intrinsic::dbg_region_start: - case Intrinsic::dbg_region_end: - case Intrinsic::dbg_func_start: case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics @@ -512,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::flt_rounds: // Lower to "round to the nearest" - if (CI->getType() != Type::getVoidTy(Context)) + if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; case Intrinsic::invariant_start: diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index d5fd051d50b0..2b5fd2c94909 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
 
@@ -61,6 +62,7 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
 
+
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
@@ -246,7 +248,7 @@ static void printAndVerify(PassManagerBase &PM,
                            const char *Banner,
                            bool allowDoubleDefs = false) {
   if (PrintMachineCode)
-    PM.add(createMachineFunctionPrinterPass(errs(), Banner));
+    PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
 
   if (VerifyMachineCode)
     PM.add(createMachineVerifierPass(allowDoubleDefs));
@@ -269,7 +271,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (OptLevel != CodeGenOpt::None && !DisableLSR) {
     PM.add(createLoopStrengthReducePass(getTargetLowering()));
     if (PrintLSR)
-      PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));
+      PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
   }
 
   // Turn exception handling constructs into something the code generators can
@@ -278,8 +280,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   {
   case ExceptionHandling::SjLj:
     // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
-    PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+    // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+    // catch info can get misplaced when a selector ends up more than one block
+    // removed from the parent invoke(s). This could happen when a landing
+    // pad is shared by multiple invokes and is also a target of a normal
+    // edge from elsewhere.
     PM.add(createSjLjEHPass(getTargetLowering()));
+    PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
     break;
   case ExceptionHandling::Dwarf:
     PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
@@ -302,7 +309,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (PrintISelInput)
     PM.add(createPrintFunctionPass("\n\n"
                                    "*** Final LLVM Code input to ISel ***\n",
-                                   &errs()));
+                                   &dbgs()));
 
   // Standard Lower-Level Passes.
@@ -323,6 +330,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                  /* allowDoubleDefs= */ true);
 
   if (OptLevel != CodeGenOpt::None) {
+    PM.add(createOptimizeExtsPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
     if (!DisableMachineSink)
@@ -335,7 +343,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate && PreAllocTailDup) {
     PM.add(createTailDuplicatePass(true));
-    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate",
+                   /* allowDoubleDefs= */ true);
   }
 
   // Run pre-ra passes.
@@ -391,7 +400,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     PM.add(createGCMachineCodeAnalysisPass());
 
   if (PrintGCInfo)
-    PM.add(createGCInfoPrinter(errs()));
+    PM.add(createGCInfoPrinter(dbgs()));
 
   if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
     PM.add(createCodePlacementOptPass());
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index cc286aa13d67..e207f607dad2 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -10,7 +10,7 @@
 // This file implements the LiveRange and LiveInterval classes.  Given some
 // numbering of each the machine instructions an interval [i, j) is said to be a
 // live interval for register v if there is no instruction with number j' > j
-// such that v is live at j' abd there is no instruction with number i' < i such
+// such that v is live at j' and there is no instruction with number i' < i such
 // that v is live at i'. In this implementation intervals can have holes,
 // i.e. an interval might look like [1,20), [50,65), [1000,1001).  Each
 // individual range is represented as an instance of LiveRange, and the whole
@@ -24,6 +24,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
@@ -813,7 +814,7 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
 }
 
 void LiveRange::dump() const {
-  errs() << *this << "\n";
+  dbgs() << *this << "\n";
 }
 
 void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
@@ -872,7 +873,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
 }
 
 void LiveInterval::dump() const {
-  errs() << *this << "\n";
+  dbgs() << *this << "\n";
 }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 452f872f2fba..e0e2ec8f12b3 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -146,7 +146,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
 }
 
 void LiveIntervals::dumpInstrs() const {
-  printInstrs(errs());
+  printInstrs(dbgs());
 }
 
 bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
@@ -253,9 +253,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
 #ifndef NDEBUG
 static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
   if (TargetRegisterInfo::isPhysicalRegister(reg))
-    errs() << tri_->getName(reg);
+    dbgs() << tri_->getName(reg);
   else
-    errs() << "%reg" << reg;
+    dbgs() << "%reg" << reg;
 }
 #endif
@@ -266,7 +266,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                              unsigned MOIdx,
                                              LiveInterval &interval) {
   DEBUG({
-      errs() << "\t\tregister: ";
+      dbgs() << "\t\tregister: ";
       printRegName(interval.reg, tri_);
     });
@@ -314,7 +314,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
              "Shouldn't be alive across any blocks!");
       LiveRange LR(defIndex, killIdx, ValNo);
       interval.addRange(LR);
-      DEBUG(errs() << " +" << LR << "\n");
+      DEBUG(dbgs() << " +" << LR << "\n");
       ValNo->addKill(killIdx);
       return;
     }
@@ -325,7 +325,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
     // live into some number of blocks, but gets killed.  Start by adding a
     // range that goes from this definition to the end of the defining block.
     LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
-    DEBUG(errs() << " +" << NewLR);
+    DEBUG(dbgs() << " +" << NewLR);
     interval.addRange(NewLR);
 
     // Iterate over all of the blocks that the variable is completely
@@ -336,7 +336,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
       LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
       interval.addRange(LR);
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
     }
@@ -348,7 +348,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
       interval.addRange(LR);
       ValNo->addKill(killIdx);
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
     }
 
   } else {
@@ -393,7 +393,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
 
       // Add the new live interval which replaces the range for the input copy.
       LiveRange LR(DefIndex, RedefIndex, ValNo);
-      DEBUG(errs() << " replace range with " << LR);
+      DEBUG(dbgs() << " replace range with " << LR);
       interval.addRange(LR);
       ValNo->addKill(RedefIndex);
@@ -404,8 +404,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                       OldValNo));
 
       DEBUG({
-          errs() << " RESULT: ";
-          interval.print(errs(), tri_);
+          dbgs() << " RESULT: ";
+          interval.print(dbgs(), tri_);
         });
     } else {
       // Otherwise, this must be because of phi elimination.  If this is the
@@ -422,8 +422,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         SlotIndex Start = getMBBStartIdx(Killer->getParent());
         SlotIndex End = getInstructionIndex(Killer).getDefIndex();
         DEBUG({
-            errs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
-            interval.print(errs(), tri_);
+            dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
+            interval.print(dbgs(), tri_);
           });
         interval.removeRange(Start, End);
@@ -442,8 +442,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         VNI->addKill(indexes_->getTerminatorGap(killMBB));
         VNI->setHasPHIKill(true);
         DEBUG({
-            errs() << " RESULT: ";
-            interval.print(errs(), tri_);
+            dbgs() << " RESULT: ";
+            interval.print(dbgs(), tri_);
           });
       }
@@ -469,11 +469,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       interval.addRange(LR);
       ValNo->addKill(indexes_->getTerminatorGap(mbb));
       ValNo->setHasPHIKill(true);
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
     }
   }
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
 }
 
 void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
@@ -485,7 +485,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
   // A physical register cannot be live across basic block, so its
   // lifetime must end somewhere in its defining basic block.
   DEBUG({
-      errs() << "\t\tregister: ";
+      dbgs() << "\t\tregister: ";
       printRegName(interval.reg, tri_);
     });
@@ -502,7 +502,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
   // For earlyclobbers, the defSlot was pushed back one; the extra
   // advance below compensates.
   if (MO.isDead()) {
-    DEBUG(errs() << " dead");
+    DEBUG(dbgs() << " dead");
     end = start.getStoreIndex();
     goto exit;
   }
@@ -517,7 +517,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
     baseIndex = indexes_->getNextNonNullIndex(baseIndex);
 
     if (mi->killsRegister(interval.reg, tri_)) {
-      DEBUG(errs() << " killed");
+      DEBUG(dbgs() << " killed");
       end = baseIndex.getDefIndex();
       goto exit;
     } else {
@@ -531,7 +531,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
         // Then the register is essentially dead at the instruction that defines
         // it. Hence its interval is:
         // [defSlot(def), defSlot(def)+1)
-        DEBUG(errs() << " dead");
+        DEBUG(dbgs() << " dead");
         end = start.getStoreIndex();
       }
       goto exit;
@@ -560,7 +560,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
   LiveRange LR(start, end, ValNo);
   interval.addRange(LR);
   LR.valno->addKill(end);
-  DEBUG(errs() << " +" << LR << '\n');
+  DEBUG(dbgs() << " +" << LR << '\n');
 }
 
 void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
@@ -595,7 +595,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
                                          SlotIndex MIIdx,
                                          LiveInterval &interval, bool isAlias) {
   DEBUG({
-      errs() << "\t\tlivein register: ";
+      dbgs() << "\t\tlivein register: ";
      printRegName(interval.reg, tri_);
    });
@@ -612,7 +612,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   while (mi != MBB->end()) {
     if (mi->killsRegister(interval.reg, tri_)) {
-      DEBUG(errs() << " killed");
+      DEBUG(dbgs() << " killed");
       end = baseIndex.getDefIndex();
       SeenDefUse = true;
       break;
@@ -621,7 +621,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
       // Then the register is essentially dead at the instruction that defines
       // it. Hence its interval is:
       // [defSlot(def), defSlot(def)+1)
-      DEBUG(errs() << " dead");
+      DEBUG(dbgs() << " dead");
       end = start.getStoreIndex();
       SeenDefUse = true;
       break;
@@ -636,10 +636,10 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   // Live-in register might not be used at all.
   if (!SeenDefUse) {
     if (isAlias) {
-      DEBUG(errs() << " dead");
+      DEBUG(dbgs() << " dead");
       end = MIIdx.getStoreIndex();
     } else {
-      DEBUG(errs() << " live through");
+      DEBUG(dbgs() << " live through");
       end = baseIndex;
     }
   }
@@ -652,7 +652,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   interval.addRange(LR);
   LR.valno->addKill(end);
-  DEBUG(errs() << " +" << LR << '\n');
+  DEBUG(dbgs() << " +" << LR << '\n');
 }
 
 /// computeIntervals - computes the live intervals for virtual
@@ -660,7 +660,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
 /// live interval is an interval [i, j) where 1 <= i <= j < N for
 /// which a variable is live
 void LiveIntervals::computeIntervals() {
-  DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n"
+  DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
         << "********** Function: "
        << ((Value*)mf_->getFunction())->getName() << '\n');
@@ -670,7 +670,7 @@ void LiveIntervals::computeIntervals() {
     MachineBasicBlock *MBB = MBBI;
     // Track the index of the current machine instr.
     SlotIndex MIIndex = getMBBStartIdx(MBB);
-    DEBUG(errs() << MBB->getName() << ":\n");
+    DEBUG(dbgs() << MBB->getName() << ":\n");
 
     MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
@@ -690,7 +690,7 @@ void LiveIntervals::computeIntervals() {
       MIIndex = indexes_->getNextNonNullIndex(MIIndex);
 
     for (; MI != miEnd; ++MI) {
-      DEBUG(errs() << MIIndex << "\t" << *MI);
+      DEBUG(dbgs() << MIIndex << "\t" << *MI);
 
       // Handle defs.
       for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
@@ -1055,7 +1055,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       // If this is the rematerializable definition MI itself and
       // all of its uses are rematerialized, simply delete it.
       if (MI == ReMatOrigDefMI && CanDelete) {
-        DEBUG(errs() << "\t\t\t\tErasing re-materlizable def: "
+        DEBUG(dbgs() << "\t\t\t\tErasing re-materlizable def: "
              << MI << '\n');
         RemoveMachineInstrFromMaps(MI);
         vrm.RemoveMachineInstrFromMaps(MI);
@@ -1208,28 +1208,28 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       if (CreatedNewVReg) {
         LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
                      nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
       } else {
         // Extend the split live interval to this def / use.
         SlotIndex End = index.getDefIndex();
         LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
                      nI.getValNumInfo(nI.getNumValNums()-1));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
       }
     }
     if (HasDef) {
       LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
                    nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
       nI.addRange(LR);
     }
 
     DEBUG({
-        errs() << "\t\t\t\tAdded new interval: ";
-        nI.print(errs(), tri_);
-        errs() << '\n';
+        dbgs() << "\t\t\t\tAdded new interval: ";
+        nI.print(dbgs(), tri_);
+        dbgs() << '\n';
       });
   }
   return CanFold;
@@ -1557,9 +1557,9 @@ addIntervalsForSpillsFast(const LiveInterval &li,
          "attempt to spill already spilled interval!");
 
   DEBUG({
-      errs() << "\t\t\t\tadding intervals for spills for interval: ";
+      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
       li.dump();
-      errs() << '\n';
+      dbgs() << '\n';
     });
 
   const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
@@ -1610,7 +1610,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
         LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
                      nI.getNextValue(SlotIndex(), 0, false,
                                      getVNInfoAllocator()));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
         vrm.addRestorePoint(NewVReg, MI);
       }
@@ -1618,7 +1618,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
         LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
                      nI.getNextValue(SlotIndex(), 0, false,
                                      getVNInfoAllocator()));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
         vrm.addSpillPoint(NewVReg, true, MI);
       }
@@ -1626,9 +1626,9 @@ addIntervalsForSpillsFast(const LiveInterval &li,
       added.push_back(&nI);
 
       DEBUG({
-          errs() << "\t\t\t\tadded new interval: ";
+          dbgs() << "\t\t\t\tadded new interval: ";
          nI.dump();
-          errs() << '\n';
+          dbgs() << '\n';
        });
    }
@@ -1651,9 +1651,9 @@ addIntervalsForSpills(const LiveInterval &li,
          "attempt to spill already spilled interval!");
 
   DEBUG({
-      errs() << "\t\t\t\tadding intervals for spills for interval: ";
-      li.print(errs(), tri_);
-      errs() << '\n';
+      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+      li.print(dbgs(), tri_);
+      dbgs() << '\n';
     });
 
   // Each bit specify whether a spill is required in the MBB.
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 3c88e370cb72..b44a2202e764 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -59,17 +60,17 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
 }
 
 void LiveVariables::VarInfo::dump() const {
-  errs() << "  Alive in blocks: ";
+  dbgs() << "  Alive in blocks: ";
   for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
            E = AliveBlocks.end(); I != E; ++I)
-    errs() << *I << ", ";
-  errs() << "\n  Killed by:";
+    dbgs() << *I << ", ";
+  dbgs() << "\n  Killed by:";
   if (Kills.empty())
-    errs() << " No instructions.\n";
+    dbgs() << " No instructions.\n";
   else {
     for (unsigned i = 0, e = Kills.size(); i != e; ++i)
-      errs() << "\n    #" << i << ": " << *Kills[i];
-    errs() << "\n";
+      dbgs() << "\n    #" << i << ": " << *Kills[i];
+    dbgs() << "\n";
   }
 }
@@ -289,7 +290,6 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
   MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
   unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
-  MachineInstr *LastPartDef = 0;
   unsigned LastPartDefDist = 0;
   for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
        unsigned SubReg = *SubRegs; ++SubRegs) {
@@ -298,13 +298,9 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
       // There was a def of this sub-register in between. This is a partial
       // def, keep track of the last one.
       unsigned Dist = DistanceMap[Def];
-      if (Dist > LastPartDefDist) {
+      if (Dist > LastPartDefDist)
         LastPartDefDist = Dist;
-        LastPartDef = Def;
-      }
-      continue;
-    }
-    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+    } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
       unsigned Dist = DistanceMap[Use];
       if (Dist > LastRefOrPartRefDist) {
         LastRefOrPartRefDist = Dist;
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 80eb6cdcba61..1121d9ba752e 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -122,7 +122,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
          "Extract destination must be in a physical register");
   assert(SrcReg && "invalid subregister index for register");
 
-  DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
   if (SrcReg == DstReg) {
     // No need to insert an identity copy instruction.
@@ -131,11 +131,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
       // instruction with KILL.
       MI->setDesc(TII->get(TargetInstrInfo::KILL));
       MI->RemoveOperand(2);     // SubIdx
-      DEBUG(errs() << "subreg: replace by: " << *MI);
+      DEBUG(dbgs() << "subreg: replace by: " << *MI);
       return true;
     }
 
-    DEBUG(errs() << "subreg: eliminated!");
+    DEBUG(dbgs() << "subreg: eliminated!");
   } else {
     // Insert copy
     const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
@@ -150,11 +150,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
       TransferKillFlag(MI, SuperReg, TRI, true);
     DEBUG({
         MachineBasicBlock::iterator dMI = MI;
-        errs() << "subreg: " << *(--dMI);
+        dbgs() << "subreg: " << *(--dMI);
       });
   }
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
   MBB->erase(MI);
   return true;
 }
@@ -179,7 +179,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
   assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
          "Inserted value must be in a physical register");
 
-  DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
   if (DstSubReg == InsReg && InsSIdx == 0) {
     // No need to insert an identify copy instruction.
@@ -188,7 +188,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
     // %RAX = SUBREG_TO_REG 0, %EAX:3, 3
     // The first def is defining RAX, not EAX so the top bits were not
     // zero extended.
-    DEBUG(errs() << "subreg: eliminated!");
+    DEBUG(dbgs() << "subreg: eliminated!");
   } else {
     // Insert sub-register copy
     const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
@@ -203,11 +203,11 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
       TransferKillFlag(MI, InsReg, TRI);
     DEBUG({
         MachineBasicBlock::iterator dMI = MI;
-        errs() << "subreg: " << *(--dMI);
+        dbgs() << "subreg: " << *(--dMI);
       });
   }
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
   MBB->erase(MI);
   return true;
 }
@@ -235,7 +235,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
   assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
          "Inserted value must be in a physical register");
 
-  DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
   if (DstSubReg == InsReg) {
     // No need to insert an identity copy instruction. If the SrcReg was
@@ -248,7 +248,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
       else
         MIB.addReg(InsReg, RegState::Kill);
     } else {
-      DEBUG(errs() << "subreg: eliminated!\n");
+      DEBUG(dbgs() << "subreg: eliminated!\n");
       MBB->erase(MI);
       return true;
     }
@@ -288,7 +288,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
 
   DEBUG({
       MachineBasicBlock::iterator dMI = MI;
-      errs() << "subreg: " << *(--dMI) << "\n";
+      dbgs() << "subreg: " << *(--dMI) << "\n";
     });
 
   MBB->erase(MI);
@@ -299,7 +299,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
 /// copies.
 ///
 bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(errs() << "Machine Function\n"
+  DEBUG(dbgs() << "Machine Function\n"
         << "********** LOWERING SUBREG INSTRS **********\n"
        << "********** Function: "
        << MF.getFunction()->getName() << '\n');
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
index 73b15edba37f..337eab18277a 100644
--- a/lib/CodeGen/MachOWriter.cpp
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/OutputBuffer.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -634,7 +635,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
       }
       case Instruction::Add:
       default:
-        errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
+        dbgs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
         llvm_unreachable(0);
       }
     } else if (PC->getType()->isSingleValueType()) {
@@ -732,7 +733,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
         WorkList.push_back(CPair(CPS->getOperand(i),
                                  PA+SL->getElementOffset(i)));
     } else {
-      errs() << "Bad Type: " << *PC->getType() << "\n";
+      dbgs() << "Bad Type: " << *PC->getType() << "\n";
       llvm_unreachable("Unknown constant type to initialize memory with!");
     }
   }
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 74a0d574a232..e2ce642cfd6d 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Target/TargetInstrDesc.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Assembly/Writer.h"
@@ -158,7 +159,7 @@ bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
 }
 
 void MachineBasicBlock::dump() const {
-  print(errs());
+  print(dbgs());
 }
 
 static inline void OutputReg(raw_ostream &os, unsigned RegNo,
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index dd6fd7ea594f..ae9451c88e63 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -299,7 +300,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
 }
 
 void MachineFunction::dump() const {
-  print(errs());
+  print(dbgs());
 }
 
 void MachineFunction::print(raw_ostream &OS) const {
@@ -519,7 +520,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
 }
 
 void MachineFrameInfo::dump(const MachineFunction &MF) const {
-  print(MF, errs());
+  print(MF, dbgs());
 }
 
 //===----------------------------------------------------------------------===//
@@ -579,7 +580,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
   OS << '\n';
 }
 
-void MachineJumpTableInfo::dump() const { print(errs()); }
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
 
 
 //===----------------------------------------------------------------------===//
@@ -702,4 +703,4 @@ void MachineConstantPool::print(raw_ostream &OS) const {
   }
 }
 
-void MachineConstantPool::dump() const { print(errs()); }
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index a761c2daa96b..cf3e3e16014a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -28,11 +28,13 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Metadata.h"
 using namespace llvm;
@@ -277,10 +279,15 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
     OS << '>';
     break;
   case MachineOperand::MO_BlockAddress:
-    OS << "<";
+    OS << '<';
     WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
     OS << '>';
     break;
+  case MachineOperand::MO_Metadata:
+    OS << '<';
+    WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+    OS << '>';
+    break;
   default:
     llvm_unreachable("Unrecognized operand type");
   }
@@ -1094,7 +1101,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
 }
 
 void MachineInstr::dump() const {
-  errs() << "  " << *this;
+  dbgs() << "  " << *this;
 }
 
 void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -1313,3 +1320,12 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
                                                            true  /*IsDead*/));
   return true;
 }
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+                                      const TargetRegisterInfo *RegInfo) {
+  MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+  if (!MO || MO->getSubReg())
+    addOperand(MachineOperand::CreateReg(IncomingReg,
+                                         true  /*IsDef*/,
+                                         true  /*IsImp*/));
+}
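
(The new addRegisterDefined helper lets callers assert a full definition of a register on an instruction: if no plain, non-subregister def operand is present, an implicit def is appended. A hypothetical call site, for illustration:

    // Ensure MI is recorded as fully defining Reg; this is a no-op when a
    // proper def operand already exists on the instruction.
    void ensureFullDef(llvm::MachineInstr *MI, unsigned Reg,
                       const llvm::TargetRegisterInfo *TRI) {
      MI->addRegisterDefined(Reg, TRI);
    }
)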
/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "******** Machine LICM ********\n"); + DEBUG(dbgs() << "******** Machine LICM ********\n"); Changed = FirstInLoop = false; MCP = MF.getConstantPool(); @@ -253,28 +253,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } DEBUG({ - errs() << "--- Checking if we can hoist " << I; + dbgs() << "--- Checking if we can hoist " << I; if (I.getDesc().getImplicitUses()) { - errs() << " * Instruction has implicit uses:\n"; + dbgs() << " * Instruction has implicit uses:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); *ImpUses; ++ImpUses) - errs() << " -> " << TRI->getName(*ImpUses) << "\n"; + dbgs() << " -> " << TRI->getName(*ImpUses) << "\n"; } if (I.getDesc().getImplicitDefs()) { - errs() << " * Instruction has implicit defines:\n"; + dbgs() << " * Instruction has implicit defines:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); *ImpDefs; ++ImpDefs) - errs() << " -> " << TRI->getName(*ImpDefs) << "\n"; + dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n"; } }); if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DEBUG(errs() << "Cannot hoist with implicit defines or uses\n"); + DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n"); return false; } @@ -479,7 +479,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { - DEBUG(errs() << "CSEing " << *MI << " with " << *Dup); + DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) @@ -506,14 +506,14 @@ void MachineLICM::Hoist(MachineInstr *MI) { // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ - errs() << "Hoisting " << *MI; + dbgs() << "Hoisting " << *MI; if (CurPreheader->getBasicBlock()) - errs() << " to MachineBasicBlock " + dbgs() << " to MachineBasicBlock " << CurPreheader->getName(); if (MI->getParent()->getBasicBlock()) - errs() << " from MachineBasicBlock " + dbgs() << " from MachineBasicBlock " << MI->getParent()->getName(); - errs() << "\n"; + dbgs() << "\n"; }); // If this is the first instruction being hoisted to the preheader, diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index d561a5bb8c99..269538b31d0b 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace llvm { @@ -73,3 +74,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { } return BotMBB; } + +void MachineLoop::dump() const { + print(dbgs()); +} diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 292096f00dd1..467ea5d17356 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -210,7 +210,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If the client wants to know about all new instructions, tell it. 
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI->getOperand(0).getReg(); } @@ -383,7 +383,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ InsertedPHI->eraseFromParent(); InsertedVal = ConstVal; } else { - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index e04073884da7..c177e3c7bae8 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -90,7 +90,7 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "******** Machine Sinking ********\n"); + DEBUG(dbgs() << "******** Machine Sinking ********\n"); const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); @@ -255,15 +255,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MI->getParent() == SuccToSinkTo) return false; - DEBUG(errs() << "Sink instr " << *MI); - DEBUG(errs() << "to block " << *SuccToSinkTo); + DEBUG(dbgs() << "Sink instr " << *MI); + DEBUG(dbgs() << "to block " << *SuccToSinkTo); // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(errs() << " *** PUNTING: Critical edge found\n"); + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); return false; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 077231904631..584c21b70353 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -190,8 +190,7 @@ namespace { void report(const char *msg, const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); - void calcMaxRegsPassed(); - void calcMinRegsPassed(); + void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); void calcRegsRequired(); @@ -710,7 +709,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { // Calculate the largest possible vregsPassed sets. These are the registers that // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. -void MachineVerifier::calcMaxRegsPassed() { +void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. DenseSet todo; @@ -745,45 +744,9 @@ void MachineVerifier::calcMaxRegsPassed() { } } -// Calculate the minimum vregsPassed set. These are the registers that always -// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has -// been called earlier. 
-void MachineVerifier::calcMinRegsPassed() { - DenseSet todo; - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) - todo.insert(MFI); - - while (!todo.empty()) { - const MachineBasicBlock *MBB = *todo.begin(); - todo.erase(MBB); - BBInfo &MInfo = MBBInfoMap[MBB]; - - // Remove entries from vRegsPassed that are not live out from all - // reachable predecessors. - RegSet dead; - for (RegSet::iterator I = MInfo.vregsPassed.begin(), - E = MInfo.vregsPassed.end(); I != E; ++I) { - for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), - PrE = MBB->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) { - dead.insert(*I); - break; - } - } - } - // If any regs removed, we need to recheck successors. - if (!dead.empty()) { - set_subtract(MInfo.vregsPassed, dead); - todo.insert(MBB->succ_begin(), MBB->succ_end()); - } - } -} - // Calculate the set of virtual registers that must be passed through each basic // block in order to satisfy the requirements of successor blocks. This is very -// similar to calcMaxRegsPassed, only backwards. +// similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. DenseSet todo; @@ -817,7 +780,7 @@ void MachineVerifier::calcRegsRequired() { } // Check PHI instructions at the beginning of MBB. It is assumed that -// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. +// calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { @@ -848,61 +811,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineFunctionAfter() { - calcMaxRegsPassed(); + calcRegsPassed(); - // With the maximal set of vregsPassed we can verify dead-in registers. - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; - - // Skip unreachable MBBs. - if (!MInfo.reachable) - continue; - - for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), - PrE = MFI->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (!PrInfo.reachable) - continue; - - // Verify physical live-ins. EH landing pads have magic live-ins so we - // ignore them. - if (!MFI->isLandingPad()) { - for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(), - E = MFI->livein_end(); I != E; ++I) { - if (TargetRegisterInfo::isPhysicalRegister(*I) && - !isReserved (*I) && !PrInfo.isLiveOut(*I)) { - report("Live-in physical register is not live-out from predecessor", - MFI); - *OS << "Register " << TRI->getName(*I) - << " is not live-out from BB#" << (*PrI)->getNumber() - << ".\n"; - } - } - } - - - // Verify dead-in virtual registers. - if (!allowVirtDoubleDefs) { - for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), - E = MInfo.vregsDeadIn.end(); I != E; ++I) { - // DeadIn register must be in neither regsLiveOut or vregsPassed of - // any predecessor. 
- if (PrInfo.isLiveOut(I->first)) { - report("Live-in virtual register redefined", I->second); - *OS << "Register %reg" << I->first - << " was live-out from predecessor MBB #" - << (*PrI)->getNumber() << ".\n"; - } - } - } - } - } - - calcMinRegsPassed(); - - // With the minimal set of vregsPassed we can verify live-in virtual - // registers, including PHI instructions. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { BBInfo &MInfo = MBBInfoMap[MFI]; @@ -913,20 +823,24 @@ void MachineVerifier::visitMachineFunctionAfter() { checkPHIOps(MFI); - for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), - PrE = MFI->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (!PrInfo.reachable) - continue; + // Verify dead-in virtual registers. + if (!allowVirtDoubleDefs) { + for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), + PrE = MFI->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (!PrInfo.reachable) + continue; - for (RegMap::iterator I = MInfo.vregsLiveIn.begin(), - E = MInfo.vregsLiveIn.end(); I != E; ++I) { - if (!PrInfo.isLiveOut(I->first)) { - report("Used virtual register is not live-in", I->second); - *OS << "Register %reg" << I->first - << " is not live-out from predecessor MBB #" - << (*PrI)->getNumber() - << ".\n"; + for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), + E = MInfo.vregsDeadIn.end(); I != E; ++I) { + // DeadIn register must be in neither regsLiveOut or vregsPassed of + // any predecessor. + if (PrInfo.isLiveOut(I->first)) { + report("Live-in virtual register redefined", I->second); + *OS << "Register %reg" << I->first + << " was live-out from predecessor MBB #" + << (*PrI)->getNumber() << ".\n"; + } } } } diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp new file mode 100644 index 000000000000..625ff89f90e8 --- /dev/null +++ b/lib/CodeGen/OptimizeExts.cpp @@ -0,0 +1,185 @@ +//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs optimization of sign / zero extension instructions. It +// may be extended to handle other instructions of similar property. +// +// On some targets, some instructions, e.g. X86 sign / zero extension, may +// leave the source value in the lower part of the result. This pass will +// replace (some) uses of the pre-extension value with uses of the sub-register +// of the results. 
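+// For example (an illustrative sketch, not lines from this file: the
+// register numbers, SOME_OP, and the x86 sub-register index are
+// hypothetical), given
+//
+//   %reg1027 = MOVSX32rr8 %reg1026   ; low 8 bits of %reg1027 == %reg1026
+//   ...
+//   ...      = SOME_OP %reg1026      ; later use of the pre-extension value
+//
+// the pass can rewrite the later use to read the extension result through a
+// sub-register extract, so %reg1026 need not be kept live:
+//
+//   %reg1030 = EXTRACT_SUBREG %reg1027, x86_subreg_8bit
+//   ...      = SOME_OP %reg1030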
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ext-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
+                                cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+
+namespace {
+  class OptimizeExts : public MachineFunctionPass {
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo *MRI;
+    MachineDominatorTree *DT;   // Machine dominator tree
+
+  public:
+    static char ID; // Pass identification
+    OptimizeExts() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      if (Aggressive) {
+        AU.addRequired<MachineDominatorTree>();
+        AU.addPreserved<MachineDominatorTree>();
+      }
+    }
+
+  private:
+    bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                       SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+  };
+}
+
+char OptimizeExts::ID = 0;
+static RegisterPass<OptimizeExts>
+X("opt-exts", "Optimize sign / zero extensions");
+
+FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
+
+/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify
+/// the source, and if the source value is preserved as a sub-register of
+/// the result, then replace all reachable uses of the source with the subreg
+/// of the result.
+bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+  bool Changed = false;
+  LocalMIs.insert(MI);
+
+  unsigned SrcReg, DstReg, SubIdx;
+  if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
+    if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+        TargetRegisterInfo::isPhysicalRegister(SrcReg))
+      return false;
+
+    MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+    if (++UI == MRI->use_end())
+      // No other uses.
+      return false;
+
+    // Ok, the source has other uses. See if we can replace the other uses
+    // with use of the result of the extension.
+    SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+    UI = MRI->use_begin(DstReg);
+    for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+         ++UI)
+      ReachedBBs.insert(UI->getParent());
+
+    bool ExtendLife = true;
+    // Uses that are in the same BB of uses of the result of the instruction.
+    SmallVector<MachineOperand*, 8> Uses;
+    // Uses that the result of the instruction can reach.
+    SmallVector<MachineOperand*, 8> ExtendedUses;
+
+    UI = MRI->use_begin(SrcReg);
+    for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+         ++UI) {
+      MachineOperand &UseMO = UI.getOperand();
+      MachineInstr *UseMI = &*UI;
+      if (UseMI == MI)
+        continue;
+      if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
+        ExtendLife = false;
+        continue;
+      }
+
+      MachineBasicBlock *UseMBB = UseMI->getParent();
+      if (UseMBB == MBB) {
+        // Local uses that come after the extension.
+        if (!LocalMIs.count(UseMI))
+          Uses.push_back(&UseMO);
+      } else if (ReachedBBs.count(UseMBB))
+        // Non-local uses where the result of extension is used. Always
+        // replace these unless it's a PHI.
+        Uses.push_back(&UseMO);
+      else if (Aggressive && DT->dominates(MBB, UseMBB))
+        // We may want to extend live range of the extension result in order
+        // to replace these uses.
+        ExtendedUses.push_back(&UseMO);
+      else {
+        // Both will be live out of the def MBB anyway. Don't extend live
+        // range of the extension result.
+        ExtendLife = false;
+        break;
+      }
+    }
+
+    if (ExtendLife && !ExtendedUses.empty())
+      // Ok, we'll extend the liveness of the extension result.
+      std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+                std::back_inserter(Uses));
+
+    // Now replace all uses.
+    if (!Uses.empty()) {
+      const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+      for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+        MachineOperand *UseMO = Uses[i];
+        MachineInstr *UseMI = UseMO->getParent();
+        MachineBasicBlock *UseMBB = UseMI->getParent();
+        unsigned NewVR = MRI->createVirtualRegister(RC);
+        BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+                TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
+          .addReg(DstReg).addImm(SubIdx);
+        UseMO->setReg(NewVR);
+        ++NumReuse;
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+  bool Changed = false;
+
+  SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    LocalMIs.clear();
+    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
+         ++MII) {
+      MachineInstr *MI = &*MII;
+      Changed |= OptimizeInstr(MI, MBB, LocalMIs);
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h
index a47dce9e6753..738dea0d37cd 100644
--- a/lib/CodeGen/PBQP/AnnotatedGraph.h
+++ b/lib/CodeGen/PBQP/AnnotatedGraph.h
@@ -1,4 +1,4 @@
-//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===//
+//===-- AnnotatedGraph.h - Annotated PBQP Graph -----------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h
index b2f2e6f620fd..35ec4f1b0b9c 100644
--- a/lib/CodeGen/PBQP/ExhaustiveSolver.h
+++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h
@@ -1,4 +1,4 @@
-//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===//
+//===-- ExhaustiveSolver.h - Brute Force PBQP Solver ------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
index 0c7493b8957f..becd98afdb5b 100644
--- a/lib/CodeGen/PBQP/GraphBase.h
+++ b/lib/CodeGen/PBQP/GraphBase.h
@@ -1,4 +1,4 @@
-//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===//
+//===-- GraphBase.h - Abstract Base PBQP Graph ------------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index 16708779e09d..f78a58a66cb2 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -1,4 +1,4 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===//
+//===-- HeuristicSolver.h - Heuristic PBQP Solver ---------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 3ac9e707bab4..1228f6533c3b 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ 
b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -1,4 +1,4 @@ -//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===// +//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h index 11f4b4b4e34c..20737a298cf8 100644 --- a/lib/CodeGen/PBQP/PBQPMath.h +++ b/lib/CodeGen/PBQP/PBQPMath.h @@ -1,4 +1,4 @@ -//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===// +//===-- PBQPMath.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h index 1ca9caee3467..13e63ceb4800 100644 --- a/lib/CodeGen/PBQP/SimpleGraph.h +++ b/lib/CodeGen/PBQP/SimpleGraph.h @@ -1,4 +1,4 @@ -//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===// +//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h index c91e2fa560a0..aee684d33e1b 100644 --- a/lib/CodeGen/PBQP/Solution.h +++ b/lib/CodeGen/PBQP/Solution.h @@ -1,4 +1,4 @@ -//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===// +//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h index a9c5f837c453..a445de81bca3 100644 --- a/lib/CodeGen/PBQP/Solver.h +++ b/lib/CodeGen/PBQP/Solver.h @@ -1,4 +1,4 @@ -//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===// +//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 58c3dec4c22c..365df309429b 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -207,7 +207,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( IncomingReg = entry; reusedIncoming = true; ++NumReused; - DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); + DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); } else { entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } @@ -234,7 +234,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // AfterPHIsIt, so it appears before the current PHICopy. 
 if (reusedIncoming)
       if (MachineInstr *OldKill = VI.findKill(&MBB)) {
-        DEBUG(errs() << "Remove old kill from " << *OldKill);
+        DEBUG(dbgs() << "Remove old kill from " << *OldKill);
         LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
         DEBUG(MBB.dump());
       }
@@ -421,7 +421,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
   MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
-  DEBUG(errs() << "PHIElimination splitting critical edge:"
+  DEBUG(dbgs() << "PHIElimination splitting critical edge:"
         " BB#" << A->getNumber()
         << " -- BB#" << NMBB->getNumber()
         << " -- BB#" << B->getNumber() << '\n');
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 79be29526c4a..f43395fa2efc 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -233,7 +233,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       TargetSubtarget::ANTIDEP_NONE;
   }
-  DEBUG(errs() << "PostRAScheduler\n");
+  DEBUG(dbgs() << "PostRAScheduler\n");
   const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
@@ -258,7 +258,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       static int bbcnt = 0;
       if (bbcnt++ % DebugDiv != DebugMod)
         continue;
-      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+      dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
         ":BB#" << MBB->getNumber() << " ***\n";
     }
 #endif
@@ -342,7 +342,7 @@ void SchedulePostRATDList::Schedule() {
     }
   }
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
@@ -448,7 +448,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
 /// incorrect by instruction reordering.
 ///
 void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
-  DEBUG(errs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
   std::set<unsigned> killedRegs;
   BitVector ReservedRegs = TRI->getReservedRegs(MF);
@@ -511,7 +511,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       }
       if (MO.isKill() != kill) {
-        DEBUG(errs() << "Fixing " << MO << " in ");
+        DEBUG(dbgs() << "Fixing " << MO << " in ");
         // Warning: ToggleKillFlag may invalidate MO.
         ToggleKillFlag(MI, MO);
         DEBUG(MI->dump());
@@ -549,9 +549,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -580,7 +580,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -640,11 +640,11 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(errs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\n*** Examining Available\n"; LatencyPriorityQueue q = AvailableQueue; while (!q.empty()) { SUnit *su = q.pop(); - errs() << "Height " << su->getHeight() << ": "; + dbgs() << "Height " << su->getHeight() << ": "; su->dump(this); }); @@ -689,19 +689,19 @@ void SchedulePostRATDList::ListScheduleTopDown() { } } else { if (CycleHasInsts) { - DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n'); + DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, // just advance the current cycle and try again. - DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n'); + DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); ++NumStalls; } else { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 1c5222c38e7c..8cbc8c224ac3 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -481,32 +481,21 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, // Search for the use in this block that precedes the instruction we care // about, going to the fallback case if we don't find it. - if (UseI == MBB->begin()) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - MachineBasicBlock::iterator Walker = UseI; - --Walker; bool found = false; while (Walker != MBB->begin()) { + --Walker; if (BlockUses.count(Walker)) { found = true; break; } - --Walker; - } - - // Must check begin() too. - if (!found) { - if (BlockUses.count(Walker)) - found = true; - else - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); } + if (!found) + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + SlotIndex UseIndex = LIs->getInstructionIndex(Walker); UseIndex = UseIndex.getUseIndex(); SlotIndex EndIndex; @@ -533,17 +522,11 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, // This case is basically a merging of the two preceding case, with the // special note that checking for defs must take precedence over checking // for uses, because of two-address instructions. 
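// Concretely (an illustrative sketch; the opcode and register numbers are
// hypothetical, not taken from this patch): in a two-address instruction
// such as
//
//   %reg1025 = ADD32rr %reg1025, %reg1026   ; destination tied to 1st source
//
// the same instruction is recorded both as a def and as a use of %reg1025,
// so the backwards walk below must test BlockDefs before BlockUses in order
// to stop at the value-defining occurrence and take its def index.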
- - if (UseI == MBB->begin()) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - MachineBasicBlock::iterator Walker = UseI; - --Walker; bool foundDef = false; bool foundUse = false; while (Walker != MBB->begin()) { + --Walker; if (BlockDefs.count(Walker)) { foundDef = true; break; @@ -551,21 +534,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, foundUse = true; break; } - --Walker; - } - - // Must check begin() too. - if (!foundDef && !foundUse) { - if (BlockDefs.count(Walker)) - foundDef = true; - else if (BlockUses.count(Walker)) - foundUse = true; - else - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); } + if (!foundDef && !foundUse) + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + SlotIndex StartIndex = LIs->getInstructionIndex(Walker); StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex(); SlotIndex EndIndex; @@ -1022,7 +997,7 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, /// so it would not cross the barrier that's being processed. Shrink wrap /// (minimize) the live interval to the last uses. bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { - DEBUG(errs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier + DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier << " result: "); CurrLI = LI; @@ -1039,7 +1014,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // If this would create a new join point, do not split. if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { - DEBUG(errs() << "FAILED (would create a new join point).\n"); + DEBUG(dbgs() << "FAILED (would create a new join point).\n"); return false; } @@ -1056,13 +1031,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { MachineBasicBlock::iterator RestorePt = findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); if (RestorePt == BarrierMBB->end()) { - DEBUG(errs() << "FAILED (could not find a suitable restore point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n"); return false; } if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { - DEBUG(errs() << "success (remat).\n"); + DEBUG(dbgs() << "success (remat).\n"); return true; } @@ -1081,7 +1056,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { MachineBasicBlock::iterator SpillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); if (SpillPt == BarrierMBB->begin()) { - DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } // Add spill. @@ -1096,7 +1071,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // If it's already split, just restore the value. There is no need to spill // the def again. if (!DefMI) { - DEBUG(errs() << "FAILED (def is dead).\n"); + DEBUG(dbgs() << "FAILED (def is dead).\n"); return false; // Def is dead. Do nothing. 
} @@ -1111,13 +1086,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, RefsInMBB); if (SpillPt == DefMBB->begin()) { - DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } } else { SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); if (SpillPt == DefMBB->end()) { - DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } } @@ -1160,7 +1135,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } ++NumSplits; - DEBUG(errs() << "success.\n"); + DEBUG(dbgs() << "success.\n"); return true; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index c9a33d885154..a00f4507af08 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -61,7 +61,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, /// implicit_def defs and their uses. bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { - DEBUG(errs() << "********** PROCESS IMPLICIT DEFS **********\n" + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" << "********** Function: " << ((Value*)fn.getFunction())->getName() << '\n'); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 9e97d89c62e1..8e44a576d650 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -277,7 +277,7 @@ namespace { bool Error = false; for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { if (regUse_[i] != 0) { - errs() << tri_->getName(i) << " is still in use!\n"; + dbgs() << tri_->getName(i) << " is still in use!\n"; Error = true; } } @@ -344,16 +344,16 @@ namespace { void printIntervals(const char* const str, ItTy i, ItTy e) const { DEBUG({ if (str) - errs() << str << " intervals:\n"; + dbgs() << str << " intervals:\n"; for (; i != e; ++i) { - errs() << "\t" << *i->first << " -> "; + dbgs() << "\t" << *i->first << " -> "; unsigned reg = i->first->reg; if (TargetRegisterInfo::isVirtualRegister(reg)) reg = vrm_->getPhys(reg); - errs() << tri_->getName(reg) << '\n'; + dbgs() << tri_->getName(reg) << '\n'; } }); } @@ -455,7 +455,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { return Reg; // Try to coalesce. 
- DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) + DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) << '\n'); vrm_->clearVirt(cur.reg); vrm_->assignVirt2Phys(cur.reg, CandReg); @@ -544,7 +544,7 @@ void RALinScan::initIntervalSets() void RALinScan::linearScan() { // linear scan algorithm DEBUG({ - errs() << "********** LINEAR SCAN **********\n" + dbgs() << "********** LINEAR SCAN **********\n" << "********** Function: " << mf_->getFunction()->getName() << '\n'; printIntervals("fixed", fixed_.begin(), fixed_.end()); @@ -555,7 +555,7 @@ void RALinScan::linearScan() { LiveInterval* cur = unhandled_.top(); unhandled_.pop(); ++NumIters; - DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n'); + DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); assert(!cur->empty() && "Empty interval in unhandled set."); @@ -580,7 +580,7 @@ void RALinScan::linearScan() { while (!active_.empty()) { IntervalPtr &IP = active_.back(); unsigned reg = IP.first->reg; - DEBUG(errs() << "\tinterval " << *IP.first << " expired\n"); + DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -592,7 +592,7 @@ void RALinScan::linearScan() { DEBUG({ for (IntervalPtrs::reverse_iterator i = inactive_.rbegin(); i != inactive_.rend(); ++i) - errs() << "\tinterval " << *i->first << " expired\n"; + dbgs() << "\tinterval " << *i->first << " expired\n"; }); inactive_.clear(); @@ -628,7 +628,7 @@ void RALinScan::linearScan() { } } - DEBUG(errs() << *vrm_); + DEBUG(dbgs() << *vrm_); // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. @@ -642,7 +642,7 @@ void RALinScan::linearScan() { /// to the inactive list. void RALinScan::processActiveIntervals(SlotIndex CurPoint) { - DEBUG(errs() << "\tprocessing active intervals:\n"); + DEBUG(dbgs() << "\tprocessing active intervals:\n"); for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -652,7 +652,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -665,7 +665,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -688,7 +688,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint) /// ones to the active list. 
void RALinScan::processInactiveIntervals(SlotIndex CurPoint) { - DEBUG(errs() << "\tprocessing inactive intervals:\n"); + DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -698,7 +698,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); // Pop off the end of the list. inactive_[i] = inactive_.back(); @@ -706,7 +706,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DEBUG(errs() << "\t\tinterval " << *Interval << " active\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -834,10 +834,10 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, SmallVector SLIs[3]; DEBUG({ - errs() << "\tConsidering " << NumCands << " candidates: "; + dbgs() << "\tConsidering " << NumCands << " candidates: "; for (unsigned i = 0; i != NumCands; ++i) - errs() << tri_->getName(Candidates[i].first) << " "; - errs() << "\n"; + dbgs() << tri_->getName(Candidates[i].first) << " "; + dbgs() << "\n"; }); // Calculate the number of conflicts of each candidate. @@ -950,7 +950,7 @@ namespace { /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - DEBUG(errs() << "\tallocating current interval: "); + DEBUG(dbgs() << "\tallocating current interval: "); // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); @@ -958,7 +958,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); - DEBUG(errs() << tri_->getName(physReg) << '\n'); + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. vrm_->assignVirt2Phys(cur->reg, physReg); return; @@ -1092,7 +1092,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // the free physical register and add this interval to the active // list. if (physReg) { - DEBUG(errs() << tri_->getName(physReg) << '\n'); + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); @@ -1108,7 +1108,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } return; } - DEBUG(errs() << "no free registers\n"); + DEBUG(dbgs() << "no free registers\n"); // Compile the spill weights into an array that is better for scanning. std::vector SpillWeights(tri_->getNumRegs(), 0.0f); @@ -1126,7 +1126,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n"); + DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. 
float minWeight = HUGE_VALF; @@ -1196,10 +1196,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } DEBUG({ - errs() << "\t\tregister(s) with min weight(s): "; + dbgs() << "\t\tregister(s) with min weight(s): "; for (unsigned i = 0; i != LastCandidate; ++i) - errs() << tri_->getName(RegsWeights[i].first) + dbgs() << tri_->getName(RegsWeights[i].first) << " (" << RegsWeights[i].second << ")\n"; }); @@ -1207,7 +1207,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // add any added intervals back to unhandled, and restart // linearscan. if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n'); + DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector spillIs; std::vector added; @@ -1285,7 +1285,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { while (!spillIs.empty()) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); - DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); + DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); @@ -1296,7 +1296,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { spilled.insert(sli->reg); } - DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); + DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -1306,7 +1306,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // If this interval starts before t we are done. if (!i->empty() && i->beginIndex() < earliestStart) break; - DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); + DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or @@ -1356,7 +1356,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && HI->expiredAt(cur->beginIndex())) { - DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n'); + DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); active_.push_back(std::make_pair(HI, HI->begin())); assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); addRegUse(vrm_->getPhys(HI->reg)); @@ -1492,7 +1492,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { // available first. unsigned Preference = vrm_->getRegAllocPref(cur->reg); if (Preference) { - DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") "); + DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); if (isRegAvail(Preference) && RC->contains(Preference)) return Preference; diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index aea5cff79ba1..cbb5826ce9a1 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -296,11 +296,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, assert(VirtReg && "Spilling a physical register is illegal!" 
" Must not have appropriate kill for the register or use exists beyond" " the intended one."); - DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg) + DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) << " containing %reg" << VirtReg); if (!isVirtRegModified(VirtReg)) { - DEBUG(errs() << " which has not been modified, so no store necessary!"); + DEBUG(dbgs() << " which has not been modified, so no store necessary!"); std::pair &LastUse = getVirtRegLastUse(VirtReg); if (LastUse.first) LastUse.first->getOperand(LastUse.second).setIsKill(); @@ -310,7 +310,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // modified. const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(errs() << " to stack slot #" << FrameIndex); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. @@ -321,7 +321,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); removePhysReg(PhysReg); } @@ -516,7 +516,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - DEBUG(errs() << " Reloading %reg" << VirtReg << " into " + DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " << TRI->getName(PhysReg) << "\n"); // Add move instruction(s) @@ -725,7 +725,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { DEBUG({ const BasicBlock *LBB = MBB.getBasicBlock(); if (LBB) - errs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); }); // Add live-in registers as active. @@ -752,13 +752,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); DEBUG({ - errs() << "\nStarting RegAlloc of: " << *MI; - errs() << " Regs have values: "; + dbgs() << "\nStarting RegAlloc of: " << *MI; + dbgs() << " Regs have values: "; for (unsigned i = 0; i != TRI->getNumRegs(); ++i) if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) - errs() << "[" << TRI->getName(i) + dbgs() << "[" << TRI->getName(i) << ",%reg" << PhysRegsUsed[i] << "] "; - errs() << '\n'; + dbgs() << '\n'; }); // Determine whether this is a copy instruction. 
The cases where the @@ -809,7 +809,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { @@ -876,13 +876,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DEBUG(errs() << " Last use of " << TRI->getName(PhysReg) + DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(errs() << " Last use of " + DEBUG(dbgs() << " Last use of " << TRI->getName(*SubRegs) << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(*SubRegs); @@ -978,7 +978,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MF->getRegInfo().setPhysRegUsed(DestPhysReg); markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the output register } @@ -1001,14 +1001,14 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DEBUG(errs() << " Register " << TRI->getName(PhysReg) + DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) << " [%reg" << VirtReg << "] is never used, removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); *AliasSet; ++AliasSet) { if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(errs() << " Register " << TRI->getName(*AliasSet) + DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) << " [%reg" << *AliasSet << "] is never used, removing it from live set\n"); removePhysReg(*AliasSet); @@ -1058,7 +1058,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { /// runOnMachineFunction - Register allocate the whole function /// bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DEBUG(errs() << "Machine Function\n"); + DEBUG(dbgs() << "Machine Function\n"); MF = &Fn; TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index c2014a7649b7..fc59653f8203 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -717,7 +717,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Get the physical reg, subtracting 1 to account for the spill option. 
 unsigned physReg = allowedSets[node][allocSelection - 1];
-    DEBUG(errs() << "VREG " << virtReg << " -> "
+    DEBUG(dbgs() << "VREG " << virtReg << " -> "
           << tri->getName(physReg) << "\n");
     assert(physReg != 0);
@@ -741,7 +741,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
       addStackInterval(spillInterval, mri);
       (void) oldSpillWeight;
-      DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: "
+      DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
             << oldSpillWeight << ", New vregs: ");
       // Copy any newly inserted live intervals into the list of regs to
@@ -752,12 +752,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
         assert(!(*itr)->empty() && "Empty spill range.");
-        DEBUG(errs() << (*itr)->reg << " ");
+        DEBUG(dbgs() << (*itr)->reg << " ");
         vregIntervalsToAlloc.insert(*itr);
       }
-      DEBUG(errs() << ")\n");
+      DEBUG(dbgs() << ")\n");
       // We need another round if spill intervals were added.
       anotherRoundNeeded |= !newSpills.empty();
@@ -849,7 +849,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
   vrm = &getAnalysis<VirtRegMap>();
-  DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
+  DEBUG(dbgs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
   // Allocator main loop:
   //
@@ -874,7 +874,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
   unsigned round = 0;
   while (!pbqpAllocComplete) {
-    DEBUG(errs() << " PBQP Regalloc round " << round << ":\n");
+    DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
     PBQP::SimpleGraph problem = constructPBQPProblem();
     PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver;
@@ -896,7 +896,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
   node2LI.clear();
   allowedSets.clear();
-  DEBUG(errs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
   // Run rewriter
   std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 71693d21c688..1f3e295fe979 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -41,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const {
     if (SUnit *SU = Sequence[i])
       SU->dump(this);
     else
-      errs() << "**** NOOP ****\n";
+      dbgs() << "**** NOOP ****\n";
   }
 }
@@ -61,9 +61,9 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
   Schedule();
   DEBUG({
-      errs() << "*** Final schedule ***\n";
+      dbgs() << "*** Final schedule ***\n";
       dumpSchedule();
-      errs() << '\n';
+      dbgs() << '\n';
     });
 }
@@ -271,58 +271,58 @@ void SUnit::ComputeHeight() {
 /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
 /// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const { - errs() << "SU(" << NodeNum << "): "; + dbgs() << "SU(" << NodeNum << "): "; G->dumpNode(this); } void SUnit::dumpAll(const ScheduleDAG *G) const { dump(G); - errs() << " # preds left : " << NumPredsLeft << "\n"; - errs() << " # succs left : " << NumSuccsLeft << "\n"; - errs() << " Latency : " << Latency << "\n"; - errs() << " Depth : " << Depth << "\n"; - errs() << " Height : " << Height << "\n"; + dbgs() << " # preds left : " << NumPredsLeft << "\n"; + dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " Latency : " << Latency << "\n"; + dbgs() << " Depth : " << Depth << "\n"; + dbgs() << " Height : " << Height << "\n"; if (Preds.size() != 0) { - errs() << " Predecessors:\n"; + dbgs() << " Predecessors:\n"; for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - errs() << " "; + dbgs() << " "; switch (I->getKind()) { - case SDep::Data: errs() << "val "; break; - case SDep::Anti: errs() << "anti"; break; - case SDep::Output: errs() << "out "; break; - case SDep::Order: errs() << "ch "; break; + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; } - errs() << "#"; - errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - errs() << " *"; - errs() << ": Latency=" << I->getLatency(); - errs() << "\n"; + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; } } if (Succs.size() != 0) { - errs() << " Successors:\n"; + dbgs() << " Successors:\n"; for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); I != E; ++I) { - errs() << " "; + dbgs() << " "; switch (I->getKind()) { - case SDep::Data: errs() << "val "; break; - case SDep::Anti: errs() << "anti"; break; - case SDep::Output: errs() << "out "; break; - case SDep::Order: errs() << "ch "; break; + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; } - errs() << "#"; - errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - errs() << " *"; - errs() << ": Latency=" << I->getLatency(); - errs() << "\n"; + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; } } - errs() << "\n"; + dbgs() << "\n"; } #ifndef NDEBUG @@ -340,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { continue; } if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - errs() << "has not been scheduled!\n"; + dbgs() << "has not been scheduled!\n"; AnyNotSched = true; } if (SUnits[i].isScheduled && (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > unsigned(INT_MAX)) { if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - errs() << "has an unexpected " + dbgs() << "has an unexpected " << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { if (SUnits[i].NumSuccsLeft != 0) { if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! 
***\n";
       SUnits[i].dump(this);
-      errs() << "has successors left!\n";
+      dbgs() << "has successors left!\n";
       AnyNotSched = true;
     }
   } else {
     if (SUnits[i].NumPredsLeft != 0) {
       if (!AnyNotSched)
-        errs() << "*** Scheduling failed! ***\n";
+        dbgs() << "*** Scheduling failed! ***\n";
       SUnits[i].dump(this);
-      errs() << "has predecessors left!\n";
+      dbgs() << "has predecessors left!\n";
       AnyNotSched = true;
     }
   }
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index 38839c44131a..4e6c1fcc9604 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -69,7 +70,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Formal argument #" << i << " has unhandled type "
+      dbgs() << "Formal argument #" << i << " has unhandled type "
              << ArgVT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -102,7 +103,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Return operand #" << i << " has unhandled type "
+      dbgs() << "Return operand #" << i << " has unhandled type "
             << VT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -121,7 +122,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Call operand #" << i << " has unhandled type "
+      dbgs() << "Call operand #" << i << " has unhandled type "
             << ArgVT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -140,7 +141,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
     ISD::ArgFlagsTy ArgFlags = Flags[i];
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Call operand #" << i << " has unhandled type "
+      dbgs() << "Call operand #" << i << " has unhandled type "
            << ArgVT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -157,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
     ISD::ArgFlagsTy Flags = Ins[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
 #ifndef NDEBUG
-      errs() << "Call result #" << i << " has unhandled type "
+      dbgs() << "Call result #" << i << " has unhandled type "
            << VT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -170,7 +171,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
 void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
   if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
 #ifndef NDEBUG
-    errs() << "Call result has unhandled type "
+    dbgs() << "Call result has unhandled type "
          << VT.getEVTString();
 #endif
     llvm_unreachable(0);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e6aa14cd73e1..549527c1a3f7 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -541,11 +541,11 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                                bool AddTo) {
   assert(N->getNumValues() == NumTo && "Broken CombineTo
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e6aa14cd73e1..549527c1a3f7 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -541,11 +541,11 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                                bool AddTo) {
   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
   ++NodesCombined;
-  DEBUG(errs() << "\nReplacing.1 ";
+  DEBUG(dbgs() << "\nReplacing.1 ";
         N->dump(&DAG);
-        errs() << "\nWith: ";
+        dbgs() << "\nWith: ";
         To[0].getNode()->dump(&DAG);
-        errs() << " and " << NumTo-1 << " other values\n";
+        dbgs() << " and " << NumTo-1 << " other values\n";
         for (unsigned i = 0, e = NumTo; i != e; ++i)
           assert((!To[i].getNode() ||
                   N->getValueType(i) == To[i].getValueType()) &&
@@ -619,11 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 
   // Replace the old value with the new one.
   ++NodesCombined;
-  DEBUG(errs() << "\nReplacing.2 ";
+  DEBUG(dbgs() << "\nReplacing.2 ";
         TLO.Old.getNode()->dump(&DAG);
-        errs() << "\nWith: ";
+        dbgs() << "\nWith: ";
         TLO.New.getNode()->dump(&DAG);
-        errs() << '\n');
+        dbgs() << '\n');
 
   CommitTargetLoweringOpt(TLO);
   return true;
@@ -689,11 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
            RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
            "Node was deleted but visit returned new node!");
 
-    DEBUG(errs() << "\nReplacing.3 ";
+    DEBUG(dbgs() << "\nReplacing.3 ";
           N->dump(&DAG);
-          errs() << "\nWith: ";
+          dbgs() << "\nWith: ";
           RV.getNode()->dump(&DAG);
-          errs() << '\n');
+          dbgs() << '\n');
 
     WorkListRemover DeadNodes(*this);
     if (N->getNumValues() == RV.getNode()->getNumValues())
       DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -1684,22 +1684,25 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   EVT VT = N0.getValueType();
   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
 
+  // Bail early if none of these transforms apply.
+  if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
   // For each of OP in AND/OR/XOR:
   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
   //
   // do not sink logical op inside of a vector extend, since it may combine
   // into a vsetcc.
-  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND  ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
-      (N0.getOpcode() == ISD::TRUNCATE &&
-       !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+      (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
-     N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
-     (!LegalOperations ||
-      TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
+     Op0VT == N1.getOperand(0).getValueType() &&
+     (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
@@ -1839,6 +1842,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (!VT.isVector() &&
       SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
+
   // fold (zext_inreg (extload x)) -> (zextload x)
   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -1885,48 +1889,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
 
   // fold (and (load x), 255) -> (zextload x, i8)
   // fold (and (extload x, i16), 255) -> (zextload x, i8)
-  if (N1C && N0.getOpcode() == ISD::LOAD) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() &&
-        // Do not change the width of a volatile load.
-        !LN0->isVolatile()) {
-      EVT ExtVT = MVT::Other;
+        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
       uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
-      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
-        ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
 
-      EVT LoadedVT = LN0->getMemoryVT();
-
-      // Do not generate loads of non-round integer types since these can
-      // be expensive (and would be wrong if the type is not byte sized).
-      if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
-          (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
-        EVT PtrType = N0.getOperand(1).getValueType();
-
-        // For big endian targets, we need to add an offset to the pointer to
-        // load the correct bytes.  For little endian systems, we merely need to
-        // read fewer bytes from the same pointer.
-        unsigned LVTStoreBytes = LoadedVT.getStoreSize();
-        unsigned EVTStoreBytes = ExtVT.getStoreSize();
-        unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-        unsigned Alignment = LN0->getAlignment();
-        SDValue NewPtr = LN0->getBasePtr();
-
-        if (TLI.isBigEndian()) {
-          NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
-                               NewPtr, DAG.getConstant(PtrOff, PtrType));
-          Alignment = MinAlign(Alignment, PtrOff);
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+          SDValue NewLoad =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), LN0->getBasePtr(),
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), LN0->getAlignment());
+          AddToWorkList(N);
+          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
         }
+
+        // Do not change the width of a volatile load.
+        // Do not generate loads of non-round integer types since these can
+        // be expensive (and would be wrong if the type is not byte sized).
+        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT PtrType = LN0->getOperand(1).getValueType();
 
-        AddToWorkList(NewPtr.getNode());
-        SDValue Load =
-          DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
-                         NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                         ExtVT, LN0->isVolatile(), Alignment);
-        AddToWorkList(N);
-        CombineTo(N0.getNode(), Load, Load.getValue(1));
-        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          unsigned Alignment = LN0->getAlignment();
+          SDValue NewPtr = LN0->getBasePtr();
+
+          // For big endian targets, we need to add an offset to the pointer
+          // to load the correct bytes.  For little endian systems, we merely
+          // need to read fewer bytes from the same pointer.
+          if (TLI.isBigEndian()) {
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
+            Alignment = MinAlign(Alignment, PtrOff);
+          }
+
+          AddToWorkList(NewPtr.getNode());
+
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          SDValue Load =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), NewPtr,
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), Alignment);
+          AddToWorkList(N);
+          CombineTo(LN0, Load, Load.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
       }
     }
   }
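The widened fold above fires only when the AND constant is a dense low-bit mask, which is what APIntOps::isMask(ActiveBits, Val) verifies before ActiveBits becomes the width of the new zextload. The same predicate on plain integers, as our own standalone illustration (not code from the patch):

    #include <cstdint>

    // True iff C == 2^n - 1 for some n > 0; n is the narrowed load width.
    // 255 -> 8 (i8 zextload), 65535 -> 16 (i16 zextload), 254 -> rejected.
    static bool isLowBitMask(uint64_t C, unsigned &Bits) {
      if (C == 0 || (C & (C + 1)) != 0)
        return false;
      Bits = 0;
      for (uint64_t V = C; V != 0; V >>= 1)
        ++Bits;
      return true;
    }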
@@ -2555,10 +2580,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   // sext_inreg.
   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
-    EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
-    if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+    if (VT.isVector())
+      ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               ExtVT, VT.getVectorNumElements());
+    if ((!LegalOperations ||
+         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
-                         N0.getOperand(0), DAG.getValueType(EVT));
+                         N0.getOperand(0), DAG.getValueType(ExtVT));
   }
 
   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
@@ -2778,9 +2807,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   // However when after the source operand of SRL is optimized into AND, the SRL
   // itself may not be optimized further. Look for it and add the BRCOND into
   // the worklist.
-  if (N->hasOneUse() &&
-      N->use_begin()->getOpcode() == ISD::BRCOND)
-    AddToWorkList(*N->use_begin());
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look past the truncate.
+      Use = *Use->use_begin();
+      if (Use->getOpcode() == ISD::BRCOND)
+        AddToWorkList(Use);
+    }
+  }
 
   return SDValue();
 }
@@ -3034,9 +3071,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // See if the value being truncated is already sign extended.  If so, just
     // eliminate the trunc/sext pair.
     SDValue Op = N0.getOperand(0);
-    unsigned OpBits   = Op.getValueType().getSizeInBits();
-    unsigned MidBits  = N0.getValueType().getSizeInBits();
-    unsigned DestBits = VT.getSizeInBits();
+    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
+    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
+    unsigned DestBits = VT.getScalarType().getSizeInBits();
     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
 
     if (OpBits == DestBits) {
@@ -3059,12 +3096,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // fold (sext (truncate x)) -> (sextinreg x).
     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                  N0.getValueType())) {
-      if (Op.getValueType().bitsLT(VT))
+      if (OpBits < DestBits)
         Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
-      else if (Op.getValueType().bitsGT(VT))
+      else if (OpBits > DestBits)
         Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
-                         DAG.getValueType(N0.getValueType().getScalarType()));
+                         DAG.getValueType(N0.getValueType()));
     }
   }
 
@@ -3198,7 +3235,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
 
   // fold (zext (truncate x)) -> (and x, mask)
   if (N0.getOpcode() == ISD::TRUNCATE &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
     SDValue Op = N0.getOperand(0);
     if (Op.getValueType().bitsLT(VT)) {
       Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3322,7 +3362,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     DebugLoc dl = N->getDebugLoc();
     return DAG.getNode(N0.getOpcode(), dl, VT,
                        DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
-                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1)));
+                       DAG.getNode(ISD::ZERO_EXTEND, dl,
+                                   N0.getOperand(1).getValueType(),
+                                   N0.getOperand(1)));
   }
 
   return SDValue();
@@ -3512,7 +3554,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   if (VT.isVector())
     return SDValue();
 
-  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
   // extended to VT.
   if (Opc == ISD::SIGN_EXTEND_INREG) {
     ExtType = ISD::SEXTLOAD;
@@ -3586,7 +3628,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EVT = cast<VTSDNode>(N1)->getVT();
   unsigned VTBits = VT.getScalarType().getSizeInBits();
-  unsigned EVTBits = EVT.getSizeInBits();
+  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
 
   // fold (sext_in_reg c1) -> c1
   if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
@@ -3702,7 +3744,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
     else
       // if the source and dest are the same type, we can drop both the extend
-      // and the truncate
+      // and the truncate.
       return N0.getOperand(0);
   }
 
@@ -4513,6 +4555,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
+  SDNode *Trunc = 0;
+  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look past the truncate.
+    Trunc = N1.getNode();
+    N1 = N1.getOperand(0);
+  }
+
   if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
     // Match this pattern so that we can generate simpler code:
     //
@@ -4524,7 +4573,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
     // into
     //
     //   %a = ...
-    //   %b = and %a, 2
+    //   %b = and i32 %a, 2
     //   %c = setcc eq %b, 0
     //   brcond %c ...
     //
@@ -4535,7 +4584,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
     SDValue Op1 = N1.getOperand(1);
 
     if (Op0.getOpcode() == ISD::AND &&
-        Op0.hasOneUse() &&
         Op1.getOpcode() == ISD::Constant) {
       SDValue AndOp1 = Op0.getOperand(1);
 
@@ -4550,12 +4598,21 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                                     Op0, DAG.getConstant(0, Op0.getValueType()),
                                     ISD::SETNE);
 
+        SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                        MVT::Other, Chain, SetCC, N2);
+        // Don't add the new BRCond into the worklist or else SimplifySelectCC
+        // will convert it back to (X & C1) >> C2.
+        CombineTo(N, NewBRCond, false);
+        // Truncate is dead.
+        if (Trunc) {
+          removeFromWorkList(Trunc);
+          DAG.DeleteNode(Trunc);
+        }
         // Replace the uses of SRL with SETCC
         DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
         removeFromWorkList(N1.getNode());
         DAG.DeleteNode(N1.getNode());
-        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
-                           MVT::Other, Chain, SetCC, N2);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }
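The brcond rewrite is sound because it only fires when the AND mask is a power of two whose log2 equals the shift amount, so the SRL result is already just 0 or 1. A scalar model of the equivalence (our own illustration, not patch code):

    #include <cassert>
    #include <cstdint>

    static bool branchOnShiftedBit(uint32_t X, uint32_t C1, unsigned C2) {
      assert(C1 == (1u << C2) && "mask is the single bit at the shift amount");
      return ((X & C1) >> C2) != 0;   // original: brcond ((x & C1) >> C2)
    }

    static bool branchOnSetcc(uint32_t X, uint32_t C1) {
      return (X & C1) != 0;           // rewritten: brcond (setcc ne (x & C1), 0)
    }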
@@ -4692,11 +4749,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
                                  BasePtr, Offset, AM);
   ++PreIndexedNodes;
   ++NodesCombined;
-  DEBUG(errs() << "\nReplacing.4 ";
+  DEBUG(dbgs() << "\nReplacing.4 ";
         N->dump(&DAG);
-        errs() << "\nWith: ";
+        dbgs() << "\nWith: ";
         Result.getNode()->dump(&DAG);
-        errs() << '\n');
+        dbgs() << '\n');
   WorkListRemover DeadNodes(*this);
   if (isLoad) {
     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4826,11 +4883,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
                                 BasePtr, Offset, AM);
         ++PostIndexedNodes;
         ++NodesCombined;
-        DEBUG(errs() << "\nReplacing.5 ";
+        DEBUG(dbgs() << "\nReplacing.5 ";
               N->dump(&DAG);
-              errs() << "\nWith: ";
+              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
-              errs() << '\n');
+              dbgs() << '\n');
         WorkListRemover DeadNodes(*this);
         if (isLoad) {
           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4889,11 +4946,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
         // v3         = add v2, c
         // Now we replace use of chain2 with chain1.  This makes the second load
         // isomorphic to the one we are deleting, and thus makes this load live.
-        DEBUG(errs() << "\nReplacing.6 ";
+        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
-              errs() << "\nWith chain: ";
+              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
-              errs() << "\n");
+              dbgs() << "\n");
         WorkListRemover DeadNodes(*this);
         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
 
@@ -4909,11 +4966,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
     assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
     if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
       SDValue Undef = DAG.getUNDEF(N->getValueType(0));
-      DEBUG(errs() << "\nReplacing.6 ";
+      DEBUG(dbgs() << "\nReplacing.6 ";
            N->dump(&DAG);
-            errs() << "\nWith: ";
+            dbgs() << "\nWith: ";
           Undef.getNode()->dump(&DAG);
-            errs() << " and 2 other values\n");
+            dbgs() << " and 2 other values\n");
       WorkListRemover DeadNodes(*this);
       DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
       DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
@@ -5738,35 +5795,48 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
     if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
       // FIXME: this discards src value information.  This is
       // over-conservative. It would be beneficial to be able to remember
-      // both potential memory locations.
+      // both potential memory locations.  Since we are discarding
+      // src value info, don't do the transformation if the memory
+      // locations are not in the default address space.
+      unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+      if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+        if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+          LLDAddrSpace = PT->getAddressSpace();
+      }
+      if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+        if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+          RLDAddrSpace = PT->getAddressSpace();
+      }
       SDValue Addr;
-      if (TheSelect->getOpcode() == ISD::SELECT) {
-        // Check that the condition doesn't reach either load.  If so, folding
-        // this will induce a cycle into the DAG.
-        if ((!LLD->hasAnyUseOfValue(1) ||
-             !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
-            (!RLD->hasAnyUseOfValue(1) ||
-             !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
-          Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
-                             LLD->getBasePtr().getValueType(),
-                             TheSelect->getOperand(0), LLD->getBasePtr(),
-                             RLD->getBasePtr());
-        }
-      } else {
-        // Check that the condition doesn't reach either load.  If so, folding
-        // this will induce a cycle into the DAG.
-        if ((!LLD->hasAnyUseOfValue(1) ||
-             (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-              !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
-            (!RLD->hasAnyUseOfValue(1) ||
-             (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-              !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
-          Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
-                             LLD->getBasePtr().getValueType(),
-                             TheSelect->getOperand(0),
-                             TheSelect->getOperand(1),
-                             LLD->getBasePtr(), RLD->getBasePtr(),
-                             TheSelect->getOperand(4));
+      if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+        if (TheSelect->getOpcode() == ISD::SELECT) {
+          // Check that the condition doesn't reach either load.  If so, folding
+          // this will induce a cycle into the DAG.
+          if ((!LLD->hasAnyUseOfValue(1) ||
+               !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+              (!RLD->hasAnyUseOfValue(1) ||
+               !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+            Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                               LLD->getBasePtr().getValueType(),
+                               TheSelect->getOperand(0), LLD->getBasePtr(),
+                               RLD->getBasePtr());
+          }
+        } else {
+          // Check that the condition doesn't reach either load.  If so, folding
+          // this will induce a cycle into the DAG.
+          if ((!LLD->hasAnyUseOfValue(1) ||
+               (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+              (!RLD->hasAnyUseOfValue(1) ||
+               (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+            Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                               LLD->getBasePtr().getValueType(),
+                               TheSelect->getOperand(0),
+                               TheSelect->getOperand(1),
+                               LLD->getBasePtr(), RLD->getBasePtr(),
+                               TheSelect->getOperand(4));
+          }
         }
       }
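One way to read the new guard in SimplifySelectOps: folding select(c, load p, load q) into a single load of a selected address keeps only one source-value annotation, so the transformation is now limited to pointers in the default address space, where discarding that information is known to be benign. The address-space query in isolation (helper name ours, 2010-era include paths):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Value.h"

    // Address space of V's type when V is pointer-typed, otherwise 0.
    static unsigned sourceAddrSpace(const llvm::Value *V) {
      if (const llvm::PointerType *PT =
            llvm::dyn_cast<llvm::PointerType>(V->getType()))
        return PT->getAddressSpace();
      return 0;
    }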
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 33694f283e75..09fd657fffd0 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -78,7 +78,7 @@ unsigned FastISel::getRegForValue(Value *V) {
   // Look up the value to see if we already have a register for it. We
   // cache values defined by Instructions across blocks, and other values
   // only locally. This is because Instructions already have the SSA
-  // def-dominatess-use requirement enforced.
+  // def-dominates-use requirement enforced.
   if (ValueMap.count(V))
     return ValueMap[V];
   unsigned Reg = LocalValueMap[V];
@@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) {
 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
 /// which has an opcode which directly corresponds to the given ISD opcode.
 ///
-bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
   EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
   if (VT == MVT::Other || !VT.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
@@ -325,12 +325,6 @@ bool FastISel::SelectCall(User *I) {
   unsigned IID = F->getIntrinsicID();
   switch (IID) {
   default: break;
-  case Intrinsic::dbg_stoppoint:
-  case Intrinsic::dbg_region_start:
-  case Intrinsic::dbg_region_end:
-  case Intrinsic::dbg_func_start:
-    // FIXME - Remove this instructions once the dust settles.
-    return true;
   case Intrinsic::dbg_declare: {
     DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
     if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None)||!DW
@@ -338,8 +332,6 @@ bool FastISel::SelectCall(User *I) {
       return true;
 
     Value *Address = DI->getAddress();
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
-      Address = BCI->getOperand(0);
     AllocaInst *AI = dyn_cast<AllocaInst>(Address);
     // Don't handle byval struct arguments or VLAs, for example.
     if (!AI) break;
@@ -424,7 +416,7 @@ bool FastISel::SelectCall(User *I) {
   return false;
 }
 
-bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
+bool FastISel::SelectCast(User *I, unsigned Opcode) {
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
 
@@ -742,44 +734,44 @@ FastISel::FastISel(MachineFunction &mf,
 FastISel::~FastISel() {}
 
 unsigned FastISel::FastEmit_(MVT, MVT,
-                             ISD::NodeType) {
+                             unsigned) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_r(MVT, MVT,
-                              ISD::NodeType, unsigned /*Op0*/) {
+                              unsigned, unsigned /*Op0*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rr(MVT, MVT,
-                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned, unsigned /*Op0*/,
                                unsigned /*Op0*/) {
   return 0;
 }
 
-unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) {
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_f(MVT, MVT,
-                              ISD::NodeType, ConstantFP * /*FPImm*/) {
+                              unsigned, ConstantFP * /*FPImm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_ri(MVT, MVT,
-                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned, unsigned /*Op0*/,
                                uint64_t /*Imm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rf(MVT, MVT,
-                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned, unsigned /*Op0*/,
                                ConstantFP * /*FPImm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rri(MVT, MVT,
-                                ISD::NodeType,
+                                unsigned,
                                 unsigned /*Op0*/, unsigned /*Op1*/,
                                 uint64_t /*Imm*/) {
   return 0;
@@ -789,7 +781,7 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
 /// to emit an instruction with an immediate operand using FastEmit_ri.
 /// If that fails, it materializes the immediate into a register and try
 /// FastEmit_rr instead.
-unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
                                 unsigned Op0, uint64_t Imm,
                                 MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the ri form.
@@ -806,7 +798,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
 /// to emit an instruction with a floating-point immediate operand using
 /// FastEmit_rf. If that fails, it materializes the immediate into a register
 /// and try FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
                                 unsigned Op0, ConstantFP *FPImm,
                                 MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the rf form.
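A plausible reading of the ISD::NodeType-to-unsigned widening in the FastEmit_* hooks above: target-specific opcodes are numbered upward from ISD::BUILTIN_OP_END and are not members of the ISD::NodeType enum, so an enum-typed parameter could not carry them without casts at every call site. A sketch (the enum constant is real in this tree; the helper is ours):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Generic opcodes fit in ISD::NodeType; target opcodes start at
    // BUILTIN_OP_END and only fit in a plain unsigned.
    static bool isTargetOpcode(unsigned Opcode) {
      return Opcode >= llvm::ISD::BUILTIN_OP_END;
    }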
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index e3b25c2a85cc..4868c9e29e15 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -113,7 +113,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
     return;
   }
   // Interpret void as zero return values.
-  if (Ty == Type::getVoidTy(Ty->getContext()))
+  if (Ty->isVoidTy())
     return;
   // Base case: we can get an EVT for this LLVM IR type.
   ValueVTs.push_back(TLI.getValueType(Ty));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 474d83396660..5e3f58a8af41 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -32,6 +32,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -950,9 +951,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   switch (Node->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "NODE: ";
-    Node->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "NODE: ";
+    Node->dump(&DAG);
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to legalize this operator!");
 
@@ -2292,12 +2293,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
     EVT VT = Node->getValueType(0);
     EVT ShiftAmountTy = TLI.getShiftAmountTy();
-    if (VT.isVector()) {
+    if (VT.isVector())
       ShiftAmountTy = VT;
-      VT = VT.getVectorElementType();
-    }
-    unsigned BitsDiff = VT.getSizeInBits() -
-                        ExtraVT.getSizeInBits();
+    unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+                        ExtraVT.getScalarType().getSizeInBits();
     SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
     Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
                        Node->getOperand(0), ShiftCst);
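The ExpandNode change keeps the shift-pair lowering of SIGN_EXTEND_INREG but derives BitsDiff from the scalar (per-element) sizes, so the same arithmetic now covers vectors. On one i32 lane holding an i8 value, BitsDiff = 32 - 8 = 24; a scalar model of the resulting (shl, sra) pair (our own illustration):

    #include <cstdint>

    static int32_t signExtendLow8(int32_t X) {
      // (x shl 24) sra 24: copies bit 7 of X into bits 8..31.
      return (int32_t)((uint32_t)X << 24) >> 24;
    }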
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2831617ebd46..4f0fce743422 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -43,15 +43,15 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
-        errs() << "\n");
+  DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
   SDValue R = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "SoftenFloatResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to soften the result of this operator!");
 
@@ -531,15 +531,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
 //===----------------------------------------------------------------------===//
 
 bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
-        errs() << "\n");
+  DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "SoftenFloatOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to soften this operator's operand!");
 
@@ -768,7 +768,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
 /// have invalid operands or may have other results that need promotion, we just
 /// know that (at least) one result needs expansion.
 void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Lo, Hi;
   Lo = Hi = SDValue();
 
@@ -779,8 +779,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ExpandFloatResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to expand the result of this operator!");
 
@@ -1167,7 +1167,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
 /// types of the node are known to be legal, but other operands of the node may
 /// need promotion or expansion as well as the specified one.
 bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (TLI.getOperationAction(N->getOpcode(),
                              N->getOperand(OpNo).getValueType())
@@ -1178,8 +1178,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ExpandFloatOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to expand this operator's operand!");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index bd3b97a9ff13..9932cf49eb2d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
 /// may also have invalid operands or may have other results that need
 /// expansion, we just know that (at least) one result needs promotion.
 void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   // See if the target wants to custom expand this node.
@@ -43,8 +43,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "PromoteIntegerResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to promote this operator!");
   case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
@@ -599,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
 /// result types of the node are known to be legal, but other operands of the
 /// node may need promotion or expansion as well as the specified one.
 bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -608,8 +608,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to promote this operator's operand!");
 
@@ -910,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
 /// have invalid operands or may have other results that need promotion, we just
 /// know that (at least) one result needs expansion.
 void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Lo, Hi;
   Lo = Hi = SDValue();
 
@@ -921,8 +921,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ExpandIntegerResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to expand the result of this operator!");
 
@@ -1965,7 +1965,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
 /// result types of the node are known to be legal, but other operands of the
 /// node may need promotion or expansion as well as the specified one.
 bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -1974,8 +1974,8 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to expand this operator's operand!");
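For readers landing mid-file: the two instrumented entry points embody the two integer legalization strategies, promotion (carry a too-narrow value in a wider legal register) and expansion (split a too-wide value into legal halves). A scalar model of each on a hypothetical i32-only target (our own illustration):

    #include <cstdint>

    // Promotion: an i8 add carried out in i32, result re-truncated.
    static uint8_t promotedAdd(uint8_t A, uint8_t B) {
      return (uint8_t)((uint32_t)A + (uint32_t)B);
    }

    // Expansion: an i64 add built from i32 halves with a manual carry.
    static uint64_t expandedAdd(uint64_t A, uint64_t B) {
      uint32_t ALo = (uint32_t)A, AHi = (uint32_t)(A >> 32);
      uint32_t BLo = (uint32_t)B, BHi = (uint32_t)(B >> 32);
      uint32_t Lo = ALo + BLo;
      uint32_t Hi = AHi + BHi + (Lo < ALo ? 1 : 0);  // carry from low half
      return ((uint64_t)Hi << 32) | Lo;
    }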
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index d9efd4f78ad4..37f36a3ea316 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -123,42 +123,42 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
         // another node that has not been seen by the LegalizeTypes machinery.
         if ((I->getNodeId() == NewNode && Mapped > 1) ||
             (I->getNodeId() != NewNode && Mapped != 0)) {
-          errs() << "Unprocessed value in a map!";
+          dbgs() << "Unprocessed value in a map!";
           Failed = true;
         }
       } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
         if (Mapped > 1) {
-          errs() << "Value with legal type was transformed!";
+          dbgs() << "Value with legal type was transformed!";
           Failed = true;
         }
       } else {
         if (Mapped == 0) {
-          errs() << "Processed value not in any map!";
+          dbgs() << "Processed value not in any map!";
           Failed = true;
         } else if (Mapped & (Mapped - 1)) {
-          errs() << "Value in multiple maps!";
+          dbgs() << "Value in multiple maps!";
          Failed = true;
        }
      }
 
       if (Failed) {
         if (Mapped & 1)
-          errs() << " ReplacedValues";
+          dbgs() << " ReplacedValues";
         if (Mapped & 2)
-          errs() << " PromotedIntegers";
+          dbgs() << " PromotedIntegers";
         if (Mapped & 4)
-          errs() << " SoftenedFloats";
+          dbgs() << " SoftenedFloats";
         if (Mapped & 8)
-          errs() << " ScalarizedVectors";
+          dbgs() << " ScalarizedVectors";
         if (Mapped & 16)
-          errs() << " ExpandedIntegers";
+          dbgs() << " ExpandedIntegers";
         if (Mapped & 32)
-          errs() << " ExpandedFloats";
+          dbgs() << " ExpandedFloats";
         if (Mapped & 64)
-          errs() << " SplitVectors";
+          dbgs() << " SplitVectors";
         if (Mapped & 128)
-          errs() << " WidenedVectors";
-        errs() << "\n";
+          dbgs() << " WidenedVectors";
+        dbgs() << "\n";
         llvm_unreachable(0);
       }
     }
@@ -342,7 +342,7 @@ bool DAGTypeLegalizer::run() {
     }
 
     if (i == NumOperands) {
-      DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n");
+      DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
     }
     }
 NodeDone:
@@ -411,7 +411,7 @@ bool DAGTypeLegalizer::run() {
     if (!IgnoreNodeResults(I))
       for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
         if (!isTypeLegal(I->getValueType(i))) {
-          errs() << "Result type " << i << " illegal!\n";
+          dbgs() << "Result type " << i << " illegal!\n";
          Failed = true;
        }
 
@@ -419,24 +419,24 @@ bool DAGTypeLegalizer::run() {
     for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
       if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
           !isTypeLegal(I->getOperand(i).getValueType())) {
-        errs() << "Operand type " << i << " illegal!\n";
+        dbgs() << "Operand type " << i << " illegal!\n";
         Failed = true;
       }
 
     if (I->getNodeId() != Processed) {
        if (I->getNodeId() == NewNode)
-         errs() << "New node not analyzed?\n";
+         dbgs() << "New node not analyzed?\n";
        else if (I->getNodeId() == Unanalyzed)
-         errs() << "Unanalyzed node not noticed?\n";
+         dbgs() << "Unanalyzed node not noticed?\n";
        else if (I->getNodeId() > 0)
-         errs() << "Operand not processed?\n";
+         dbgs() << "Operand not processed?\n";
        else if (I->getNodeId() == ReadyToProcess)
-         errs() << "Not added to worklist?\n";
+         dbgs() << "Not added to worklist?\n";
        Failed = true;
     }
 
     if (Failed) {
-      I->dump(&DAG); errs() << "\n";
+      I->dump(&DAG); dbgs() << "\n";
       llvm_unreachable(0);
     }
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c35f7ad9b1ab..b5dbd41eb97a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -509,6 +509,7 @@ class VISIBILITY_HIDDEN DAGTypeLegalizer {
   void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecRes_BinOp(SDNode *N);
   SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+  SDValue ScalarizeVecRes_InregOp(SDNode *N);
 
   SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
   SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
@@ -550,6 +551,7 @@ class VISIBILITY_HIDDEN DAGTypeLegalizer {
   void SplitVectorResult(SDNode *N, unsigned OpNo);
   void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -615,6 +617,7 @@ class VISIBILITY_HIDDEN DAGTypeLegalizer {
   SDValue WidenVecRes_Convert(SDNode *N);
   SDValue WidenVecRes_Shift(SDNode *N);
   SDValue WidenVecRes_Unary(SDNode *N);
+  SDValue WidenVecRes_InregOp(SDNode *N);
 
   // Widen Vector Operand.
   bool WidenVectorOperand(SDNode *N, unsigned ResNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 26252459ec99..b5f84c0abb23 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -179,9 +179,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FRINT:
   case ISD::FNEARBYINT:
   case ISD::FFLOOR:
-  case ISD::SIGN_EXTEND_INREG:
     QueryType = Node->getValueType(0);
     break;
+  case ISD::SIGN_EXTEND_INREG:
+  case ISD::FP_ROUND_INREG:
+    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
     QueryType = Node->getOperand(0).getValueType();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index cf67ab968a0b..808bac70fda8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -32,17 +32,17 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Scalarize node result " << ResNo << ": ";
+  DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue R = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ScalarizeVectorResult #" << ResNo << ": ";
+    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to scalarize the result of this operator!");
 
@@ -50,11 +50,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
   case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
   case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
   case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:           R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
-  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
   case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
   case ISD::SETCC:             R = ScalarizeVecRes_SETCC(N); break;
@@ -186,6 +187,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
   return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  EVT ExtVT =
+    cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+                     LHS, DAG.getValueType(ExtVT));
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
   // If the operand is wider than the vector element type then it is implicitly
   // truncated.  Make that explicit here.
@@ -196,13 +205,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
   return InOp;
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) {
-  EVT EltVT = N->getValueType(0).getVectorElementType();
-  SDValue LHS = GetScalarizedVector(N->getOperand(0));
-  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT,
-                     LHS, N->getOperand(1));
-}
-
 SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
   SDValue LHS = GetScalarizedVector(N->getOperand(1));
   return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -278,18 +280,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
 //===----------------------------------------------------------------------===//
 
 bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Scalarize node operand " << OpNo << ": ";
+  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
        N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (Res.getNode() == 0) {
     switch (N->getOpcode()) {
     default:
 #ifndef NDEBUG
-      errs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+      dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
       N->dump(&DAG);
-      errs() << "\n";
+      dbgs() << "\n";
 #endif
       llvm_unreachable("Do not know how to scalarize this operator's operand!");
     case ISD::BIT_CONVERT:
@@ -382,17 +384,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
 /// legalization, we just know that (at least) one result needs vector
 /// splitting.
 void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Split node result: ";
+  DEBUG(dbgs() << "Split node result: ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Lo, Hi;
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "SplitVectorResult #" << ResNo << ": ";
+    dbgs() << "SplitVectorResult #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to split the result of this operator!");
 
@@ -406,10 +408,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
   case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
   case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
   case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
-  case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
     break;
@@ -654,6 +657,21 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
 }
 
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+                   DAG.getValueType(LoVT));
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+                   DAG.getValueType(HiVT));
+}
+
 void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                                                      SDValue &Hi) {
   SDValue Vec = N->getOperand(0);
@@ -709,18 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
   Hi = DAG.getUNDEF(HiVT);
 }
 
-void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo,
-                                                     SDValue &Hi) {
-  SDValue LHSLo, LHSHi;
-  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
-  DebugLoc dl = N->getDebugLoc();
-
-  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
-                   N->getOperand(1));
-  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
-                   N->getOperand(1));
-}
-
 void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                                         SDValue &Hi) {
   assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -945,18 +951,18 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
 /// result types of the node are known to be legal, but other operands of the
 /// node may need legalization as well as the specified one.
 bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Split node operand: ";
+  DEBUG(dbgs() << "Split node operand: ";
        N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (Res.getNode() == 0) {
     switch (N->getOpcode()) {
     default:
 #ifndef NDEBUG
-      errs() << "SplitVectorOperand Op #" << OpNo << ": ";
+      dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
       N->dump(&DAG);
-      errs() << "\n";
+      dbgs() << "\n";
 #endif
       llvm_unreachable("Do not know how to split this operator's operand!");
 
@@ -1136,9 +1142,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Widen node result " << ResNo << ": ";
+  DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
        N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
 
   // See if the target wants to custom widen this node.
   if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -1148,9 +1154,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "WidenVectorResult #" << ResNo << ": ";
+    dbgs() << "WidenVectorResult #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to widen the result of this operator!");
 
@@ -1159,10 +1165,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
   case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    Res = WidenVecRes_InregOp(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
-  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
   case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
   case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
@@ -1331,6 +1338,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
   return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               cast<VTSDNode>(N->getOperand(1))->getVT()
+                                 .getVectorElementType(),
+                               WidenVT.getVectorNumElements());
+  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
   SDValue InOp = N->getOperand(0);
   EVT InVT = InOp.getValueType();
@@ -1713,13 +1731,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
                      WidenVT, N->getOperand(0));
 }
 
-SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) {
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
-                     WidenVT, WidenLHS, N->getOperand(1));
-}
-
 SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
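The *_InregOp handlers introduced above serve both SIGN_EXTEND_INREG and FP_ROUND_INREG because the two nodes share one shape: operand 0 is the value and operand 1 is a VTSDNode naming the in-register type, so a single handler can scalarize, split, or widen that type alongside the value instead of special-casing SIGN_EXTEND_INREG. The shared extraction step in isolation (helper name ours):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Per-element type named by an *_INREG node's VT operand.
    static llvm::EVT inregElementVT(llvm::SDNode *N) {
      llvm::EVT VT = llvm::cast<llvm::VTSDNode>(N->getOperand(1))->getVT();
      return VT.isVector() ? VT.getVectorElementType() : VT;
    }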
@@ -1806,17 +1817,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
 // Widen Vector Operand
 //===----------------------------------------------------------------------===//
 
 bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Widen node operand " << ResNo << ": ";
+  DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
        N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "WidenVectorOperand op #" << ResNo << ": ";
+    dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to widen this operator's operand!");
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 4045a34a87e8..0c3c974c6cb7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -109,7 +109,7 @@ class ScheduleDAGFast : public ScheduleDAGSDNodes {
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGFast::Schedule() {
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   NumLiveRegs = 0;
   LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -136,9 +136,9 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
 
 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     PredSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -175,7 +175,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -233,7 +233,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
       return NULL;
 
-    DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
     assert(NewNodes.size() == 2 && "Expected a load folding node!");
 
     N = NewNodes[1];
@@ -343,7 +343,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
       SU = NewSU;
     }
 
-    DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
     NewSU = Clone(SU);
 
     // New SUnit has the exact same predecessors.
@@ -550,7 +550,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
         // Issue copies, these can be expensive cross register class copies.
         SmallVector<SUnit*, 2> Copies;
         InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
-        DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum
+        DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n");
         AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
@@ -558,7 +558,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
         NewDef = Copies.back();
       }
 
-      DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum
+      DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n");
       LiveRegDefs[Reg] = NewDef;
       AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index faf21f7be7af..b92a672f90e4 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -87,7 +87,7 @@ class ScheduleDAGList : public ScheduleDAGSDNodes {
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGList::Schedule() {
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   // Build the scheduling graph.
   BuildSchedGraph(NULL);
@@ -110,9 +110,9 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
 
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -141,7 +141,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   Sequence.push_back(SU);
@@ -233,7 +233,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
     } else if (!HasNoopHazards) {
       // Otherwise, we have a pipeline stall, but no other problem, just advance
       // the current cycle and try again.
-      DEBUG(errs() << "*** Advancing cycle, no work to do\n");
+      DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
       HazardRec->AdvanceCycle();
       ++NumStalls;
       ++CurCycle;
@@ -241,7 +241,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
       // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right.  This is the case for
      // processors without pipeline interlocks and other cases.
-      DEBUG(errs() << "*** Emitting noop\n");
+      DEBUG(dbgs() << "*** Emitting noop\n");
       HazardRec->EmitNoop();
       Sequence.push_back(0);   // NULL here means noop
       ++NumNoops;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7e1015aff4b2..1ad7919962b3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -164,7 +164,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes {
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   NumLiveRegs = 0;
   LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -199,9 +199,9 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
 
 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     PredSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -238,7 +238,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -284,7 +284,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
 /// its predecessor states to reflect the change.
 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
-  DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
   DEBUG(SU->dump(this));
 
   AvailableQueue->UnscheduledNode(SU);
@@ -371,7 +371,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
       return NULL;
 
-    DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
     assert(NewNodes.size() == 2 && "Expected a load folding node!");
 
     N = NewNodes[1];
@@ -490,7 +490,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
       SU = NewSU;
     }
 
-    DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
     NewSU = CreateClone(SU);
 
     // New SUnit has the exact same predecessors.
@@ -771,7 +771,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
       // Issue copies, these can be expensive cross register class copies.
       SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
-      DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum
+      DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
       AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
@@ -780,7 +780,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
       NewDef = Copies.back();
     }
 
-    DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum
+    DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
     LiveRegDefs[Reg] = NewDef;
     AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
@@ -827,9 +827,9 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
 
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -1038,6 +1038,10 @@ namespace { return 0; return SethiUllmanNumbers[SU->NodeNum]; } + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } unsigned size() const { return Queue.size(); } @@ -1120,6 +1124,14 @@ static unsigned calcMaxScratches(const SUnit *SU) { // Bottom up bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); if (LPriority != RPriority) @@ -1329,7 +1341,7 @@ void RegReductionPriorityQueue::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. - DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum + DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum << " next to PredSU # " << PredSU->NodeNum << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { @@ -1419,7 +1431,7 @@ void RegReductionPriorityQueue::AddPseudoTwoAddrDeps() { (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { - DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # " + DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d53de347a556..aaaa2b3b7075 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -253,19 +253,19 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { - errs() << "PHYS REG COPY\n"; + dbgs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); - errs() << "\n"; + dbgs() << "\n"; SmallVector FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { - errs() << " "; + dbgs() << " "; FlaggedNodes.back()->dump(DAG); - errs() << "\n"; + dbgs() << "\n"; FlaggedNodes.pop_back(); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 77301b042022..cb1a0d660b4c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" @@ -644,7 
+645,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); - errs() << "\n"; + dbgs() << "\n"; llvm_unreachable("Node is not in map!"); } #endif @@ -1740,7 +1741,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast(Op.getOperand(1))->getVT(); - unsigned EBits = EVT.getSizeInBits(); + unsigned EBits = EVT.getScalarType().getSizeInBits(); // Sign extension. Compute the demanded bits in the result that are not // present in the input. @@ -1785,7 +1786,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ISD::isZEXTLoad(Op.getNode())) { LoadSDNode *LD = cast(Op); EVT VT = LD->getMemoryVT(); - unsigned MemBits = VT.getSizeInBits(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; } return; @@ -2024,7 +2025,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ case ISD::SIGN_EXTEND_INREG: // Max of the input and what this extends. - Tmp = cast(Op.getOperand(1))->getVT().getSizeInBits(); + Tmp = + cast(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); Tmp = VTBits-Tmp+1; Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); @@ -2168,10 +2170,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ switch (ExtType) { default: break; case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); return VTBits-Tmp+1; case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); return VTBits-Tmp; } } @@ -2655,12 +2657,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // size of the value, the shift/rotate count is guaranteed to be zero. if (VT == MVT::i1) return N1; + if (N2C && N2C->isNullValue()) + return N1; break; case ISD::FP_ROUND_INREG: { EVT EVT = cast(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg round!"); assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && "Cannot FP_ROUND_INREG integer types"); + assert(EVT.isVector() == VT.isVector() && + "FP_ROUND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in FP_ROUND_INREG"); assert(EVT.bitsLE(VT) && "Not rounding down!"); if (cast(N2)->getVT() == VT) return N1; // Not actually rounding. 
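// Several hunks above trade VT.getSizeInBits() for
// VT.getScalarType().getSizeInBits(). For scalars the two agree; for vectors
// they do not, and known-bits/sign-bits reasoning is per element: a
// zero-extending load of <4 x i8> into <4 x i32> clears bits 8-31 of each
// lane, not bits 32-127 of a 128-bit value. A small model of the distinction
// (hypothetical VT record, not LLVM's EVT):
#include <cstdio>

struct VT {
  unsigned ScalarBits;   // width of one element
  unsigned NumElements;  // 1 for scalar types
  unsigned getSizeInBits() const { return ScalarBits * NumElements; }
  unsigned getScalarSizeInBits() const { return ScalarBits; }
};

// Known-zero high bits of each lane after a zero-extending load whose memory
// element type is MemElemBits wide. Only the per-element width is meaningful.
unsigned knownZeroHighBitsPerLane(VT Result, unsigned MemElemBits) {
  return Result.getScalarSizeInBits() - MemElemBits;
}

int main() {
  VT V4i32 = {32, 4};
  // zextload <4 x i8> -> <4 x i32>: 24 high bits of every lane are zero.
  std::printf("per-lane known-zero bits: %u\n",
              knownZeroHighBitsPerLane(V4i32, 8));
  // Using the full vector width would claim a nonsensical 120 bits.
  std::printf("full-width miscount:      %u\n", V4i32.getSizeInBits() - 8);
  return 0;
}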
break; @@ -2690,15 +2700,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); - assert(!EVT.isVector() && - "SIGN_EXTEND_INREG type should be the vector element type rather " - "than the vector type!"); - assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); + assert(EVT.isVector() == VT.isVector() && + "SIGN_EXTEND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in SIGN_EXTEND_INREG"); + assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = EVT.getSizeInBits(); + unsigned FromBits = EVT.getScalarType().getSizeInBits(); Val <<= Val.getBitWidth()-FromBits; Val = Val.ashr(Val.getBitWidth()-FromBits); return getConstant(Val, VT); @@ -4106,7 +4119,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, if (ConstantSDNode *AndRHS = dyn_cast(N3.getOperand(1))) { // If the and is only masking out bits that cannot effect the shift, // eliminate the and. - unsigned NumBits = VT.getSizeInBits()*2; + unsigned NumBits = VT.getScalarType().getSizeInBits()*2; if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } @@ -5713,7 +5726,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() { void SDNode::dump() const { dump(0); } void SDNode::dump(const SelectionDAG *G) const { - print(errs(), G); + print(dbgs(), G); } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { @@ -5885,12 +5898,12 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { if (N->getOperand(i).getNode()->hasOneUse()) DumpNodes(N->getOperand(i).getNode(), indent+2, G); else - errs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": "; + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": "; - errs() << "\n"; - errs().indent(indent); + dbgs() << "\n"; + dbgs().indent(indent); N->dump(G); } @@ -5943,6 +5956,13 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], getShiftAmountOperand(Operands[1]))); break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: { + EVT ExtVT = cast(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } } } @@ -6048,7 +6068,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { } void SelectionDAG::dump() const { - errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { @@ -6059,7 +6079,7 @@ void SelectionDAG::dump() const { if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - errs() << "\n\n"; + dbgs() << "\n\n"; } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { @@ -6106,12 +6126,12 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, void SDNode::dumpr() const { VisitedSDNodeSet once; - DumpNodesr(errs(), this, 0, 0, once); + DumpNodesr(dbgs(), this, 0, 0, once); } void SDNode::dumpr(const 
SelectionDAG *G) const { VisitedSDNodeSet once; - DumpNodesr(errs(), this, 0, G, once); + DumpNodesr(dbgs(), this, 0, G, once); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 74d624f3647d..5e3a3b5e0d0e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1195,6 +1195,18 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ return false; } + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa(Cases[0].CmpRHS) && + cast(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + return true; } @@ -1733,7 +1745,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, if (Density < 0.4) return false; - DEBUG(errs() << "Lowering jump table\n" + DEBUG(dbgs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' << "Range: " << Range << "Size: " << TSize << ". Density: " << Density << "\n\n"); @@ -1837,7 +1849,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt LSize = FrontCase.size(); APInt RSize = TSize-LSize; - DEBUG(errs() << "Selecting best pivot: \n" + DEBUG(dbgs() << "Selecting best pivot: \n" << "First: " << First << ", Last: " << Last <<'\n' << "LSize: " << LSize << ", RSize: " << RSize << '\n'); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; @@ -1853,7 +1865,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place - DEBUG(errs() <<"=>Step\n" + DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' << "LDensity: " << LDensity << ", RDensity: " << RDensity << '\n' @@ -1861,7 +1873,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, if (FMetric < Metric) { Pivot = J; FMetric = Metric; - DEBUG(errs() << "Current metric set to: " << FMetric << '\n'); + DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); } LSize += J->size(); @@ -1965,7 +1977,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Don't bother the code below, if there are too much unique destinations return false; } - DEBUG(errs() << "Total number of unique destinations: " + DEBUG(dbgs() << "Total number of unique destinations: " << Dests.size() << '\n' << "Total number of comparisons: " << numCmps << '\n'); @@ -1974,7 +1986,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const APInt& maxValue = cast(BackCase.High)->getValue(); APInt cmpRange = maxValue - minValue; - DEBUG(errs() << "Compare range: " << cmpRange << '\n' + DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' << "Low bound: " << minValue << '\n' << "High bound: " << maxValue << '\n'); @@ -1984,7 +1996,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, !(Dests.size() >= 3 && numCmps >= 6))) return false; - DEBUG(errs() << "Emitting bit tests\n"); + DEBUG(dbgs() << "Emitting bit tests\n"); APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); // Optimize the case where all the case values fit in a @@ -2034,9 +2046,9 @@ bool 
SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - DEBUG(errs() << "Cases:\n"); + DEBUG(dbgs() << "Cases:\n"); for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(errs() << "Mask: " << CasesBits[i].Mask + DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits << ", BB: " << CasesBits[i].BB << '\n'); @@ -2135,7 +2147,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // create a binary search tree from them. CaseVector Cases; size_t numCmps = Clusterify(Cases, SI); - DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << '\n'); numCmps = 0; @@ -3157,7 +3169,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); - } else if (I.getType() != Type::getVoidTy(*DAG.getContext())) { + } else if (!I.getType()->isVoidTy()) { Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); } else { @@ -3176,7 +3188,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, DAG.setRoot(Chain); } - if (I.getType() != Type::getVoidTy(*DAG.getContext())) { + if (!I.getType()->isVoidTy()) { if (const VectorType *PTy = dyn_cast(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); @@ -4406,12 +4418,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } - case Intrinsic::dbg_stoppoint: - case Intrinsic::dbg_region_start: - case Intrinsic::dbg_region_end: - case Intrinsic::dbg_func_start: - // FIXME - Remove this instructions once the dust settles. - return 0; case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. @@ -5931,7 +5937,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (const StructType *STy = dyn_cast(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); @@ -6056,7 +6062,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { std::vector AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( - DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other)); + DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -6100,8 +6107,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { OpInfo.CallOperandVal)); } else { // This is the result value of the call. - assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && - "Bad inline asm!"); + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); // Concatenate this output onto the outputs list. 
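// handleBitTestsSwitchCase, whose debug output is touched above, folds the
// cases that share a destination into one bit mask so a single shift, AND,
// and branch covers them all. A standalone model of the mask construction
// (toy values; the real code also rebases the case range and checks density):
#include <cstdint>
#include <cstdio>

int main() {
  // Cases 1, 3, 5, 9 all branch to the same block: merge them into a mask,
  // as the per-destination CasesBits entries do.
  const uint64_t CaseValues[] = {1, 3, 5, 9};
  uint64_t Mask = 0;
  for (uint64_t V : CaseValues)
    Mask |= 1ULL << V;          // assumes values already rebased below 64

  // The emitted test: one bit probe replaces four equality compares.
  for (uint64_t X = 0; X < 12; ++X) {
    bool Hit = X < 64 && ((1ULL << X) & Mask) != 0;
    std::printf("x=%2llu -> %s\n", (unsigned long long)X,
                Hit ? "bit-test target" : "default");
  }
  return 0;
}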
      RetValRegs.append(OpInfo.AssignedRegs);
    }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 88a2017b4746..db656e35a4a8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -163,7 +163,7 @@ class SelectionDAGBuilder {
   /// The comparison function for sorting the switch case values in the vector.
   /// WARNING: Case ranges should be disjoint!
   struct CaseCmp {
-    bool operator () (const Case& C1, const Case& C2) {
+    bool operator()(const Case &C1, const Case &C2) {
       assert(isa(C1.Low) && isa(C2.High));
       const ConstantInt* CI1 = cast(C1.Low);
       const ConstantInt* CI2 = cast(C2.High);
@@ -172,12 +172,12 @@ class SelectionDAGBuilder {
   };
   struct CaseBitsCmp {
-    bool operator () (const CaseBits& C1, const CaseBits& C2) {
+    bool operator()(const CaseBits &C1, const CaseBits &C2) {
       return C1.Bits > C2.Bits;
     }
   };
-  size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+  size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
   /// CaseBlock - This structure is used to communicate between
   /// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -215,7 +215,7 @@ class SelectionDAGBuilder {
     MachineBasicBlock *Default;
   };
   struct JumpTableHeader {
-    JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+    JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
                     bool E = false):
       First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
     APInt First;
@@ -230,8 +230,8 @@ class SelectionDAGBuilder {
     BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
       Mask(M), ThisBB(T), TargetBB(Tr) { }
     uint64_t Mask;
-    MachineBasicBlock* ThisBB;
-    MachineBasicBlock* TargetBB;
+    MachineBasicBlock *ThisBB;
+    MachineBasicBlock *TargetBB;
   };
   typedef SmallVector BitTestInfo;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 05669c0ec9a9..9ac8f83ad44b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -162,7 +162,7 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *MBB,
                                                  DenseMap *EM) const {
 #ifndef NDEBUG
-  errs() << "If a target marks an instruction with "
+  dbgs() << "If a target marks an instruction with "
          "'usesCustomInserter', it must implement "
          "TargetLowering::EmitInstrWithCustomInserter!";
 #endif
@@ -325,7 +325,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   else
     GFI = 0;
   RegInfo = &MF->getRegInfo();
-  DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n");
+  DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
   MachineModuleInfo *MMI = getAnalysisIfAvailable();
   DwarfWriter *DW = getAnalysisIfAvailable();
@@ -438,6 +438,95 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
   SDB->clear();
 }
+namespace {
+/// SDOPsWorkListRemover - This class is a DAGUpdateListener that removes any
+/// deleted nodes from the worklist.
+class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
+  SmallVector &Worklist;
+public:
+  SDOPsWorkListRemover(SmallVector &wl) : Worklist(wl) {}
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                   Worklist.end());
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Ignore updates.
+  }
+};
+}
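// SDOPsWorkListRemover above exists because ReplaceAllUsesOfValueWith can
// delete nodes that are still queued in the worklist of the
// ShrinkDemandedOps pass defined below; the listener scrubs them so the loop
// never pops a dangling pointer. The shape of the pattern, standalone
// (hypothetical Node/listener types, not the SelectionDAG API):
#include <algorithm>
#include <cstdio>
#include <vector>

struct Node { int Id; };

struct WorklistRemover {
  std::vector<Node*> &Worklist;
  explicit WorklistRemover(std::vector<Node*> &WL) : Worklist(WL) {}
  // Called by the graph whenever it deletes a node, mirroring
  // DAGUpdateListener::NodeDeleted.
  void nodeDeleted(Node *N) {
    Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
                   Worklist.end());
  }
};

int main() {
  Node A{0}, B{1}, C{2};
  std::vector<Node*> Worklist = {&A, &B, &C, &B};
  WorklistRemover Remover(Worklist);
  Remover.nodeDeleted(&B);        // a RAUW-style update just deleted B
  for (Node *N : Worklist)
    std::printf("still queued: %d\n", N->Id);  // prints 0 then 2
  return 0;
}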
+/// ShrinkDemandedOps - A late transformation pass that shrinks expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+void SelectionDAGISel::ShrinkDemandedOps() {
+  SmallVector Worklist;
+
+  // Add all the dag nodes to the worklist.
+  Worklist.reserve(CurDAG->allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ++I)
+    Worklist.push_back(I);
+
+  APInt Mask;
+  APInt KnownZero;
+  APInt KnownOne;
+
+  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.pop_back_val();
+
+    if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+
+    // Run ShrinkDemandedOp on scalar binary operations.
+    if (N->getNumValues() == 1 &&
+        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
+      APInt KnownZero, KnownOne;
+      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                   KnownZero, KnownOne, TLO)) {
+        // Revisit the node.
+        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                       Worklist.end());
+        Worklist.push_back(N);
+
+        // Replace the old value with the new one.
+        DEBUG(dbgs() << "\nReplacing ";
+              TLO.Old.getNode()->dump(CurDAG);
+              dbgs() << "\nWith: ";
+              TLO.New.getNode()->dump(CurDAG);
+              dbgs() << '\n');
+
+        Worklist.push_back(TLO.New.getNode());
+
+        SDOPsWorkListRemover DeadNodes(Worklist);
+        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+        if (TLO.Old.getNode()->use_empty()) {
+          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+               i != e; ++i) {
+            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
+            if (OpNode->hasOneUse()) {
+              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                         OpNode), Worklist.end());
+              Worklist.push_back(OpNode);
+            }
+          }
+
+          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                     TLO.Old.getNode()), Worklist.end());
+          CurDAG->DeleteNode(TLO.Old.getNode());
+        }
+      }
+    }
+  }
+}
+
 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
   SmallPtrSet VisitedNodes;
   SmallVector Worklist;
@@ -448,9 +537,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
   APInt KnownZero;
   APInt KnownOne;
-  while (!Worklist.empty()) {
-    SDNode *N = Worklist.back();
-    Worklist.pop_back();
+  do {
+    SDNode *N = Worklist.pop_back_val();

     // If we've already seen this node, ignore it.
if (!VisitedNodes.insert(N)) @@ -490,7 +578,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { LOI.KnownOne = KnownOne; LOI.KnownZero = KnownZero; } - } + } while (!Worklist.empty()); } void SelectionDAGISel::CodeGenAndEmitDAG() { @@ -504,7 +592,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = MF->getFunction()->getNameStr() + ":" + BB->getBasicBlock()->getNameStr(); - DEBUG(errs() << "Initial selection DAG:\n"); + DEBUG(dbgs() << "Initial selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -517,7 +605,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(errs() << "Optimized lowered selection DAG:\n"); + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"); DEBUG(CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that @@ -533,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(errs() << "Type-legalized selection DAG:\n"); + DEBUG(dbgs() << "Type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (Changed) { @@ -548,7 +636,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -578,7 +666,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -591,7 +679,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(OptLevel); } - DEBUG(errs() << "Legalized selection DAG:\n"); + DEBUG(dbgs() << "Legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -604,13 +692,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(errs() << "Optimized legalized selection DAG:\n"); + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + ShrinkDemandedOps(); ComputeLiveOutVRegInfo(); + } // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. 
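// The ShrinkDemandedOps() call wired in above rests on a simple fact: the
// low N bits of add/sub/mul/and/or/xor depend only on the low N bits of the
// operands, so when nothing else is demanded the operation can be done at
// the narrower width. A brute-force spot check of that property for add:
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // (uint8_t)(x + y) == (uint8_t)((uint8_t)x + (uint8_t)y): narrowing the
  // add to 8 bits is exact when only the low 8 bits are demanded.
  for (uint32_t x = 0; x < (1u << 16); x += 257)     // sampled, not exhaustive
    for (uint32_t y = 0; y < (1u << 16); y += 263) {
      uint8_t Wide = static_cast<uint8_t>(x + y);
      uint8_t Narrow = static_cast<uint8_t>(
          static_cast<uint8_t>(x) + static_cast<uint8_t>(y));
      assert(Wide == Narrow);
    }
  std::printf("x+y narrows exactly to the demanded low bits\n");
  return 0;
}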
@@ -621,7 +711,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { InstructionSelect(); } - DEBUG(errs() << "Selected selection DAG:\n"); + DEBUG(dbgs() << "Selected selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -654,7 +744,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DEBUG(errs() << "Selected machine code:\n"); + DEBUG(dbgs() << "Selected machine code:\n"); DEBUG(BB->dump()); } @@ -699,7 +789,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, I != E; ++I, ++j) if (Fn.paramHasAttr(j, Attribute::ByVal)) { if (EnableFastISelVerbose || EnableFastISelAbort) - errs() << "FastISel skips entry block due to byval argument\n"; + dbgs() << "FastISel skips entry block due to byval argument\n"; SuppressFastISel = true; break; } @@ -729,10 +819,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // information is provided by an intrinsic (eh.selector) that can be moved // to unexpected places by the optimizers: if the unwind edge is critical, // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results in exceptions - // not being caught because no typeids are associated with the invoke. - // This may not be the only way things can go wrong, but it is the only way - // we try to work around for the moment. + // the landing pad, not the landing pad itself. This results + // in exceptions not being caught because no typeids are associated with + // the invoke. This may not be the only way things can go wrong, but it + // is the only way we try to work around for the moment. BranchInst *Br = dyn_cast(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -765,7 +855,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { - errs() << "FastISel miss: "; + dbgs() << "FastISel miss: "; BI->dump(); } assert(!EnableFastISelAbort && @@ -775,7 +865,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF); - // First try normal tablegen-generated "fast" selection. + // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(BI)) { ResetDebugLoc(SDB, FastIS); continue; @@ -788,11 +878,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - errs() << "FastISel missed call: "; + dbgs() << "FastISel missed call: "; BI->dump(); } - if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) { + if (!BI->getType()->isVoidTy()) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); @@ -817,7 +907,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // For now, be a little lenient about non-branch terminators. 
if (!isa(BI) || isa(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - errs() << "FastISel miss: "; + dbgs() << "FastISel miss: "; BI->dump(); } if (EnableFastISelAbort) @@ -846,13 +936,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, void SelectionDAGISel::FinishBasicBlock() { - DEBUG(errs() << "Target-post-processed machine code:\n"); + DEBUG(dbgs() << "Target-post-processed machine code:\n"); DEBUG(BB->dump()); - DEBUG(errs() << "Total amount of phi nodes to update: " + DEBUG(dbgs() << "Total amount of phi nodes to update: " << SDB->PHINodesToUpdate.size() << "\n"); DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) - errs() << "Node " << i << " : (" + dbgs() << "Node " << i << " : (" << SDB->PHINodesToUpdate[i].first << ", " << SDB->PHINodesToUpdate[i].second << ")\n"); @@ -915,11 +1005,11 @@ SelectionDAGISel::FinishBasicBlock() { // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. if (PHIBB == SDB->BitTestCases[i].Default) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. back().ThisBB)); } @@ -927,10 +1017,9 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; - if (cBB->succ_end() != - std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, - false)); + if (cBB->isSuccessor(PHIBB)) { + PHI->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(cBB)); } } @@ -977,7 +1066,7 @@ SelectionDAGISel::FinishBasicBlock() { (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here - if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + if (BB->isSuccessor(PHIBB)) { PHI->addOperand (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); @@ -1023,17 +1112,23 @@ SelectionDAGISel::FinishBasicBlock() { SDB->EdgeMapping.find(BB); if (EI != SDB->EdgeMapping.end()) ThisBB = EI->second; - for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ - // This value for this PHI node is recorded in PHINodesToUpdate, get it. - for (unsigned pn = 0; ; ++pn) { - assert(pn != SDB->PHINodesToUpdate.size() && - "Didn't find PHI entry!"); - if (SDB->PHINodesToUpdate[pn].first == Phi) { - Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn]. - second, false)); - Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); - break; + + // BB may have been removed from the CFG if a branch was constant folded. + if (ThisBB->isSuccessor(BB)) { + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; + ++Phi) { + // This value for this PHI node is recorded in PHINodesToUpdate. 
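// Two hunks above replace std::find over succ_begin()/succ_end() with
// MachineBasicBlock::isSuccessor, turning iterator plumbing into a named
// query. The cleanup in miniature (hypothetical Block type):
#include <algorithm>
#include <cstdio>
#include <vector>

struct Block {
  std::vector<const Block*> Successors;
  bool isSuccessor(const Block *B) const {
    return std::find(Successors.begin(), Successors.end(), B) !=
           Successors.end();
  }
};

int main() {
  Block BB, Target;
  BB.Successors.push_back(&Target);

  // Before: intent buried in an iterator comparison at every call site.
  bool Before = BB.Successors.end() !=
                std::find(BB.Successors.begin(), BB.Successors.end(), &Target);
  // After: one readable membership test.
  bool After = BB.isSuccessor(&Target);

  std::printf("before=%d after=%d\n", Before, After);
  return 0;
}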
+ for (unsigned pn = 0; ; ++pn) { + assert(pn != SDB->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (SDB->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pn].second, + false)); + Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); + break; + } } } } @@ -1302,45 +1397,47 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, return !isNonImmUse(Root, N, U); } -SDNode *SelectionDAGISel::Select_INLINEASM(SDValue N) { - std::vector Ops(N.getNode()->op_begin(), N.getNode()->op_end()); +SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + std::vector Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); std::vector VTs; VTs.push_back(MVT::Other); VTs.push_back(MVT::Flag); - SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); return New.getNode(); } -SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) { - return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::IMPLICIT_DEF, - N.getValueType()); +SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { + return CurDAG->SelectNodeTo(N, TargetInstrInfo::IMPLICIT_DEF, + N->getValueType(0)); } -SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) { - SDValue Chain = N.getOperand(0); +SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) { + SDValue Chain = N->getOperand(0); unsigned C = cast(N)->getLabelID(); SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32); - return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::EH_LABEL, + return CurDAG->SelectNodeTo(N, TargetInstrInfo::EH_LABEL, MVT::Other, Tmp, Chain); } -void SelectionDAGISel::CannotYetSelect(SDValue N) { +void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot yet select: "; - N.getNode()->print(Msg, CurDAG); + N->print(Msg, CurDAG); llvm_report_error(Msg.str()); } -void SelectionDAGISel::CannotYetSelectIntrinsic(SDValue N) { - errs() << "Cannot yet select: "; +void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) { + dbgs() << "Cannot yet select: "; unsigned iid = - cast(N.getOperand(N.getOperand(0).getValueType() == MVT::Other))->getZExtValue(); + cast(N->getOperand(N->getOperand(0).getValueType() == + MVT::Other))->getZExtValue(); if (iid < Intrinsic::num_intrinsics) - llvm_report_error("Cannot yet select: intrinsic %" + Intrinsic::getName((Intrinsic::ID)iid)); + llvm_report_error("Cannot yet select: intrinsic %" + + Intrinsic::getName((Intrinsic::ID)iid)); else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo()) llvm_report_error(Twine("Cannot yet select: target intrinsic %") + tii->getName(iid)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 83fa5a8fd1ba..3786bd197b85 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -225,7 +225,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = true; - DEBUG(errs() << "setSubgraphColor hit max level\n"); + DEBUG(dbgs() << "setSubgraphColor hit max level\n"); } return true; } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d9a5a13666b3..81c51c49b720 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -990,7 +990,7 @@ bool 
TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1024,7 +1024,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1049,7 +1049,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1272,19 +1272,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits()) & - NewMask; + APInt NewBits = + APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getScalarType().getSizeInBits()) & + NewMask; // If none of the extended bits are demanded, eliminate the sextinreg. if (NewBits == 0) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits()); + APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); InSignBit.zext(BitWidth); - APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getSizeInBits()) & - NewMask; + APInt InputDemandedBits = + APInt::getLowBitsSet(BitWidth, + EVT.getScalarType().getSizeInBits()) & + NewMask; // Since the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1313,7 +1315,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ZERO_EXTEND: { - unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); APInt InMask = NewMask; InMask.trunc(OperandBitWidth); @@ -1336,7 +1339,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; @@ -1376,7 +1379,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ANY_EXTEND: { - unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); APInt InMask = NewMask; InMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, @@ -1480,7 +1484,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. 
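// For SIGN_EXTEND_INREG, the demanded-bits code above splits the mask at the
// source width: bits above it ("NewBits") exist only because of the
// extension, so if none are demanded the sextinreg is dead; otherwise the
// source's sign bit joins the demanded set. With the getScalarType() fixes
// this now holds per element for vectors too. A scalar sketch of the mask
// bookkeeping (uint32_t standing in for APInt; FromBits in [1, 32]):
#include <cstdint>
#include <cstdio>

struct SextInregDemand {
  uint32_t NewBits;            // demanded bits the extension manufactures
  uint32_t InputDemandedBits;  // demanded bits read straight from the input
};

SextInregDemand splitDemand(uint32_t DemandedMask, unsigned FromBits) {
  uint32_t LowMask = FromBits >= 32 ? ~0u : (1u << FromBits) - 1;
  SextInregDemand D;
  D.NewBits = DemandedMask & ~LowMask;
  D.InputDemandedBits = DemandedMask & LowMask;
  if (D.NewBits != 0)                             // extension is observed:
    D.InputDemandedBits |= 1u << (FromBits - 1);  // sign bit becomes demanded
  return D;
}

int main() {
  // sext_inreg from i8 in an i32, only the low 4 bits demanded: dead extend.
  SextInregDemand A = splitDemand(0x0000000Fu, 8);
  std::printf("NewBits=%#x -> sextinreg can be dropped\n", A.NewBits);

  // Same node with bit 31 demanded: the i8 sign bit (bit 7) is now demanded.
  SextInregDemand B = splitDemand(0x80000000u, 8);
  std::printf("NewBits=%#x InputDemanded=%#x\n", B.NewBits,
              B.InputDemandedBits);
  return 0;
}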
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH @@ -1597,7 +1601,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { - unsigned ShAmt = cast(N0.getOperand(1))->getZExtValue(); + const APInt &ShAmt + = cast(N0.getOperand(1))->getAPIntValue(); if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { if ((C1 == 0) == (Cond == ISD::SETEQ)) { @@ -1625,27 +1630,26 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getNode()->hasOneUse() && isa(N0.getOperand(1))) { LoadSDNode *Lod = cast(N0.getOperand(0)); - uint64_t bestMask = 0; + APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed() && - // FIXME: This uses getZExtValue() below so it only works on i64 and - // below. - N0.getValueType().getSizeInBits() <= 64) { + if (!Lod->isVolatile() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); - uint64_t Mask =cast(N0.getOperand(1))->getZExtValue(); + const APInt &Mask = + cast(N0.getOperand(1))->getAPIntValue(); for (unsigned width = origWidth / 2; width>=8; width /= 2) { - uint64_t newMask = (1ULL << width) - 1; + APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offsetisLittleEndian()) bestOffset = (origWidth/width - offset - 1) * (width/8); else bestOffset = (uint64_t)offset * (width/8); - bestMask = Mask >> (offset * (width/8) * 8); + bestMask = Mask.lshr(offset * (width/8) * 8); bestWidth = width; break; } @@ -1668,7 +1672,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask, newVT)), + DAG.getConstant(bestMask.trunc(bestWidth), + newVT)), DAG.getConstant(0LL, newVT), Cond); } } @@ -1760,7 +1765,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) return N0; @@ -1876,24 +1881,27 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + (VT == N0.getValueType() || + (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. 
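// The load-narrowing rewrite above moves SimplifySetCC from getZExtValue()
// to APInt so masks wider than 64 bits can participate. Its core is a search
// for a power-of-two window that covers every set bit of the AND mask, which
// lets the wide load become a narrow one. A 64-bit, little-endian model of
// that search (uint64_t standing in for APInt; extending loads and
// big-endian offsets are simplified away):
#include <cstdint>
#include <cstdio>

// Find the narrowest width >= 8 and the byte offset such that all set bits
// of Mask fall in [Offset*8, Offset*8 + Width). Later (narrower) matches
// overwrite earlier ones, as in the original loop.
bool findNarrowWindow(uint64_t Mask, unsigned OrigWidth,
                      unsigned &BestWidth, unsigned &BestOffset) {
  bool Found = false;
  for (unsigned Width = OrigWidth / 2; Width >= 8; Width /= 2) {
    uint64_t LowMask = Width >= 64 ? ~0ull : (1ull << Width) - 1;
    for (unsigned Offset = 0; Offset != OrigWidth / Width; ++Offset) {
      uint64_t Window = LowMask << (Offset * Width);
      if ((Mask & ~Window) == 0) {       // every set bit is inside the window
        BestWidth = Width;
        BestOffset = Offset * (Width / 8);
        Found = true;
        break;
      }
    }
  }
  return Found;
}

int main() {
  // (i32 load) & 0x00FF0000 == 0: one byte at offset 2 decides the compare.
  unsigned W, O;
  if (findNarrowWindow(0x00FF0000ull, 32, W, O))
    std::printf("narrow to a %u-bit load at byte offset %u\n", W, O);
  return 0;
}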
- if (isPowerOf2_64(AndRHS->getZExtValue())) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(Log2_64(AndRHS->getZExtValue()), - ShiftTy)); + if (AndRHS->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy))); } - } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. if (C1.isPowerOf2()) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(C1.logBase2(), ShiftTy)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(C1.logBase2(), ShiftTy))); } } } diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 8070570cb84b..aeaa38b56433 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -185,7 +185,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { initShrinkWrappingInfo(); DEBUG(if (ShrinkWrapThisFunction) { - errs() << "Place CSR spills/restores for " + dbgs() << "Place CSR spills/restores for " << MF->getFunction()->getName() << "\n"; }); @@ -299,7 +299,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { DEBUG({ if (ShrinkWrapDebugging >= Details) { - errs() + dbgs() << "-----------------------------------------------------------\n" << " Antic/Avail Sets:\n" << "-----------------------------------------------------------\n" @@ -314,7 +314,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { dumpSets(MBB); } - errs() + dbgs() << "-----------------------------------------------------------\n"; } }); @@ -363,7 +363,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - errs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": uses no callee-saved registers\n"); return false; } @@ -383,7 +383,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. 
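// The single-bit AND transforms above rewrite (X & 8) != 0 as a right shift
// by log2 of the mask, now wrapped in a TRUNCATE so the shift can run in the
// operand's type even when the setcc result type is narrower. The underlying
// identity, checked exhaustively over 16-bit values:
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // For a power-of-two mask M = 1 << K:  ((X & M) != 0)  ==  ((X & M) >> K).
  const unsigned K = 3;
  const uint32_t M = 1u << K;              // the "8" in (X & 8) != 0
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    uint32_t AsBool = (X & M) != 0 ? 1u : 0u;
    uint32_t AsShift = (X & M) >> K;       // the SRL the combiner emits
    assert(AsBool == AsShift);
  }
  std::printf("(X & 8) != 0 is exactly (X & 8) >> 3\n");
  return 0;
}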
if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - errs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -465,7 +465,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { @@ -477,7 +477,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } @@ -504,7 +504,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; @@ -520,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - errs() << "ENABLED: " << Fn.getFunction()->getName(); + dbgs() << "ENABLED: " << Fn.getFunction()->getName(); if (HasFastExitPath) - errs() << " (fast exit path)"; - errs() << "\n"; + dbgs() << " (fast exit path)"; + dbgs() << "\n"; if (ShrinkWrapDebugging >= BasicInfo) { - errs() << "------------------------------" + dbgs() << "------------------------------" << "-----------------------------\n"; - errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; if (ShrinkWrapDebugging >= Details) { - errs() << "------------------------------" + dbgs() << "------------------------------" << "-----------------------------\n"; dumpAllUsed(); } @@ -602,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(SUCC); DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << getBasicBlockName(MBB) + dbgs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "successor " << getBasicBlockName(SUCC) << "\n"); } @@ -618,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(PRED); DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << getBasicBlockName(MBB) + dbgs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "predecessor " << getBasicBlockName(PRED) << "\n"); } @@ -656,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector& blks) { CSRUsed[EXB] |= loopSpills; addedUses = true; DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << "LOOP " << getBasicBlockName(MBB) + dbgs() << "LOOP " << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(loopSpills) << ")->" << getBasicBlockName(EXB) << "\n"); if (EXB->succ_size() > 1 || EXB->pred_size() > 1) @@ -723,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! 
CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << "\n"); return placedSpills; @@ -784,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " + dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); return placedRestores; @@ -808,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { ++iterations; DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << "iter " << iterations + dbgs() << "iter " << iterations << " --------------------------------------------------\n"); // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, @@ -858,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); numSRReduced += numSRReducedThisFunc; DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - errs() << "-----------------------------------------------------------\n"; - errs() << "total iterations = " << iterations << " ( " + dbgs() << "-----------------------------------------------------------\n"; + dbgs() << "total iterations = " << iterations << " ( " << Fn.getFunction()->getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; - errs() << "-----------------------------------------------------------\n"; + dbgs() << "-----------------------------------------------------------\n"; dumpSRSets(); - errs() << "-----------------------------------------------------------\n"; + dbgs() << "-----------------------------------------------------------\n"; if (numSRReducedThisFunc) verifySpillRestorePlacement(); }); @@ -899,7 +899,7 @@ void PEI::findFastExitPath() { // Check the immediate successors. 
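// The shrink-wrapping verifier a little further below reasons in set
// algebra: registers spilled on a path must be restored on every route to a
// return, and "spilled - restored" names the violators. The same check with
// std::bitset standing in for the CSRegSet type:
#include <bitset>
#include <cstddef>
#include <cstdio>

using CSRegSet = std::bitset<32>;  // toy stand-in for PEI's CSRegSet

int main() {
  CSRegSet Spilled, Restored;
  Spilled.set(1);
  Spilled.set(5);                  // CSRs saved on this path
  Restored.set(1);                 // but only one of them is restored

  CSRegSet NotRestored = Spilled & ~Restored;  // set difference
  for (std::size_t R = 0; R != NotRestored.size(); ++R)
    if (NotRestored.test(R))
      std::printf("CSR %zu spilled but never restored on this path\n", R);
  return 0;
}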
if (isReturnBlock(SUCC)) { if (ShrinkWrapDebugging >= BasicInfo) - errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) + dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << getBasicBlockName(SUCC) << "\n"; break; } @@ -917,7 +917,7 @@ void PEI::findFastExitPath() { } if (HasFastExitPath) { if (ShrinkWrapDebugging >= BasicInfo) - errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) + dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << exitPath << "\n"; break; } @@ -951,7 +951,7 @@ void PEI::verifySpillRestorePlacement() { if (spilled.empty()) continue; - DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(spilled) << " RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); @@ -983,7 +983,7 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DEBUG(errs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getFunction()->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) << " are never restored on path to return " @@ -1004,7 +1004,7 @@ void PEI::verifySpillRestorePlacement() { if (restored.empty()) continue; - DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << " RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(restored) << "\n"); @@ -1031,7 +1031,7 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DEBUG(errs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getFunction()->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); @@ -1078,13 +1078,13 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) { } void PEI::dumpSet(const CSRegSet& s) { - DEBUG(errs() << stringifyCSRegSet(s) << "\n"); + DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); } void PEI::dumpUsed(MachineBasicBlock* MBB) { DEBUG({ if (MBB) - errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; }); } @@ -1100,7 +1100,7 @@ void PEI::dumpAllUsed() { void PEI::dumpSets(MachineBasicBlock* MBB) { DEBUG({ if (MBB) - errs() << getBasicBlockName(MBB) << " | " + dbgs() << getBasicBlockName(MBB) << " | " << stringifyCSRegSet(CSRUsed[MBB]) << " | " << stringifyCSRegSet(AnticIn[MBB]) << " | " << stringifyCSRegSet(AnticOut[MBB]) << " | " @@ -1112,7 +1112,7 @@ void PEI::dumpSets(MachineBasicBlock* MBB) { void PEI::dumpSets1(MachineBasicBlock* MBB) { DEBUG({ if (MBB) - errs() << getBasicBlockName(MBB) << " | " + dbgs() << getBasicBlockName(MBB) << " | " << stringifyCSRegSet(CSRUsed[MBB]) << " | " << stringifyCSRegSet(AnticIn[MBB]) << " | " << stringifyCSRegSet(AnticOut[MBB]) << " | " @@ -1136,14 +1136,14 @@ void PEI::dumpSRSets() { for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); MBB != E; ++MBB) { if (!CSRSave[MBB].empty()) { - errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]); if (CSRRestore[MBB].empty()) - errs() << '\n'; + dbgs() << '\n'; } if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) 
- errs() << " " + dbgs() << " " << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 6314331482d1..27d429b999e7 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -183,16 +183,16 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; } } DEBUG({ - errs() << "\nExtending: "; - IntB.print(errs(), tri_); + dbgs() << "\nExtending: "; + IntB.print(dbgs(), tri_); }); SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; @@ -224,9 +224,9 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, IntB.MergeValueNumberInto(BValNo, ValLR->valno); } DEBUG({ - errs() << " result = "; - IntB.print(errs(), tri_); - errs() << "\n"; + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; }); // If the source instruction was killing the source register before the @@ -467,8 +467,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. DEBUG({ - errs() << "\nExtending: "; - IntB.print(errs(), tri_); + dbgs() << "\nExtending: "; + IntB.print(dbgs(), tri_); }); // Remove val#'s defined by copies that will be coalesced away. @@ -518,19 +518,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ValNo->setHasPHIKill(BHasPHIKill); DEBUG({ - errs() << " result = "; - IntB.print(errs(), tri_); - errs() << '\n'; - errs() << "\nShortening: "; - IntA.print(errs(), tri_); + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << '\n'; + dbgs() << "\nShortening: "; + IntA.print(dbgs(), tri_); }); IntA.removeValNo(AValNo); DEBUG({ - errs() << " result = "; - IntA.print(errs(), tri_); - errs() << '\n'; + dbgs() << " result = "; + IntA.print(dbgs(), tri_); + dbgs() << '\n'; }); ++numCommutes; @@ -1223,16 +1223,16 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { DEBUG({ - errs() << "Interfere with register "; - li_->getInterval(RealDstReg).print(errs(), tri_); + dbgs() << "Interfere with register "; + li_->getInterval(RealDstReg).print(dbgs(), tri_); }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable } @@ -1254,16 +1254,16 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealSrcReg) && RHS.overlaps(li_->getInterval(RealSrcReg))) { DEBUG({ - errs() << "Interfere with register "; - li_->getInterval(RealSrcReg).print(errs(), tri_); + dbgs() << "Interfere with register "; + li_->getInterval(RealSrcReg).print(dbgs(), tri_); }); return false; // Not 
coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable } @@ -1293,7 +1293,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) return false; // Already done. - DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; @@ -1313,7 +1313,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (SrcSubIdx && SrcSubIdx != DstSubIdx) { // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. - DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already " + DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already " "coalesced to another register.\n"); return false; // Not coalescable. } @@ -1329,7 +1329,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If they are already joined we continue. if (SrcReg == DstReg) { - DEBUG(errs() << "\tCopy already coalesced.\n"); + DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } @@ -1338,17 +1338,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If they are both physical registers, we cannot join them. if (SrcIsPhys && DstIsPhys) { - DEBUG(errs() << "\tCan not coalesce physregs.\n"); + DEBUG(dbgs() << "\tCan not coalesce physregs.\n"); return false; // Not coalescable. } // We only join virtual registers with allocatable physical registers. if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n"); + DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n"); return false; // Not coalescable. } if (DstIsPhys && !allocatableRegs_[DstReg]) { - DEBUG(errs() << "\tDst reg is unallocatable physreg.\n"); + DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1362,7 +1362,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); assert(DstSubRC && "Illegal subregister index"); if (!DstSubRC->contains(SrcSubReg)) { - DEBUG(errs() << "\tIncompatible destination regclass: " + DEBUG(dbgs() << "\tIncompatible destination regclass: " << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName() << ".\n"); return false; // Not coalescable. @@ -1379,7 +1379,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); assert(SrcSubRC && "Illegal subregister index"); if (!SrcSubRC->contains(DstSubReg)) { - DEBUG(errs() << "\tIncompatible source regclass: " + DEBUG(dbgs() << "\tIncompatible source regclass: " << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName() << ".\n"); (void)DstSubReg; @@ -1405,7 +1405,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // r1024<2> = EXTRACT_SUBREG EAX, 2. 
Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (DstSubIdx != SubIdx) { - DEBUG(errs() << "\t Sub-register indices mismatch.\n"); + DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1418,7 +1418,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (SrcSubIdx != SubIdx) { - DEBUG(errs() << "\t Sub-register indices mismatch.\n"); + DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1427,7 +1427,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg" + DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg" << " of a super-class.\n"); return false; // Not coalescable. } @@ -1451,7 +1451,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // class as the would be resulting register. SubIdx = 0; else { - DEBUG(errs() << "\t Sub-register indices mismatch.\n"); + DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } @@ -1463,7 +1463,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); } if (!NewRC) { - DEBUG(errs() << "\t Conflicting sub-register indices.\n"); + DEBUG(dbgs() << "\t Conflicting sub-register indices.\n"); return false; // Not coalescable } @@ -1535,7 +1535,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if (!SrcIsPhys && !DstIsPhys) { NewRC = getCommonSubClass(SrcRC, DstRC); if (!NewRC) { - DEBUG(errs() << "\tDisjoint regclasses: " + DEBUG(dbgs() << "\tDisjoint regclasses: " << SrcRC->getName() << ", " << DstRC->getName() << ".\n"); return false; // Not coalescable. @@ -1551,7 +1551,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DEBUG(errs() << "\tSrc/Dest are different register classes.\n"); + DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1573,9 +1573,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "Register mapping is horribly broken!"); DEBUG({ - errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_); - errs() << " and "; DstInt.print(errs(), tri_); - errs() << ": "; + dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_); + dbgs() << " and "; DstInt.print(dbgs(), tri_); + dbgs() << ": "; }); // Save a copy of the virtual register live interval. 
We'll manually @@ -1606,7 +1606,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; - DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1614,7 +1614,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; - DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1635,7 +1635,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->use_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; - DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1654,7 +1654,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DEBUG(errs() << "Not profitable!\n"); + DEBUG(dbgs() << "Not profitable!\n"); return false; } @@ -1676,7 +1676,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } // Otherwise, we are unable to join the intervals. - DEBUG(errs() << "Interference!\n"); + DEBUG(dbgs() << "Interference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1779,9 +1779,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } DEBUG({ - errs() << "\n\t\tJoined. Result = "; - ResDstInt->print(errs(), tri_); - errs() << "\n"; + dbgs() << "\n\t\tJoined. 
Result = "; + ResDstInt->print(dbgs(), tri_); + dbgs() << "\n"; }); ++numJoins; @@ -2134,8 +2134,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; } @@ -2151,8 +2151,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; } @@ -2413,7 +2413,7 @@ namespace { void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector &TryAgain) { - DEBUG(errs() << MBB->getName() << ":\n"); + DEBUG(dbgs() << MBB->getName() << ":\n"); std::vector VirtCopies; std::vector PhysCopies; @@ -2478,7 +2478,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, } void SimpleRegisterCoalescing::joinIntervals() { - DEBUG(errs() << "********** JOINING INTERVALS ***********\n"); + DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); std::vector TryAgainList; if (loopInfo->empty()) { @@ -2610,12 +2610,11 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; } - void SimpleRegisterCoalescing::printRegName(unsigned reg) const { if (TargetRegisterInfo::isPhysicalRegister(reg)) - errs() << tri_->getName(reg); + dbgs() << tri_->getName(reg); else - errs() << "%reg" << reg; + dbgs() << "%reg" << reg; } void SimpleRegisterCoalescing::releaseMemory() { @@ -2634,7 +2633,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { AA = &getAnalysis(); loopInfo = &getAnalysis(); - DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n" + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); @@ -2648,11 +2647,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (EnableJoining) { joinIntervals(); DEBUG({ - errs() << "********** INTERVALS POST JOINING **********\n"; + dbgs() << "********** INTERVALS POST JOINING **********\n"; for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ - I->second->print(errs(), tri_); - errs() << "\n"; + I->second->print(dbgs(), tri_); + dbgs() << "\n"; } }); } diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 6de03e1aa138..95589331cf42 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -381,9 +381,6 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { I->eraseFromParent(); } - - - // The entry block changes to have the eh.sjlj.setjmp, with a conditional // branch to a dispatch block for non-zero returns. If we return normally, // we're not handling an exception and just register the function context @@ -397,13 +394,15 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Insert a load in the Catch block, and a switch on its value. By default, // we go to a block that just does an unwind (which is the correct action // for a standard call). 
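The control shape this pass materializes can be reduced to plain setjmp/longjmp. A minimal, self-contained sketch (hypothetical names, not the pass's output) of the dispatch-on-call-site idea:

#include <csetjmp>
#include <cstdio>

static std::jmp_buf FuncCtx;      // per-function context the unwinder restores
static volatile int CallSite;     // which invoke was active when we unwound

int main() {
  if (setjmp(FuncCtx) == 0) {
    // Normal path: record the call-site number, then "call" something that
    // may throw. Here we simulate the unwinder resuming this frame.
    CallSite = 1;
    std::longjmp(FuncCtx, 1);
  } else {
    // Exceptional path: the dispatch switch; the default case corresponds
    // to the "unwindbb" block that keeps unwinding past this frame.
    switch (CallSite) {
    case 1:  std::puts("landing pad for call site 1"); break;
    default: std::puts("unwind further"); break;
    }
  }
  return 0;
}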
- BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F); + BasicBlock *UnwindBlock = + BasicBlock::Create(F.getContext(), "unwindbb", &F); Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock); + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "eh.sjlj.setjmp.cont"); diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 782af124a664..b8f529b89339 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -192,18 +192,18 @@ void SlotIndexes::renumberIndexes() { void SlotIndexes::dump() const { for (const IndexListEntry *itr = front(); itr != getTail(); itr = itr->getNext()) { - errs() << itr->getIndex() << " "; + dbgs() << itr->getIndex() << " "; if (itr->getInstr() != 0) { - errs() << *itr->getInstr(); + dbgs() << *itr->getInstr(); } else { - errs() << "\n"; + dbgs() << "\n"; } } for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); itr != mbb2IdxMap.end(); ++itr) { - errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" + dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" << itr->second.first << ", " << itr->second.second << "]\n"; } } @@ -217,7 +217,7 @@ void SlotIndex::print(raw_ostream &os) const { // Dump a SlotIndex to stderr. void SlotIndex::dump() const { - print(errs()); - errs() << "\n"; + print(dbgs()); + dbgs() << "\n"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index bec92949ccd6..7ba44031714b 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -67,7 +67,7 @@ class SpillerBase : public Spiller { /// immediately before each use, and stores after each def. No folding or /// remat is attempted. std::vector trivialSpillEverywhere(LiveInterval *li) { - DEBUG(errs() << "Spilling everywhere " << *li << "\n"); + DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); @@ -75,7 +75,7 @@ class SpillerBase : public Spiller { assert(!li->isStackSlot() && "Trying to spill a stack slot."); - DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n"); + DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); std::vector added; @@ -89,7 +89,7 @@ class SpillerBase : public Spiller { // Grab the use/def instr. MachineInstr *mi = &*regItr; - DEBUG(errs() << " Processing " << *mi); + DEBUG(dbgs() << " Processing " << *mi); // Step regItr to the next use/def instr. 
do { @@ -242,7 +242,7 @@ class SplittingSpiller : public StandardSpiller { std::vector tryVNISplit(LiveInterval *li, SlotIndex *earliestStart) { - DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n"); + DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n"); std::vector added; SmallVector vnis; @@ -257,11 +257,11 @@ class SplittingSpiller : public StandardSpiller { if (vni->isUnused() || vni->kills.empty()) continue; - DEBUG(errs() << " Extracted Val #" << vni->id << " as "); + DEBUG(dbgs() << " Extracted Val #" << vni->id << " as "); LiveInterval *splitInterval = extractVNI(li, vni); if (splitInterval != 0) { - DEBUG(errs() << *splitInterval << "\n"); + DEBUG(dbgs() << *splitInterval << "\n"); added.push_back(splitInterval); alreadySplit.insert(splitInterval); if (earliestStart != 0) { @@ -269,11 +269,11 @@ class SplittingSpiller : public StandardSpiller { *earliestStart = splitInterval->beginIndex(); } } else { - DEBUG(errs() << "0\n"); + DEBUG(dbgs() << "0\n"); } } - DEBUG(errs() << "Original LI: " << *li << "\n"); + DEBUG(dbgs() << "Original LI: " << *li << "\n"); // If there original interval still contains some live ranges // add it to added and alreadySplit. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index e8ee82213a4a..48bb5af4db3f 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const { if (const ArrayType *AT = dyn_cast(AI->getAllocatedType())) { // We apparently only care about character arrays. - if (AT->getElementType() != Type::getInt8Ty(AT->getContext())) + if (!AT->getElementType()->isInteger(8)) continue; // If an array has more than SSPBufferSize bytes of allocated space, diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index fd25a37c6c6b..21707031c8e7 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -200,7 +200,7 @@ void StackSlotColoring::InitializeSlots() { Assignments.resize(LastFI); // Gather all spill slots into a list. - DEBUG(errs() << "Spill slot intervals:\n"); + DEBUG(dbgs() << "Spill slot intervals:\n"); for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); @@ -212,7 +212,7 @@ void StackSlotColoring::InitializeSlots() { OrigSizes[FI] = MFI->getObjectSize(FI); AllColors.set(FI); } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); // Sort them by weight. 
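The sort invoked next orders the spill-slot intervals by spill weight so the hottest slots pick their colors first, a greedy heuristic. A standalone sketch under simplified stand-in types (IntervalSorter's exact comparison is assumed here, not quoted):

#include <algorithm>
#include <cstdio>
#include <vector>

struct SlotInterval { int FrameIndex; float Weight; };

int main() {
  // Hypothetical spill-slot intervals with their spill weights.
  std::vector<SlotInterval> SSIntervals = {{0, 1.5f}, {1, 9.0f}, {2, 4.0f}};
  // Heavier (hotter) intervals first; a stable sort keeps ties in their
  // original order, so the coloring stays deterministic across runs.
  std::stable_sort(SSIntervals.begin(), SSIntervals.end(),
                   [](const SlotInterval &A, const SlotInterval &B) {
                     return A.Weight > B.Weight;
                   });
  for (const SlotInterval &LI : SSIntervals)
    std::printf("fi#%d weight=%.1f\n", LI.FrameIndex, LI.Weight);
  return 0;
}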
std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); @@ -244,7 +244,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, return false; bool Changed = false; - DEBUG(errs() << "Assigning unused registers to spill slots:\n"); + DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -274,7 +274,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, AllColored = false; continue; } else { - DEBUG(errs() << "Assigning fi#" << RSS << " to " + DEBUG(dbgs() << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n'); ColoredRegs.push_back(Reg); SlotMapping[RSS] = Reg; @@ -302,7 +302,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, ++NumEliminated; } } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); return Changed; } @@ -337,7 +337,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); int FI = li->getStackSlotIndex(); - DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); + DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. If there are multiple // objects sharing the same slot, then make sure the size and alignment @@ -361,7 +361,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); - DEBUG(errs() << "Color spill slot intervals:\n"); + DEBUG(dbgs() << "Color spill slot intervals:\n"); bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; @@ -375,7 +375,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { Changed |= (SS != NewSS); } - DEBUG(errs() << "\nSpill slots after coloring:\n"); + DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -387,7 +387,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { #ifndef NDEBUG for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) DEBUG(SSIntervals[i]->dump()); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); #endif // Can we "color" a stack slot with a unused register? @@ -419,7 +419,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { // Delete unused stack slots. 
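The loop below walks the set bits of a BitVector with the find_next() idiom to reclaim stack objects whose colors were never used. A simplified analogue written against std::vector<bool> (names hypothetical):

#include <cstdio>
#include <vector>

// Analogue of llvm::BitVector::find_next(): index of the next set bit
// after Prev, or -1 when there are no more.
static int findNext(const std::vector<bool> &Bits, int Prev) {
  for (size_t i = Prev + 1; i < Bits.size(); ++i)
    if (Bits[i])
      return (int)i;
  return -1;
}

int main() {
  std::vector<bool> UnusedColors = {false, true, false, true};
  for (int FI = findNext(UnusedColors, -1); FI != -1;
       FI = findNext(UnusedColors, FI))
    std::printf("would remove unused stack object fi#%d\n", FI);
  return 0;
}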
while (NextColor != -1) { - DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n"); + DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); MFI->RemoveStackObject(NextColor); NextColor = AllColors.find_next(NextColor); } @@ -605,7 +605,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, MachineBasicBlock *MBB = MI->getParent(); if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(errs() << "Eliminated load: "); + DEBUG(dbgs() << "Eliminated load: "); DEBUG(MI->dump()); ++NumLoadElim; } else { @@ -621,7 +621,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(errs() << "Eliminated store: "); + DEBUG(dbgs() << "Eliminated store: "); DEBUG(MI->dump()); ++NumStoreElim; } else { @@ -699,7 +699,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "********** Stack Slot Coloring **********\n"); + DEBUG(dbgs() << "********** Stack Slot Coloring **********\n"); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 3c139068e683..bd7cb7520c98 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -555,7 +555,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Add the renaming set for this PHI node to our overall renaming information for (std::map::iterator QI = PHIUnion.begin(), QE = PHIUnion.end(); QI != QE; ++QI) { - DEBUG(errs() << "Adding Renaming: " << QI->first << " -> " + DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> " << P->getOperand(0).getReg() << "\n"); } @@ -698,7 +698,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*PI->getParent(), PI, t, curr.second, RC, RC); - DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t + DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t << "\n"); // Push temporary on Stacks @@ -715,7 +715,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, map[curr.first], RC, RC); map[curr.first] = curr.second; - DEBUG(errs() << "Inserted copy from " << curr.first << " to " + DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " << curr.second << "\n"); // Push this copy onto InsertedPHICopies so we can @@ -928,7 +928,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { unsigned reg = OI->first; ++OI; I->second.erase(reg); - DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first + DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first << "\n"); } } @@ -946,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { while (I->second.size()) { std::map::iterator SI = I->second.begin(); - DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); + DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); if (SI->first != I->first) { if (mergeLiveIntervals(I->first, SI->first)) { @@ -978,7 +978,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { R.valno->setCopy(--SI->second->getFirstTerminator()); R.valno->def = 
instrIdx.getDefIndex(); - DEBUG(errs() << "Renaming failed: " << SI->first << " -> " + DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> " << I->first << "\n"); } } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index bf589022301b..f51f74d5065f 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -139,8 +139,8 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } if (!Found) { - errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - errs() << " missing input from predecessor BB#" + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" << PredBB->getNumber() << '\n'; llvm_unreachable(0); } @@ -150,14 +150,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { // This is not a hard error. - errs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - errs() << " extra input from predecessor BB#" + dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() << '\n'; } if (PHIBB->getNumber() < 0) { - errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - errs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; llvm_unreachable(0); } } @@ -173,7 +173,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; if (PreRegAlloc && TailDupVerify) { - DEBUG(errs() << "\n*** Before tail-duplicating\n"); + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); VerifyPHIs(MF, true); } @@ -253,7 +253,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { SSAUpdateVals.clear(); } - // Eliminate some of the copies inserted tail duplication to maintain + // Eliminate some of the copies inserted by tail duplication to maintain // SSA form. for (unsigned i = 0, e = Copies.size(); i != e; ++i) { MachineInstr *Copy = Copies[i]; @@ -346,7 +346,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *PredBB, MachineFunction &MF, DenseMap &LocalVRMap) { - MachineInstr *NewMI = MF.CloneMachineInstr(MI); + MachineInstr *NewMI = TII->duplicate(MI, MF); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); if (!MO.isReg()) @@ -437,8 +437,11 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector &TDBBs, SmallVector &Copies) { - // Don't try to tail-duplicate single-block loops. - if (TailBB->isSuccessor(TailBB)) + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches. + bool hasIndirectBranch = (!TailBB->empty() && + TailBB->back().getDesc().isIndirectBranch()); + if (PreRegAlloc && !hasIndirectBranch) return false; // Set the limit on the number of instructions to duplicate, with a default @@ -446,7 +449,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. 
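Lifted out of the pass, the new profitability gate reads roughly like the sketch below. The constants are illustrative placeholders, not the pass's actual command-line defaults:

struct BlockInfo { bool EndsInIndirectBranch; unsigned Size; };

static bool shouldTailDuplicate(const BlockInfo &BB, bool PreRegAlloc) {
  // Before register allocation, duplication mostly costs compile time;
  // indirect branches are the exception worth paying for.
  if (PreRegAlloc && !BB.EndsInIndirectBranch)
    return false;
  // Indirect branches get a larger budget: each duplicated copy can become
  // individually predictable to hardware branch predictors.
  unsigned MaxDup = BB.EndsInIndirectBranch ? 20 : 2;
  return BB.Size <= MaxDup;
}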
unsigned MaxDuplicateCount; - if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch()) + if (hasIndirectBranch) // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there // are common paths through the code. The limit needs to be high enough @@ -457,6 +460,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, else MaxDuplicateCount = TailDuplicateSize; + // Don't try to tail-duplicate single-block loops. + if (TailBB->isSuccessor(TailBB)) + return false; + // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; @@ -481,7 +488,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (InstrCount > 1 && HasCall) return false; - DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also @@ -510,7 +517,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; - DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); @@ -570,7 +577,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && !TailBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << *PrevBB + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap LocalVRMap; @@ -620,7 +627,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, /// function, updating the CFG. void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); - DEBUG(errs() << "\nRemoving MBB: " << *MBB); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); // Remove all successors. 
while (!MBB->succ_empty()) diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 393e315a3214..a0fccabdb5af 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -150,6 +150,13 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } +MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, + MachineFunction &MF) const { + assert(!Orig->getDesc().isNotDuplicable() && + "Instruction cannot be duplicated"); + return MF.CloneMachineInstr(Orig); +} + bool TargetInstrInfoImpl::isIdentical(const MachineInstr *MI, const MachineInstr *Other, diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 98b95acdbc8a..a3f6364aa8ed 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -573,15 +573,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; - DEBUG(errs() << "2addr: COMMUTING : " << *MI); + DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI == 0) { - DEBUG(errs() << "2addr: COMMUTING FAILED!\n"); + DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); return false; } - DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI); + DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); // If the instruction changed to commute it, update livevar. if (NewMI != MI) { if (LV) @@ -628,8 +628,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, unsigned RegB, unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { - DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi); - DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI); + DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; if (NewMI->findRegisterUseOperand(RegB, false, TRI)) @@ -891,7 +891,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "Machine Function\n"); + DEBUG(dbgs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); @@ -901,8 +901,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; - DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); - DEBUG(errs() << "********** Function: " + DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
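The duplicate() method added to TargetInstrInfoImpl above follows the usual target-hook pattern: a default that refuses non-duplicable instructions and plain-clones the rest, which a target may override to patch up cloned state. A self-contained sketch with simplified stand-ins for TargetInstrInfo/MachineInstr:

#include <cassert>

struct Instr { bool NotDuplicable = false; /* ...operands... */ };

struct InstrInfoBase {
  virtual ~InstrInfoBase() {}
  virtual Instr *duplicate(Instr *Orig) const {
    assert(!Orig->NotDuplicable && "Instruction cannot be duplicated");
    return new Instr(*Orig); // default: plain clone
  }
};

struct MyTargetInstrInfo : InstrInfoBase {
  Instr *duplicate(Instr *Orig) const override {
    Instr *NewMI = InstrInfoBase::duplicate(Orig);
    // A target could fix up target-specific fields of NewMI here before
    // handing it back to the tail duplicator.
    return NewMI;
  }
};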
@@ -943,7 +943,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (FirstTied) { FirstTied = false; ++NumTwoAddressInstrs; - DEBUG(errs() << '\t' << *mi); + DEBUG(dbgs() << '\t' << *mi); } assert(mi->getOperand(SrcIdx).isReg() && @@ -1024,7 +1024,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DefMI->getDesc().isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); + DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); ReMatRegs.set(regB); @@ -1040,7 +1040,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; - DEBUG(errs() << "\t\tprepend:\t" << *prevMI); + DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); MachineOperand &MO = mi->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && @@ -1085,7 +1085,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; - DEBUG(errs() << "\t\trewrite to:\t" << *mi); + DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index c8c5d861578d..d4fb2e4d8842 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -278,5 +278,5 @@ void VirtRegMap::print(raw_ostream &OS, const Module* M) const { } void VirtRegMap::dump() const { - print(errs()); + print(dbgs()); } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 054c3b631b96..df2b8d28c93f 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -60,6 +60,33 @@ ScheduleSpills("schedule-spills", VirtRegRewriter::~VirtRegRewriter() {} +/// substitutePhysReg - Replace virtual register in MachineOperand with a +/// physical register. Do the right thing with the sub-register index. +static void substitutePhysReg(MachineOperand &MO, unsigned Reg, + const TargetRegisterInfo &TRI) { + if (unsigned SubIdx = MO.getSubReg()) { + // Insert the physical subreg and reset the subreg field. + MO.setReg(TRI.getSubReg(Reg, SubIdx)); + MO.setSubReg(0); + + // Any def, dead, and kill flags apply to the full virtual register, so they + // also apply to the full physical register. Add imp-def/dead and imp-kill + // as needed. + MachineInstr &MI = *MO.getParent(); + if (MO.isDef()) + if (MO.isDead()) + MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true); + else + MI.addRegisterDefined(Reg, &TRI); + else if (!MO.isUndef() && + (MO.isKill() || + MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) + MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); + } else { + MO.setReg(Reg); + } +} + namespace { /// This class is intended for use with the new spilling framework only. 
It @@ -69,10 +96,10 @@ struct TrivialRewriter : public VirtRegRewriter { bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, LiveIntervals* LIs) { - DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(errs() << "********** Function: " + DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); - DEBUG(errs() << "**** Machine Instrs" + DEBUG(dbgs() << "**** Machine Instrs" << "(NOTE! Does not include spills and reloads!) ****\n"); DEBUG(MF.dump()); @@ -101,16 +128,13 @@ struct TrivialRewriter : public VirtRegRewriter { MachineOperand &mop = regItr.getOperand(); assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); ++regItr; - unsigned subRegIdx = mop.getSubReg(); - unsigned pRegOp = subRegIdx ? tri->getSubReg(pReg, subRegIdx) : pReg; - mop.setReg(pRegOp); - mop.setSubReg(0); + substitutePhysReg(mop, pReg, *tri); changed = true; } } } - DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); return changed; @@ -191,11 +215,11 @@ class AvailableSpills { (unsigned)CanClobber; if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "Remembering RM#" + DEBUG(dbgs() << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Remembering SS#" << SlotOrReMat); - DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n"); + DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); + DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n"); } /// canClobberPhysRegForSS - Return true if the spiller is allowed to change @@ -647,12 +671,9 @@ static void ReMaterialize(MachineBasicBlock &MBB, if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) continue; assert(MO.isUse()); - unsigned SubIdx = MO.getSubReg(); unsigned Phys = VRM.getPhys(VirtReg); assert(Phys && "Virtual register is not assigned a register?"); - unsigned RReg = SubIdx ? 
TRI->getSubReg(Phys, SubIdx) : Phys; - MO.setReg(RReg); - MO.setSubReg(0); + substitutePhysReg(MO, Phys, *TRI); } ++NumReMats; } @@ -686,7 +707,7 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) << " copied, it is available for use but can no longer be modified\n"); } } @@ -711,12 +732,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) << " clobbered, invalidating "); if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); + DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); else - DEBUG(errs() << "SS#" << SlotOrReMat << "\n"); + DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); } } @@ -895,9 +916,9 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *prior(InsertLoc)); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - DEBUG(errs() << "Reuse undone!\n"); + DEBUG(dbgs() << "Reuse undone!\n"); --NumReused; // Finally, PhysReg is now available, go ahead and use it. @@ -1004,11 +1025,12 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, } static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) { +void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &TRI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.getReg() == VirtReg) - MO.setReg(PhysReg); + substitutePhysReg(MO, PhysReg, TRI); } } @@ -1041,9 +1063,9 @@ class LocalRewriter : public VirtRegRewriter { TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); AllocatableRegs = TRI->getAllocatableSet(MF); - DEBUG(errs() << "\n**** Local spiller rewriting function '" + DEBUG(dbgs() << "\n**** Local spiller rewriting function '" << MF.getFunction()->getName() << "':\n"); - DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and" + DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" " reloads!) ****\n"); DEBUG(MF.dump()); @@ -1095,7 +1117,7 @@ class LocalRewriter : public VirtRegRewriter { Spills.clear(); } - DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); // Mark unused spill slots. 
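The substitutePhysReg() helper factored out above replaces several hand-rolled copies of the same rewrite. A simplified model of its semantics, with illustrative stand-in types (the real helper also moves def/dead/kill flags onto the full physical register via implicit operands):

struct Operand { unsigned Reg; unsigned SubIdx; };

// Hypothetical subreg lookup, e.g. (EAX, sub_16bit) -> AX.
unsigned getSubReg(unsigned PhysReg, unsigned SubIdx);

static void substitutePhysRegSketch(Operand &MO, unsigned PhysReg) {
  if (unsigned SubIdx = MO.SubIdx) {
    MO.Reg = getSubReg(PhysReg, SubIdx); // resolve to the physical subreg
    MO.SubIdx = 0;                       // ...and clear the index
  } else {
    MO.Reg = PhysReg;                    // straight 1:1 substitution
  }
}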
@@ -1175,7 +1197,7 @@ class LocalRewriter : public VirtRegRewriter { if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); + AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); VRM.transferRestorePts(&MI, NewMIs[0]); MII = MBB.insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); @@ -1191,7 +1213,7 @@ class LocalRewriter : public VirtRegRewriter { if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); + AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); VRM.transferRestorePts(&NextMI, NewMIs[0]); MBB.insert(NextMII, NewMIs[0]); InvalidateKills(NextMI, TRI, RegKills, KillOps); @@ -1467,11 +1489,11 @@ class LocalRewriter : public VirtRegRewriter { TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DEBUG(errs() << "Store:\t" << *StoreMI); + DEBUG(dbgs() << "Store:\t" << *StoreMI); // If there is a dead store to this stack slot, nuke it now. if (LastStore) { - DEBUG(errs() << "Removed dead store:\t" << *LastStore); + DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); ++NumDSE; SmallVector KillRegs; InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); @@ -1599,7 +1621,7 @@ class LocalRewriter : public VirtRegRewriter { AvailableSpills &Spills, BitVector &RegKills, std::vector &KillOps) { - DEBUG(errs() << "\n**** Local spiller rewriting MBB '" + DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" << MBB.getName() << "':\n"); MachineFunction &MF = *MBB.getParent(); @@ -1699,11 +1721,11 @@ class LocalRewriter : public VirtRegRewriter { // If the value is already available in the expected register, save // a reload / remat. 
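A toy model of the AvailableSpills bookkeeping these hunks exercise: remember which stack slot (or remat id) currently lives in which physical register, so a reload can become a no-op reuse or a cheap register copy. Ids and structure are hypothetical simplifications:

#include <map>

// slot-or-remat id -> physreg currently holding that value
static std::map<int, unsigned> SpillSlotsAvailable;

static bool tryReuse(int SlotOrReMat, unsigned WantedPhys) {
  std::map<int, unsigned>::iterator It =
      SpillSlotsAvailable.find(SlotOrReMat);
  if (It == SpillSlotsAvailable.end())
    return false;              // not available: a real reload is needed
  if (It->second == WantedPhys)
    return true;               // already in the right register: no reload
  // Otherwise: emit a copy WantedPhys <- It->second instead of a load.
  return true;
}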
if (SSorRMId) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << SSorRMId); - DEBUG(errs() << " from physreg " + DEBUG(dbgs() << "Reusing SS#" << SSorRMId); + DEBUG(dbgs() << " from physreg " << TRI->getName(InReg) << " for vreg" << VirtReg <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); @@ -1711,11 +1733,11 @@ class LocalRewriter : public VirtRegRewriter { continue; } else if (InReg && InReg != Phys) { if (SSorRMId) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << SSorRMId); - DEBUG(errs() << " from physreg " + DEBUG(dbgs() << "Reusing SS#" << SSorRMId); + DEBUG(dbgs() << " from physreg " << TRI->getName(InReg) << " for vreg" << VirtReg <<" by copying it into physreg " << TRI->getName(Phys) << '\n'); @@ -1742,7 +1764,7 @@ class LocalRewriter : public VirtRegRewriter { KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *CopyMI); + DEBUG(dbgs() << '\t' << *CopyMI); ++NumCopified; continue; } @@ -1769,7 +1791,7 @@ class LocalRewriter : public VirtRegRewriter { Spills.addAvailable(SSorRMId, Phys); UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *prior(MII)); + DEBUG(dbgs() << '\t' << *prior(MII)); } } @@ -1789,7 +1811,7 @@ class LocalRewriter : public VirtRegRewriter { TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DEBUG(errs() << "Store:\t" << *StoreMI); + DEBUG(dbgs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } NextMII = llvm::next(MII); @@ -1840,16 +1862,14 @@ class LocalRewriter : public VirtRegRewriter { RegInfo->setPhysRegUsed(Phys); if (MO.isDef()) ReusedOperands.markClobbered(Phys); - unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); + substitutePhysReg(MO, Phys, *TRI); if (VRM.isImplicitlyDefined(VirtReg)) // FIXME: Is this needed? BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg); + TII->get(TargetInstrInfo::IMPLICIT_DEF), Phys); continue; } - + // This virtual register is now known to be a spilled value. if (!MO.isUse()) continue; // Handle defs in the loop below (handle use&def here though) @@ -1908,11 +1928,11 @@ class LocalRewriter : public VirtRegRewriter { if (CanReuse) { // If this stack slot value is already available, reuse it! if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << ReuseSlot); - DEBUG(errs() << " from physreg " + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) << " for vreg" << VirtReg <<" instead of reloading into physreg " << TRI->getName(VRM.getPhys(VirtReg)) << '\n'); @@ -1991,11 +2011,11 @@ class LocalRewriter : public VirtRegRewriter { if (DesignatedReg == PhysReg) { // If this stack slot value is already available, reuse it! 
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << ReuseSlot); - DEBUG(errs() << " from physreg " << TRI->getName(PhysReg) + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) << " for vreg" << VirtReg << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; @@ -2029,7 +2049,7 @@ class LocalRewriter : public VirtRegRewriter { SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); - DEBUG(errs() << '\t' << *prior(MII)); + DEBUG(dbgs() << '\t' << *prior(MII)); ++NumReused; continue; } // if (PhysReg) @@ -2082,7 +2102,7 @@ class LocalRewriter : public VirtRegRewriter { } UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *prior(InsertLoc)); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); } unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -2096,7 +2116,7 @@ class LocalRewriter : public VirtRegRewriter { int PDSSlot = PotentialDeadStoreSlots[j]; MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; if (DeadStore) { - DEBUG(errs() << "Removed dead store:\t" << *DeadStore); + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2106,7 +2126,7 @@ class LocalRewriter : public VirtRegRewriter { } - DEBUG(errs() << '\t' << MI); + DEBUG(dbgs() << '\t' << MI); // If we have folded references to memory operands, make sure we clear all @@ -2116,7 +2136,7 @@ class LocalRewriter : public VirtRegRewriter { for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { unsigned VirtReg = I->second.first; VirtRegMap::ModRef MR = I->second.second; - DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR); + DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); // MI2VirtMap be can updated which invalidate the iterator. // Increment the iterator first. @@ -2125,7 +2145,7 @@ class LocalRewriter : public VirtRegRewriter { if (SS == VirtRegMap::NO_STACK_SLOT) continue; FoldedSS.insert(SS); - DEBUG(errs() << " - StackSlot: " << SS << "\n"); + DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); // If this folded instruction is just a use, check to see if it's a // straight load from the virt reg slot. @@ -2136,7 +2156,7 @@ class LocalRewriter : public VirtRegRewriter { // If this spill slot is available, turn it into a copy (or nothing) // instead of leaving it as a load! if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(errs() << "Promoted Load To Copy: " << MI); + DEBUG(dbgs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC); @@ -2160,7 +2180,7 @@ class LocalRewriter : public VirtRegRewriter { BackTracked = true; } else { - DEBUG(errs() << "Removing now-noop copy: " << MI); + DEBUG(dbgs() << "Removing now-noop copy: " << MI); // Unset last kill since it's being reused. InvalidateKill(InReg, TRI, RegKills, KillOps); Spills.disallowClobberPhysReg(InReg); @@ -2230,7 +2250,7 @@ class LocalRewriter : public VirtRegRewriter { if (isDead) { // Previous store is dead. // If we get here, the store is dead, nuke it now. 
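The dead-store test being applied here, in toy form: a store to a stack slot is dead when the slot's next access is another store with no intervening load. A hedged standalone sketch over a simplified access trace:

#include <cstdio>
#include <vector>

struct Access { bool IsStore; int Slot; };

static bool firstStoreIsDead(const std::vector<Access> &Trace, int Slot) {
  bool AfterFirstStore = false;
  for (const Access &A : Trace) {
    if (A.Slot != Slot)
      continue;
    if (!AfterFirstStore) {
      if (A.IsStore) AfterFirstStore = true; // found the candidate store
      continue;
    }
    return A.IsStore; // next touch is a store: the first one was dead
  }
  return false; // slot may be live-out; conservatively keep the store
}

int main() {
  std::vector<Access> Trace = {{true, 3}, {true, 3}}; // SS#3 stored twice
  std::printf("dead store: %s\n", firstStoreIsDead(Trace, 3) ? "yes" : "no");
  return 0;
}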
- DEBUG(errs() << "Removed dead store:\t" << *DeadStore); + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2301,7 +2321,7 @@ class LocalRewriter : public VirtRegRewriter { if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; - DEBUG(errs() << "Removing now-noop copy: " << MI); + DEBUG(dbgs() << "Removing now-noop copy: " << MI); SmallVector KillRegs; InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); if (MO.isDead() && !KillRegs.empty()) { @@ -2389,7 +2409,7 @@ class LocalRewriter : public VirtRegRewriter { unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { ++NumDCE; - DEBUG(errs() << "Removing now-noop copy: " << MI); + DEBUG(dbgs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(&MI); MBB.erase(&MI); diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index cb307483f7f0..89c4290f2346 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -138,7 +138,7 @@ void *ExecutionEngineState::RemoveMapping( void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); - DEBUG(errs() << "JIT: Map \'" << GV->getName() + DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); void *&CurVal = EEState.getGlobalAddressMap(locked)[GV]; assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!"); @@ -246,13 +246,13 @@ static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE, unsigned PtrSize = EE->getTargetData()->getPointerSize(); char *Result = new char[(InputArgv.size()+1)*PtrSize]; - DEBUG(errs() << "JIT: ARGV = " << (void*)Result << "\n"); + DEBUG(dbgs() << "JIT: ARGV = " << (void*)Result << "\n"); const Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; char *Dest = new char[Size]; - DEBUG(errs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); + DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); Dest[Size-1] = 0; @@ -343,9 +343,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, // Check main() type unsigned NumArgs = Fn->getFunctionType()->getNumParams(); const FunctionType *FTy = Fn->getFunctionType(); - const Type* PPInt8Ty = - PointerType::getUnqual(PointerType::getUnqual( - Type::getInt8Ty(Fn->getContext()))); + const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo(); switch (NumArgs) { case 3: if (FTy->getParamType(2) != PPInt8Ty) { @@ -358,13 +356,13 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, } // FALLS THROUGH case 1: - if (FTy->getParamType(0) != Type::getInt32Ty(Fn->getContext())) { + if (!FTy->getParamType(0)->isInteger(32)) { llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: if (!isa(FTy->getReturnType()) && - FTy->getReturnType() != Type::getVoidTy(FTy->getContext())) { + !FTy->getReturnType()->isVoidTy()) { llvm_report_error("Invalid return type of main() supplied"); } break; @@ -493,8 +491,22 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { /// @brief Get a GenericValue for a Constant* GenericValue 
ExecutionEngine::getConstantValue(const Constant *C) { // If its undefined, return the garbage. - if (isa(C)) - return GenericValue(); + if (isa(C)) { + GenericValue Result; + switch (C->getType()->getTypeID()) { + case Type::IntegerTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + // Although the value is undefined, we still have to construct an APInt + // with the correct bit width. + Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); + break; + default: + break; + } + return Result; + } // If the value is a ConstantExpr if (const ConstantExpr *CE = dyn_cast(C)) { @@ -620,13 +632,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy == Type::getInt32Ty(DestTy->getContext()) && - "Invalid bitcast"); + assert(DestTy->isInteger(32) && "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy == Type::getInt64Ty(DestTy->getContext()) && - "Invalid bitcast"); + assert(DestTy->isInteger(64) && "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: @@ -832,7 +842,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((PointerTy*)Ptr) = Val.PointerVal; break; default: - errs() << "Cannot store value of type " << *Ty << "!\n"; + dbgs() << "Cannot store value of type " << *Ty << "!\n"; } if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) @@ -908,7 +918,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, // specified memory location... // void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { - DEBUG(errs() << "JIT: Initializing " << Addr << " "); + DEBUG(dbgs() << "JIT: Initializing " << Addr << " "); DEBUG(Init->dump()); if (isa(Init)) { return; @@ -939,7 +949,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { return; } - errs() << "Bad Type: " << *Init->getType() << "\n"; + dbgs() << "Bad Type: " << *Init->getType() << "\n"; llvm_unreachable("Unknown constant type to initialize memory with!"); } diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 5901cd757dc1..412b49320d93 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -24,7 +24,7 @@ using namespace llvm; LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, unsigned long long N, - int IsSigned) { + LLVMBool IsSigned) { GenericValue *GenVal = new GenericValue(); GenVal->IntVal = APInt(unwrap(Ty)->getBitWidth(), N, IsSigned); return wrap(GenVal); @@ -56,7 +56,7 @@ unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) { } unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef, - int IsSigned) { + LLVMBool IsSigned) { GenericValue *GenVal = unwrap(GenValRef); if (IsSigned) return GenVal->IntVal.getSExtValue(); @@ -87,9 +87,9 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { /*===-- Operations on execution engines -----------------------------------===*/ -int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError) { std::string Error; EngineBuilder builder(unwrap(MP)); builder.setEngineKind(EngineKind::Either) @@ -102,9 +102,9 @@ int 
LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, return 1; } -int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError) { std::string Error; EngineBuilder builder(unwrap(MP)); builder.setEngineKind(EngineKind::Interpreter) @@ -117,10 +117,10 @@ int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, return 1; } -int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError) { +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError) { std::string Error; EngineBuilder builder(unwrap(MP)); builder.setEngineKind(EngineKind::JIT) @@ -177,9 +177,9 @@ void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ unwrap(EE)->addModuleProvider(unwrap(MP)); } -int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError) { +LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, + LLVMModuleProviderRef MP, + LLVMModuleRef *OutMod, char **OutError) { std::string Error; if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) { *OutMod = wrap(Gone); @@ -190,8 +190,8 @@ int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, return 1; } -int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, - LLVMValueRef *OutFn) { +LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, + LLVMValueRef *OutFn) { if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) { *OutFn = wrap(F); return 0; diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index b59cfd162f94..73f55588862d 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -56,7 +56,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(+, Float); IMPLEMENT_BINARY_OPERATOR(+, Double); default: - errs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -67,7 +67,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(-, Float); IMPLEMENT_BINARY_OPERATOR(-, Double); default: - errs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -78,7 +78,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(*, Float); IMPLEMENT_BINARY_OPERATOR(*, Double); default: - errs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -89,7 +89,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(/, Float); IMPLEMENT_BINARY_OPERATOR(/, Double); default: - errs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -104,7 +104,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal); break; default: - errs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + dbgs() << "Unhandled 
type for Rem instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -131,7 +131,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: - errs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -144,7 +144,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: - errs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -157,7 +157,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: - errs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -170,7 +170,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: - errs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -183,7 +183,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: - errs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -196,7 +196,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: - errs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -209,7 +209,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: - errs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -222,7 +222,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: - errs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -235,7 +235,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: - errs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -248,7 +248,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: - errs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -273,7 +273,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_UGE: R = 
executeICMP_UGE(Src1, Src2, Ty); break; case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break; default: - errs() << "Don't know how to handle this ICmp predicate!\n-->" << I; + dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I; llvm_unreachable(0); } @@ -292,7 +292,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); default: - errs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -306,7 +306,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(!=, Double); default: - errs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -319,7 +319,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); default: - errs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -332,7 +332,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); default: - errs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -345,7 +345,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); default: - errs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -358,7 +358,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); default: - errs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -467,7 +467,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; default: - errs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; llvm_unreachable(0); } @@ -513,7 +513,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, return Result; } default: - errs() << "Unhandled Cmp predicate\n"; + dbgs() << "Unhandled Cmp predicate\n"; llvm_unreachable(0); } } @@ -542,7 +542,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; default: - errs() << "Don't know how to handle this binary operator!\n-->" << I; + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; llvm_unreachable(0); } @@ -602,7 +602,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ExecutionContext &CallingSF = ECStack.back(); if (Instruction *I = CallingSF.Caller.getInstruction()) { // Save result... 
- if (CallingSF.Caller.getType() != Type::getVoidTy(RetTy->getContext())) + if (!CallingSF.Caller.getType()->isVoidTy()) SetValue(I, Result, CallingSF); if (InvokeInst *II = dyn_cast (I)) SwitchToNewBasicBlock (II->getNormalDest (), CallingSF); @@ -744,7 +744,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { // Allocate enough memory to hold the type... void *Memory = malloc(MemToAlloc); - DEBUG(errs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " + DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " << NumElements << " (Total: " << MemToAlloc << ") at " << uintptr_t(Memory) << '\n'); @@ -794,7 +794,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, GenericValue Result; Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total; - DEBUG(errs() << "GEP Index " << Total << " bytes.\n"); + DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n"); return Result; } @@ -812,7 +812,7 @@ void Interpreter::visitLoadInst(LoadInst &I) { LoadValueFromMemory(Result, Ptr, I.getType()); SetValue(&I, Result, SF); if (I.isVolatile() && PrintVolatile) - errs() << "Volatile load " << I; + dbgs() << "Volatile load " << I; } void Interpreter::visitStoreInst(StoreInst &I) { @@ -822,7 +822,7 @@ void Interpreter::visitStoreInst(StoreInst &I) { StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC), I.getOperand(0)->getType()); if (I.isVolatile() && PrintVolatile) - errs() << "Volatile store: " << I; + dbgs() << "Volatile store: " << I; } //===----------------------------------------------------------------------===// @@ -1164,7 +1164,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { IMPLEMENT_VAARG(Float); IMPLEMENT_VAARG(Double); default: - errs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; + dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; llvm_unreachable(0); } @@ -1251,7 +1251,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue()); break; default: - errs() << "Unhandled ConstantExpr: " << *CE << "\n"; + dbgs() << "Unhandled ConstantExpr: " << *CE << "\n"; llvm_unreachable(0); return GenericValue(); } @@ -1324,24 +1324,24 @@ void Interpreter::run() { // Track the number of dynamic instructions executed. ++NumDynamicInsts; - DEBUG(errs() << "About to interpret: " << I); + DEBUG(dbgs() << "About to interpret: " << I); visit(I); // Dispatch to one of the visit* methods... #if 0 // This is not safe, as visiting the instruction could lower it and free I. 
DEBUG( if (!isa(I) && !isa(I) && I.getType() != Type::VoidTy) { - errs() << " --> "; + dbgs() << " --> "; const GenericValue &Val = SF.Values[&I]; switch (I.getType()->getTypeID()) { default: llvm_unreachable("Invalid GenericValue Type"); - case Type::VoidTyID: errs() << "void"; break; - case Type::FloatTyID: errs() << "float " << Val.FloatVal; break; - case Type::DoubleTyID: errs() << "double " << Val.DoubleVal; break; - case Type::PointerTyID: errs() << "void* " << intptr_t(Val.PointerVal); + case Type::VoidTyID: dbgs() << "void"; break; + case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break; + case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break; + case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal); break; case Type::IntegerTyID: - errs() << "i" << Val.IntVal.getBitWidth() << " " + dbgs() << "i" << Val.IntVal.getBitWidth() << " " << Val.IntVal.toStringUnsigned(10) << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; break; diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index ebc25677438e..faf724fa854b 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -411,11 +411,10 @@ GenericValue JIT::runFunction(Function *F, // Handle some common cases first. These cases correspond to common `main' // prototypes. - if (RetTy == Type::getInt32Ty(F->getContext()) || - RetTy == Type::getVoidTy(F->getContext())) { + if (RetTy->isInteger(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: - if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) && + if (FTy->getParamType(0)->isInteger(32) && isa(FTy->getParamType(1)) && isa(FTy->getParamType(2))) { int (*PF)(int, char **, const char **) = @@ -430,7 +429,7 @@ GenericValue JIT::runFunction(Function *F, } break; case 2: - if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) && + if (FTy->getParamType(0)->isInteger(32) && isa(FTy->getParamType(1))) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; @@ -443,7 +442,7 @@ GenericValue JIT::runFunction(Function *F, break; case 1: if (FTy->getNumParams() == 1 && - FTy->getParamType(0) == Type::getInt32Ty(F->getContext())) { + FTy->getParamType(0)->isInteger(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); @@ -548,7 +547,7 @@ GenericValue JIT::runFunction(Function *F, "", StubBB); TheCall->setCallingConv(F->getCallingConv()); TheCall->setTailCall(); - if (TheCall->getType() != Type::getVoidTy(F->getContext())) + if (!TheCall->getType()->isVoidTy()) // Return result of the call. ReturnInst::Create(F->getContext(), TheCall, StubBB); else diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index ef323b52da81..0f604ac766b3 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -377,7 +377,7 @@ namespace { MemMgr = JMM ? 
JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); - DEBUG(errs() << "JIT is managing a GOT\n"); + DEBUG(dbgs() << "JIT is managing a GOT\n"); } if (DwarfExceptionHandling || JITEmitDebugInfo) { @@ -431,7 +431,7 @@ namespace { if (MBBLocations.size() <= (unsigned)MBB->getNumber()) MBBLocations.resize((MBB->getNumber()+1)*2); MBBLocations[MBB->getNumber()] = getCurrentPCValue(); - DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" + DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" << (void*) getCurrentPCValue() << "]\n"); } @@ -547,7 +547,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { TheJIT->updateGlobalMapping(F, Stub); } - DEBUG(errs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" + DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); // Finally, keep track of the stub-to-Function mapping so that the @@ -577,7 +577,7 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, JE); - DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym + DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '" << GV->getName() << "'\n"); return IndirectSym; @@ -595,7 +595,7 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) { Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE); JE.finishGVStub(); - DEBUG(errs() << "JIT: Stub emitted at [" << Stub + DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub << "] for external function at '" << FnAddr << "'\n"); return Stub; } @@ -605,7 +605,7 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) { if (!idx) { idx = ++nextGOTIndex; revGOTMap[addr] = idx; - DEBUG(errs() << "JIT: Adding GOT entry " << idx << " for addr [" + DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr [" << addr << "]\n"); } return idx; @@ -701,7 +701,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { + F->getName() + "' when lazy compiles are disabled!"); } - DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName() + DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName() << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); @@ -864,7 +864,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy); size_t GVAlign = (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV); - DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign); + DEBUG(dbgs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign); DEBUG(GV->dump()); // Assume code section ends with worst possible alignment, so first // variable needs maximal padding. @@ -992,7 +992,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } } } - DEBUG(errs() << "JIT: About to look through initializers\n"); + DEBUG(dbgs() << "JIT: About to look through initializers\n"); // Look for more globals that are referenced only from initializers. // GVSet.end is computed each time because the set can grow as we go. 
for (SmallPtrSet::iterator I = GVSet.begin(); @@ -1006,14 +1006,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } void JITEmitter::startFunction(MachineFunction &F) { - DEBUG(errs() << "JIT: Starting CodeGen of Function " + DEBUG(dbgs() << "JIT: Starting CodeGen of Function " << F.getFunction()->getName() << "\n"); uintptr_t ActualSize = 0; // Set the memory writable, if it's not already MemMgr->setMemoryWritable(); if (MemMgr->NeedsExactSize()) { - DEBUG(errs() << "JIT: ExactSize\n"); + DEBUG(dbgs() << "JIT: ExactSize\n"); const TargetInstrInfo* TII = F.getTarget().getInstrInfo(); MachineJumpTableInfo *MJTI = F.getJumpTableInfo(); MachineConstantPool *MCP = F.getConstantPool(); @@ -1040,12 +1040,12 @@ void JITEmitter::startFunction(MachineFunction &F) { // Add the function size ActualSize += TII->GetFunctionSizeInBytes(F); - DEBUG(errs() << "JIT: ActualSize before globals " << ActualSize << "\n"); + DEBUG(dbgs() << "JIT: ActualSize before globals " << ActualSize << "\n"); // Add the size of the globals that will be allocated after this function. // These are all the ones referenced from this function that were not // previously allocated. ActualSize += GetSizeOfGlobalsInBytes(F); - DEBUG(errs() << "JIT: ActualSize after globals " << ActualSize << "\n"); + DEBUG(dbgs() << "JIT: ActualSize after globals " << ActualSize << "\n"); } else if (SizeEstimate > 0) { // SizeEstimate will be non-zero on reallocation attempts. ActualSize = SizeEstimate; @@ -1104,7 +1104,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MR.isExternalSymbol()) { ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), false); - DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" << ResultPtr << "]\n"); // If the target REALLY wants a stub for this function, emit it now. 
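Most hunks in this patch are a mechanical migration from errs() to dbgs() inside DEBUG(...). As a reading aid, here is a minimal sketch of the pattern being converted to; the DEBUG_TYPE string and the demo function are illustrative and not part of the patch. The practical difference, as I understand the change, is that dbgs() is the dedicated debug stream: the DEBUG(...) body compiles away in NDEBUG builds, and dbgs() can be backed by a circular buffer (-debug-buffer-size=N) that is flushed only on a crash, which plain errs() cannot do.

#define DEBUG_TYPE "jit-demo"   // illustrative pass name, not from the patch
#include "llvm/Support/Debug.h"

using namespace llvm;

static void emitDemo(unsigned Bytes) {
  // Prints only when the tool runs with -debug (or -debug-only=jit-demo);
  // the whole statement is compiled out in NDEBUG builds.
  DEBUG(dbgs() << "JIT: emitting " << Bytes << " bytes\n");
}
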
@@ -1136,7 +1136,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); MR.setGOTIndex(idx); if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { - DEBUG(errs() << "JIT: GOT was out of date for " << ResultPtr + DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n"); ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; @@ -1153,7 +1153,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MemMgr->isManagingGOT()) { unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { - DEBUG(errs() << "JIT: GOT was out of date for " << (void*)BufferBegin + DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n"); ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; @@ -1182,7 +1182,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, EmissionDetails); - DEBUG(errs() << "JIT: Finished CodeGen of [" << (void*)FnStart + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart << "] Function: " << F.getFunction()->getName() << ": " << (FnEnd-FnStart) << " bytes of text, " << Relocations.size() << " relocations\n"); @@ -1195,31 +1195,31 @@ bool JITEmitter::finishFunction(MachineFunction &F) { DEBUG( if (sys::hasDisassembler()) { - errs() << "JIT: Disassembled code:\n"; - errs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, + dbgs() << "JIT: Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, (uintptr_t)FnStart); } else { - errs() << "JIT: Binary code:\n"; + dbgs() << "JIT: Binary code:\n"; uint8_t* q = FnStart; for (int i = 0; q < FnEnd; q += 4, ++i) { if (i == 4) i = 0; if (i == 0) - errs() << "JIT: " << (long)(q - FnStart) << ": "; + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; bool Done = false; for (int j = 3; j >= 0; --j) { if (q + j >= FnEnd) Done = true; else - errs() << (unsigned short)q[j]; + dbgs() << (unsigned short)q[j]; } if (Done) break; - errs() << ' '; + dbgs() << ' '; if (i == 3) - errs() << '\n'; + dbgs() << '\n'; } - errs()<< '\n'; + dbgs()<< '\n'; } ); @@ -1268,7 +1268,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } void JITEmitter::retryWithMoreMemory(MachineFunction &F) { - DEBUG(errs() << "JIT: Ran out of space for native code. Reattempting.\n"); + DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n"); Relocations.clear(); // Clear the old relocations or we'll reapply them. ConstPoolAddresses.clear(); ++NumRetries; @@ -1319,7 +1319,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { // in the JITResolver. Were there a memory manager deallocateStub routine, // we could call that at this point too. if (FnRefs.empty()) { - DEBUG(errs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); + DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); StubFnRefs.erase(Stub); // Invalidate the stub. If it is a GV stub, update the JIT's global @@ -1365,7 +1365,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { if (ConstantPoolBase == 0) return; // Buffer overflow. 
- DEBUG(errs() << "JIT: Emitted constant pool at [" << ConstantPoolBase + DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase << "] (size: " << Size << ", alignment: " << Align << ")\n"); // Initialize the memory for all of the constant pool entries. @@ -1383,8 +1383,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { "entry has not been implemented!"); } TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); - DEBUG(errs() << "JIT: CP" << i << " at [0x"; - errs().write_hex(CAddr) << "]\n"); + DEBUG(dbgs() << "JIT: CP" << i << " at [0x"; + dbgs().write_hex(CAddr) << "]\n"); const Type *Ty = CPE.Val.ConstVal->getType(); Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty); diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 80cb999a90ac..a17caa17f4dd 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -352,7 +352,7 @@ namespace { // another block of memory and add it to the free list. if (largest < ActualSize || largest <= FreeRangeHeader::getMinBlockSize()) { - DEBUG(errs() << "JIT: Allocating another slab of memory for function."); + DEBUG(dbgs() << "JIT: Allocating another slab of memory for function."); candidateBlock = allocateNewCodeSlab((size_t)ActualSize); } diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 52a8f71ca34b..d01c4b2db541 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -50,9 +50,9 @@ OProfileJITEventListener::OProfileJITEventListener() : Agent(op_open_agent()) { if (Agent == NULL) { const std::string err_str = sys::StrError(); - DEBUG(errs() << "Failed to connect to OProfile agent: " << err_str << "\n"); + DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n"); } else { - DEBUG(errs() << "Connected to OProfile agent.\n"); + DEBUG(dbgs() << "Connected to OProfile agent.\n"); } } @@ -60,10 +60,10 @@ OProfileJITEventListener::~OProfileJITEventListener() { if (Agent != NULL) { if (op_close_agent(Agent) == -1) { const std::string err_str = sys::StrError(); - DEBUG(errs() << "Failed to disconnect from OProfile agent: " + DEBUG(dbgs() << "Failed to disconnect from OProfile agent: " << err_str << "\n"); } else { - DEBUG(errs() << "Disconnected from OProfile agent.\n"); + DEBUG(dbgs() << "Disconnected from OProfile agent.\n"); } } } @@ -92,7 +92,7 @@ static debug_line_info LineStartToOProfileFormat( const DebugLocTuple &tuple = MF.getDebugLocTuple(Loc); Result.lineno = tuple.Line; Result.filename = Filenames.getFilename(tuple.Scope); - DEBUG(errs() << "Mapping " << reinterpret_cast(Result.vma) << " to " + DEBUG(dbgs() << "Mapping " << reinterpret_cast(Result.vma) << " to " << Result.filename << ":" << Result.lineno << "\n"); return Result; } @@ -105,7 +105,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (op_write_native_code(Agent, F.getName().data(), reinterpret_cast(FnStart), FnStart, FnSize) == -1) { - DEBUG(errs() << "Failed to tell OProfile about native function " + DEBUG(dbgs() << "Failed to tell OProfile about native function " << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); return; @@ -133,7 +133,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (!LineInfo.empty()) { if (op_write_debug_line_info(Agent, FnStart, LineInfo.size(), &*LineInfo.begin()) == -1) { - DEBUG(errs() + 
DEBUG(dbgs() << "Failed to tell OProfile about line numbers for native function " << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); @@ -145,7 +145,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { assert(FnStart && "Invalid function pointer"); if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) { - DEBUG(errs() + DEBUG(dbgs() << "Failed to tell OProfile about unload of native function at " << FnStart << "\n"); } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 104cbe9405d4..dcd696c70d4c 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -25,6 +25,7 @@ #include "llvm/ValueSymbolTable.h" #include "llvm/Instructions.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" @@ -144,7 +145,7 @@ class LinkerTypeMap : public AbstractTypeUser { // for debugging... virtual void dump() const { - errs() << "AbstractTypeSet!\n"; + dbgs() << "AbstractTypeSet!\n"; } }; } @@ -337,11 +338,11 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { static void PrintMap(const std::map<const Value*, Value*> &M) { for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end(); I != E; ++I) { - errs() << " Fr: " << (void*)I->first << " "; + dbgs() << " Fr: " << (void*)I->first << " "; I->first->dump(); - errs() << " To: " << (void*)I->second << " "; + dbgs() << " To: " << (void*)I->second << " "; I->second->dump(); - errs() << "\n"; + dbgs() << "\n"; } } #endif @@ -404,10 +405,10 @@ static Value *RemapOperand(const Value *In, } #ifndef NDEBUG - errs() << "LinkModules ValueMap: \n"; + dbgs() << "LinkModules ValueMap: \n"; PrintMap(ValueMap); - errs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; + dbgs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; llvm_unreachable("Couldn't remap value!"); #endif return 0; @@ -854,9 +855,14 @@ static bool LinkAlias(Module *Dest, const Module *Src, } else { // No linking to be performed, simply create an identical version of the // alias over in the dest module... - + Constant *Aliasee = DAliasee; + // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken + // by this, but aliases to GEPs are broken to a lot of other things, so + // it's less important. + if (SGA->getType() != DAliasee->getType()) + Aliasee = ConstantExpr::getBitCast(DAliasee, SGA->getType()); NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), - SGA->getName(), DAliasee, Dest); + SGA->getName(), Aliasee, Dest); CopyGVAttributes(NewGA, SGA); // Proceed to 'common' steps @@ -1222,9 +1228,15 @@ static bool LinkAppendingVars(Module *M, static bool ResolveAliases(Module *Dest) { for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end(); I != E; ++I) - if (const GlobalValue *GV = I->resolveAliasedGlobal()) - if (GV != I && !GV->isDeclaration()) - I->replaceAllUsesWith(const_cast<GlobalValue*>(GV)); + // We can't use resolveAliasedGlobal here because we need to preserve + // bitcasts and GEPs.
+ if (const Constant *C = I->getAliasee()) { + while (dyn_cast(C)) + C = cast(C)->getAliasee(); + const GlobalValue *GV = dyn_cast(C); + if (C != I && !(GV && GV->isDeclaration())) + I->replaceAllUsesWith(const_cast(C)); + } return false; } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index a5a2256f4c88..a19ec19bca5d 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -108,8 +109,8 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCExpr::dump() const { - print(errs(), 0); - errs() << '\n'; + print(dbgs(), 0); + dbgs() << '\n'; } /* *** */ diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index d05031870add..7c7a6447736c 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -31,8 +32,8 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCOperand::dump() const { - print(errs(), 0); - errs() << "\n"; + print(dbgs(), 0); + dbgs() << "\n"; } void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { @@ -45,6 +46,6 @@ void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCInst::dump() const { - print(errs(), 0); - errs() << "\n"; + print(dbgs(), 0); + dbgs() << "\n"; } diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index c6812ed99c41..4d520ecd20a1 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCAsmInfo.h" - +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; MCSectionELF *MCSectionELF:: @@ -23,7 +23,7 @@ Create(StringRef Section, unsigned Type, unsigned Flags, // ShouldOmitSectionDirective - Decides whether a '.section' directive // should be printed before the section name bool MCSectionELF::ShouldOmitSectionDirective(const char *Name, - const MCAsmInfo &MAI) const { + const MCAsmInfo &MAI) const { // FIXME: Does .section .bss/.data/.text work everywhere?? if (strcmp(Name, ".text") == 0 || @@ -37,7 +37,6 @@ bool MCSectionELF::ShouldOmitSectionDirective(const char *Name, // ShouldPrintSectionType - Only prints the section type if supported bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const { - if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS)) return false; diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index b145d07f4a68..265d06cceba1 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -51,11 +52,14 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { return false; } -static void PrintMangledName(raw_ostream &OS, StringRef Str, - const MCAsmInfo &MAI) { +/// printMangledName - Print the specified string in mangled form if it uses +/// any unusual characters. 
+void MCSymbol::printMangledName(StringRef Str, raw_ostream &OS, + const MCAsmInfo *MAI) { // The first character is not allowed to be a number unless the target // explicitly allows it. - if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') { + if ((MAI == 0 || !MAI->doesAllowNameToStartWithDigit()) && + Str[0] >= '0' && Str[0] <= '9') { MangleLetter(OS, Str[0]); Str = Str.substr(1); } @@ -94,7 +98,7 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const { // On systems that do not allow quoted names, print with mangling. if (!MAI->doesAllowQuotesInName()) - return PrintMangledName(OS, getName(), *MAI); + return printMangledName(getName(), OS, MAI); // If the string contains a double quote or newline, we still have to mangle // it. @@ -106,5 +110,5 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCSymbol::dump() const { - print(errs(), 0); + print(dbgs(), 0); } diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index 69bd10c8e699..c1222ec88721 100644 --- a/lib/MC/MCValue.cpp +++ b/lib/MC/MCValue.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -30,5 +31,5 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCValue::dump() const { - print(errs(), 0); + print(dbgs(), 0); } diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 9532e1e160be..9d1468493d5b 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1580,12 +1580,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, uint64_t b = uint64_t(1) << 32; #if 0 - DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); - DEBUG(errs() << "KnuthDiv: original:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << " by"); - DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG(dbgs() << "KnuthDiv: original:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << " by"); + DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); + DEBUG(dbgs() << '\n'); #endif // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of // u and v by d. Note that we have taken Knuth's advice here to use a power @@ -1612,17 +1612,17 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[m+n] = u_carry; #if 0 - DEBUG(errs() << "KnuthDiv: normal:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << " by"); - DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: normal:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << " by"); + DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); + DEBUG(dbgs() << '\n'); #endif // D2. [Initialize j.] Set j to m. This is the loop counter over the places. int j = m; do { - DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n'); + DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); // D3. [Calculate q'.]. // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. 
(rp=rprime=r') @@ -1632,7 +1632,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // value qp is one too large, and it eliminates all cases where qp is two // too large. uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); - DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n'); + DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); uint64_t qp = dividend / v[n-1]; uint64_t rp = dividend % v[n-1]; if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { @@ -1641,7 +1641,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) qp--; } - DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation @@ -1652,7 +1652,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); bool borrow = subtrahend > u_tmp; - DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp + DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp << ", subtrahend == " << subtrahend << ", borrow = " << borrow << '\n'); @@ -1666,12 +1666,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, k++; } isNeg |= borrow; - DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << u[j+i+1] << '\n'); } - DEBUG(errs() << "KnuthDiv: after subtraction:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << '\n'); // The digits (u[j+n]...u[j]) should be kept positive; if the result of // this step is actually negative, (u[j+n]...u[j]) should be left as the // true value plus b**(n+1), namely as the b's complement of @@ -1684,9 +1684,9 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, carry = carry && u[i] == 0; } } - DEBUG(errs() << "KnuthDiv: after complement:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << '\n'); // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was // negative, go to step D6; otherwise go on to step D7. @@ -1707,16 +1707,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[j+n] += carry; } - DEBUG(errs() << "KnuthDiv: after correction:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]); - DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); + DEBUG(dbgs() << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]); + DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. } while (--j >= 0); - DEBUG(errs() << "KnuthDiv: quotient:"); - DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]); + DEBUG(dbgs() << '\n'); // D8. [Unnormalize]. Now q[...] 
is the desired quotient, and the desired // remainder may be obtained by dividing u[...] by d. If r is non-null we @@ -1727,22 +1727,22 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // shift right here. In order to mak if (shift) { unsigned carry = 0; - DEBUG(errs() << "KnuthDiv: remainder:"); + DEBUG(dbgs() << "KnuthDiv: remainder:"); for (int i = n-1; i >= 0; i--) { r[i] = (u[i] >> shift) | carry; carry = u[i] << (32 - shift); - DEBUG(errs() << " " << r[i]); + DEBUG(dbgs() << " " << r[i]); } } else { for (int i = n-1; i >= 0; i--) { r[i] = u[i]; - DEBUG(errs() << " " << r[i]); + DEBUG(dbgs() << " " << r[i]); } } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); } #if 0 - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); #endif } @@ -2191,7 +2191,7 @@ void APInt::dump() const { SmallString<40> S, U; this->toStringUnsigned(U); this->toStringSigned(S); - errs() << "APInt(" << BitWidth << "b, " + dbgs() << "APInt(" << BitWidth << "b, " << U.str() << "u " << S.str() << "s)"; } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b6c0e08c8ca2..fa692be8cc2c 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -354,7 +354,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, // we don't need to pass argc/argv in. assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && "Option can not be cl::Grouping AND cl::ValueRequired!"); - int Dummy; + int Dummy = 0; ErrorParsing |= ProvideOption(PGOpt, OneArgName, StringRef(), 0, 0, Dummy); @@ -778,10 +778,10 @@ void cl::ParseCommandLineOptions(int argc, char **argv, free(*i); } - DEBUG(errs() << "Args: "; + DEBUG(dbgs() << "Args: "; for (int i = 0; i < argc; ++i) - errs() << argv[i] << ' '; - errs() << '\n'; + dbgs() << argv[i] << ' '; + dbgs() << '\n'; ); // If we had an error processing our arguments, don't let the program execute diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index e427f820c446..ddf14e33eed8 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Instructions.h" using namespace llvm; @@ -655,7 +656,7 @@ void ConstantRange::print(raw_ostream &OS) const { /// dump - Allow printing from a debugger easily... /// void ConstantRange::dump() const { - print(errs()); + print(dbgs()); } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index dff4f030fefe..8bb156653a74 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Threading.h" @@ -62,11 +63,11 @@ void llvm_unreachable_internal(const char *msg, const char *file, // llvm_unreachable is intended to be used to indicate "impossible" // situations, and not legitimate runtime errors. 
if (msg) - errs() << msg << "\n"; - errs() << "UNREACHABLE executed"; + dbgs() << msg << "\n"; + dbgs() << "UNREACHABLE executed"; if (file) - errs() << " at " << file << ":" << line; - errs() << "!\n"; + dbgs() << " at " << file << ":" << line; + dbgs() << "!\n"; abort(); } } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 70f2cfa6ae88..9ab3666340e2 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -91,3 +92,10 @@ formatted_raw_ostream &llvm::ferrs() { static formatted_raw_ostream S(errs()); return S; } + +/// fdbgs() - This returns a reference to a formatted_raw_ostream for +/// the debug stream. Use it like: fdbgs() << "foo" << "bar"; +formatted_raw_ostream &llvm::fdbgs() { + static formatted_raw_ostream S(dbgs()); + return S; +} diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 14f94bc28447..e78767045998 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" @@ -127,6 +128,6 @@ StatisticInfo::~StatisticInfo() { OutStream << '\n'; // Flush the output stream... OutStream.flush(); - if (&OutStream != &outs() && &OutStream != &errs()) + if (&OutStream != &outs() && &OutStream != &errs() && &OutStream != &dbgs()) delete &OutStream; // Close the file. } diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index 1b233ab200ae..785e0ec4c633 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -11,50 +11,53 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" -#include <cstring> +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; +/// StrInStrNoCase - Portable version of strcasestr. Locates the first +/// occurrence of string 's2' in string 's1', ignoring case. Returns +/// the offset of s2 in s1 or npos if s2 cannot be found. +StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) { + size_t N = s2.size(), M = s1.size(); + if (N > M) + return StringRef::npos; + for (size_t i = 0, e = M - N + 1; i != e; ++i) + if (s1.substr(i, N).equals_lower(s2)) + return i; + return StringRef::npos; +} + /// getToken - This function extracts one token from source, ignoring any /// leading characters that appear in the Delimiters string, and ending the /// token at any of the characters that appear in the Delimiters string. If /// there are no tokens in the source string, an empty string is returned. -/// The Source source string is updated in place to remove the returned string -/// and any delimiter prefix from it. -std::string llvm::getToken(std::string &Source, const char *Delimiters) { - size_t NumDelimiters = std::strlen(Delimiters); - +/// The function returns a pair containing the extracted token and the +/// remaining tail string. +std::pair<StringRef, StringRef> llvm::getToken(StringRef Source, + StringRef Delimiters) { // Figure out where the token starts.
- std::string::size_type Start = - Source.find_first_not_of(Delimiters, 0, NumDelimiters); - if (Start == std::string::npos) Start = Source.size(); + StringRef::size_type Start = Source.find_first_not_of(Delimiters); + if (Start == StringRef::npos) Start = Source.size(); - // Find the next occurance of the delimiter. - std::string::size_type End = - Source.find_first_of(Delimiters, Start, NumDelimiters); - if (End == std::string::npos) End = Source.size(); + // Find the next occurrence of the delimiter. + StringRef::size_type End = Source.find_first_of(Delimiters, Start); + if (End == StringRef::npos) End = Source.size(); - // Create the return token. - std::string Result = std::string(Source.begin()+Start, Source.begin()+End); - - // Erase the token that we read in. - Source.erase(Source.begin(), Source.begin()+End); - - return Result; + return std::make_pair(Source.substr(Start, End), Source.substr(End)); } /// SplitString - Split up the specified string according to the specified /// delimiters, appending the result fragments to the output list. -void llvm::SplitString(const std::string &Source, - std::vector &OutFragments, - const char *Delimiters) { - std::string S = Source; - - std::string S2 = getToken(S, Delimiters); +void llvm::SplitString(StringRef Source, + SmallVectorImpl &OutFragments, + StringRef Delimiters) { + StringRef S2, S; + tie(S2, S) = getToken(Source, Delimiters); while (!S2.empty()) { OutFragments.push_back(S2); - S2 = getToken(S, Delimiters); + tie(S2, S) = getToken(S, Delimiters); } } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index e4a9984828f3..ae2640b5b946 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallVector.h" + using namespace llvm; // MSVC emits references to this into the translation units which reference it. 
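The StringExtras.cpp rewrite above replaces the old mutating getToken, which erased each token from a std::string, with a functional StringRef version that returns a (token, remaining tail) pair, and retargets SplitString at StringRef fragments that alias the input buffer rather than per-fragment std::string copies. A small usage sketch; the driver function is illustrative, not part of the patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"

using namespace llvm;

static void tokenDemo() {
  // New-style getToken: no mutation, just a (token, remaining-tail) pair.
  std::pair<StringRef, StringRef> P = getToken("key=value", "=");
  // P.first == "key", P.second == "=value"

  // SplitString appends the delimiter-separated fragments; each StringRef
  // points into the original buffer, so nothing is copied.
  SmallVector<StringRef, 4> Parts;
  SplitString("one two", Parts, " ");
  // Parts == {"one", "two"}
}
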
@@ -51,13 +51,18 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, size_type m = size(); size_type n = Other.size(); - SmallVector previous(n+1, 0); - for (SmallVector::size_type i = 0; i <= n; ++i) + const unsigned SmallBufferSize = 64; + unsigned SmallBuffer[SmallBufferSize]; + unsigned *Allocated = 0; + unsigned *previous = SmallBuffer; + if (2*(n + 1) > SmallBufferSize) + Allocated = previous = new unsigned [2*(n+1)]; + unsigned *current = previous + (n + 1); + + for (unsigned i = 0; i <= n; ++i) previous[i] = i; - SmallVector current(n+1, 0); for (size_type y = 1; y <= m; ++y) { - current.assign(n+1, 0); current[0] = y; for (size_type x = 1; x <= n; ++x) { if (AllowReplacements) { @@ -69,10 +74,16 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, else current[x] = min(current[x-1], previous[x]) + 1; } } - current.swap(previous); + + unsigned *tmp = current; + current = previous; + previous = tmp; } - return previous[n]; + unsigned Result = previous[n]; + delete [] Allocated; + + return Result; } //===----------------------------------------------------------------------===// diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 7d32ee66c2bf..4bdfac298cc0 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" @@ -373,7 +374,7 @@ void TimerGroup::removeTimer() { TimersToPrint.clear(); - if (OutStream != &errs() && OutStream != &outs()) + if (OutStream != &errs() && OutStream != &outs() && OutStream != &dbgs()) delete OutStream; // Close the file... } } diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp index 292c0c2b9e5e..21504e964ea9 100644 --- a/lib/Support/Twine.cpp +++ b/lib/Support/Twine.cpp @@ -9,13 +9,13 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; std::string Twine::str() const { SmallString<256> Vec; - toVector(Vec); - return std::string(Vec.begin(), Vec.end()); + return toStringRef(Vec).str(); } void Twine::toVector(SmallVectorImpl &Out) const { @@ -23,6 +23,13 @@ void Twine::toVector(SmallVectorImpl &Out) const { print(OS); } +StringRef Twine::toStringRef(SmallVectorImpl &Out) const { + if (isSingleStringRef()) + return getSingleStringRef(); + toVector(Out); + return StringRef(Out.data(), Out.size()); +} + void Twine::printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const { switch (Kind) { @@ -125,9 +132,9 @@ void Twine::printRepr(raw_ostream &OS) const { } void Twine::dump() const { - print(llvm::errs()); + print(llvm::dbgs()); } void Twine::dumpRepr() const { - printRepr(llvm::errs()); + printRepr(llvm::dbgs()); } diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc index 10e64aa990c9..c9a89e5b8c49 100644 --- a/lib/System/Win32/DynamicLibrary.inc +++ b/lib/System/Win32/DynamicLibrary.inc @@ -79,7 +79,7 @@ extern "C" { // Mingw32 uses msvcrt.dll by default. Don't ignore it. 
// Otherwise, user should be aware, what he's doing :) stricmp(ModuleName, "msvcrt") != 0 && -#endif +#endif stricmp(ModuleName, "msvcrt20") != 0 && stricmp(ModuleName, "msvcrt40") != 0) { OpenedHandles.push_back((HMODULE)ModuleBase); @@ -119,24 +119,24 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename, extern "C" { extern void *SYM; } #if defined(__MINGW32__) - EXPLICIT_SYMBOL_DEF(_alloca); - EXPLICIT_SYMBOL_DEF(__main); - EXPLICIT_SYMBOL_DEF(__ashldi3); - EXPLICIT_SYMBOL_DEF(__ashrdi3); - EXPLICIT_SYMBOL_DEF(__cmpdi2); - EXPLICIT_SYMBOL_DEF(__divdi3); - EXPLICIT_SYMBOL_DEF(__fixdfdi); - EXPLICIT_SYMBOL_DEF(__fixsfdi); - EXPLICIT_SYMBOL_DEF(__fixunsdfdi); - EXPLICIT_SYMBOL_DEF(__fixunssfdi); - EXPLICIT_SYMBOL_DEF(__floatdidf); - EXPLICIT_SYMBOL_DEF(__floatdisf); - EXPLICIT_SYMBOL_DEF(__lshrdi3); - EXPLICIT_SYMBOL_DEF(__moddi3); - EXPLICIT_SYMBOL_DEF(__udivdi3); - EXPLICIT_SYMBOL_DEF(__umoddi3); + EXPLICIT_SYMBOL_DEF(_alloca) + EXPLICIT_SYMBOL_DEF(__main) + EXPLICIT_SYMBOL_DEF(__ashldi3) + EXPLICIT_SYMBOL_DEF(__ashrdi3) + EXPLICIT_SYMBOL_DEF(__cmpdi2) + EXPLICIT_SYMBOL_DEF(__divdi3) + EXPLICIT_SYMBOL_DEF(__fixdfdi) + EXPLICIT_SYMBOL_DEF(__fixsfdi) + EXPLICIT_SYMBOL_DEF(__fixunsdfdi) + EXPLICIT_SYMBOL_DEF(__fixunssfdi) + EXPLICIT_SYMBOL_DEF(__floatdidf) + EXPLICIT_SYMBOL_DEF(__floatdisf) + EXPLICIT_SYMBOL_DEF(__lshrdi3) + EXPLICIT_SYMBOL_DEF(__moddi3) + EXPLICIT_SYMBOL_DEF(__udivdi3) + EXPLICIT_SYMBOL_DEF(__umoddi3) #elif defined(_MSC_VER) - EXPLICIT_SYMBOL_DEF(_alloca_probe); + EXPLICIT_SYMBOL_DEF(_alloca_probe) #endif #endif @@ -181,7 +181,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { EXPLICIT_SYMBOL2(alloca, _alloca); #undef EXPLICIT_SYMBOL #undef EXPLICIT_SYMBOL2 -#undef EXPLICIT_SYMBOL_DEF +#undef EXPLICIT_SYMBOL_DEF } #elif defined(_MSC_VER) { @@ -189,8 +189,8 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { EXPLICIT_SYMBOL2(_alloca, _alloca_probe); #undef EXPLICIT_SYMBOL #undef EXPLICIT_SYMBOL2 -#undef EXPLICIT_SYMBOL_DEF - } +#undef EXPLICIT_SYMBOL_DEF + } #endif return 0; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7cfa09746707..969c4a4c861b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -938,6 +938,35 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, return false; } +/// Create a copy of a const pool value. Update CPI to the new index and return +/// the label UID. 
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { + MachineConstantPool *MCP = MF.getConstantPool(); + ARMFunctionInfo *AFI = MF.getInfo(); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + assert(MCPE.isMachineConstantPoolEntry() && + "Expecting a machine constantpool entry!"); + ARMConstantPoolValue *ACPV = + static_cast(MCPE.Val.MachineCPVal); + + unsigned PCLabelId = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *NewCPV = 0; + if (ACPV->isGlobalValue()) + NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, + ARMCP::CPValue, 4); + else if (ACPV->isExtSymbol()) + NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), + ACPV->getSymbol(), PCLabelId, 4); + else if (ACPV->isBlockAddress()) + NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); + else + llvm_unreachable("Unexpected ARM constantpool value type!!"); + CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + return PCLabelId; +} + void ARMBaseInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -960,28 +989,8 @@ reMaterialize(MachineBasicBlock &MBB, case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); - MachineConstantPool *MCP = MF.getConstantPool(); unsigned CPI = Orig->getOperand(1).getIndex(); - const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; - assert(MCPE.isMachineConstantPoolEntry() && - "Expecting a machine constantpool entry!"); - ARMConstantPoolValue *ACPV = - static_cast(MCPE.Val.MachineCPVal); - unsigned PCLabelId = AFI->createConstPoolEntryUId(); - ARMConstantPoolValue *NewCPV = 0; - if (ACPV->isGlobalValue()) - NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, - ARMCP::CPValue, 4); - else if (ACPV->isExtSymbol()) - NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), - ACPV->getSymbol(), PCLabelId, 4); - else if (ACPV->isBlockAddress()) - NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, - ARMCP::CPBlockAddress, 4); - else - llvm_unreachable("Unexpected ARM constantpool value type!!"); - CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + unsigned PCLabelId = duplicateCPV(MF, CPI); MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), DestReg) .addConstantPoolIndex(CPI).addImm(PCLabelId); @@ -994,6 +1003,22 @@ reMaterialize(MachineBasicBlock &MBB, NewMI->getOperand(0).setSubReg(SubIdx); } +MachineInstr * +ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { + MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); + switch(Orig->getOpcode()) { + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + unsigned CPI = Orig->getOperand(1).getIndex(); + unsigned PCLabelId = duplicateCPV(MF, CPI); + Orig->getOperand(1).setIndex(CPI); + Orig->getOperand(2).setImm(PCLabelId); + break; + } + } + return MI; +} + bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 78d9135ab034..0d9d4a755b2b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -287,6 +287,8 @@ class ARMBaseInstrInfo : public TargetInstrInfoImpl { const MachineInstr *Orig, const TargetRegisterInfo *TRI) const; + MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; + virtual bool isIdentical(const 
MachineInstr *MI, const MachineInstr *Other, const MachineRegisterInfo *MRI) const; }; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7aebdf484f59..f1b6e1d8c578 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -217,7 +217,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; } -BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector ARMBaseRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this everytime. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); @@ -494,7 +495,8 @@ needsStackRealignment(const MachineFunction &MF) const { !MFI->hasVarSizedObjects()); } -bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { +bool ARMBaseRegisterInfo:: +cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); if (NoFramePointerElim && MFI->hasCalls()) return true; @@ -523,7 +525,7 @@ static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these -/// instructions will require scratch register during their expansion later. +/// instructions will require a scratch register during their expansion later. unsigned ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { unsigned Limit = (1 << 12) - 1; @@ -547,6 +549,9 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { // When the stack offset is negative, we will end up using // the i8 instructions instead. return (1 << 8) - 1; + + if (AddrMode == ARMII::AddrMode6) + return 0; break; // At most one FI per instruction } } @@ -557,7 +562,7 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { void ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *RS) const { // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -852,7 +857,7 @@ int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { } unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { + const MachineFunction &MF) const { switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. 
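The ARMBaseInstrInfo change above is an extract-helper refactoring: the constant-pool cloning that reMaterialize used to do inline moves into duplicateCPV so that the new duplicate hook can share it. The invariant both callers need is that a pic constant-pool load (tLDRpci_pic / t2LDRpci_pic) owns its constant-pool entry and pc label, so each copy must receive a fresh entry and label rather than aliasing the original's. A toy model of that invariant, using stand-in types rather than LLVM's real classes:

#include <cassert>

// Stand-ins for a machine constant pool and a pic constant-pool load.
struct Pool {
  unsigned NextIndex, NextLabel;
  Pool() : NextIndex(0), NextLabel(0) {}
  unsigned cloneEntry(unsigned /*OldCPI*/) { return NextIndex++; }
  unsigned newPCLabel() { return NextLabel++; }
};
struct PicLoad { unsigned CPI, PCLabelId; };

// Mirrors what duplicateCPV provides: the copy gets its own pool entry
// and pc label, because a bitwise clone would leave both loads sharing one.
static PicLoad duplicatePicLoad(const PicLoad &Orig, Pool &P) {
  PicLoad Copy = Orig;
  Copy.CPI = P.cloneEntry(Orig.CPI);
  Copy.PCLabelId = P.newPCLabel();
  return Copy;
}

int main() {
  Pool P;
  PicLoad A = { P.cloneEntry(0), P.newPCLabel() };
  PicLoad B = duplicatePicLoad(A, P);
  assert(B.CPI != A.CPI && B.PCLabelId != A.PCLabelId);
  return 0;
}
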
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index d63f3e66fa4f..14a45b3ab45c 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -64,53 +64,53 @@ class ARMDAGToDAGISel : public SelectionDAGISel { return CurDAG->getTargetConstant(Imm, MVT::i32); } - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); virtual void InstructionSelect(); - bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); - bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode2Offset(SDValue Op, SDValue N, + bool SelectAddrMode2Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode3Offset(SDValue Op, SDValue N, + bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode4(SDValue Op, SDValue N, SDValue &Addr, + bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Mode); - bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, SDValue &Opc, SDValue &Align); - bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, + bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); - bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale, + bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2ShifterOperandReg(SDValue Op, SDValue N, + bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &Opc); - bool SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, + bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); - bool SelectT2AddrModeImm8s4(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2AddrModeSoReg(SDValue Op, SDValue N, 
SDValue &Base, + bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); // Include the pieces autogenerated from the target description. @@ -119,48 +119,48 @@ class ARMDAGToDAGISel : public SelectionDAGISel { private: /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for /// ARM. - SDNode *SelectARMIndexedLoad(SDValue Op); - SDNode *SelectT2IndexedLoad(SDValue Op); + SDNode *SelectARMIndexedLoad(SDNode *N); + SDNode *SelectT2IndexedLoad(SDNode *N); /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. - SDNode *SelectDYN_ALLOC(SDValue Op); + SDNode *SelectDYN_ALLOC(SDNode *N); /// SelectVLD - Select NEON load intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. /// For NumVecs == 2, QOpcodes1 is not used. - SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes, + SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. /// For NumVecs == 2, QOpcodes1 is not used. - SDNode *SelectVST(SDValue Op, unsigned NumVecs, unsigned *DOpcodes, + SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// load/store of D registers and even subregs and odd subregs of Q registers. - SDNode *SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs, + SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc); + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc); /// SelectCMOVOp - Select CMOV instructions for ARM. 
- SDNode *SelectCMOVOp(SDValue Op); - SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectCMOVOp(SDNode *N); + SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -206,7 +206,7 @@ void ARMDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &ShReg, @@ -230,7 +230,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, return true; } -bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL) { @@ -340,9 +340,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { - unsigned Opcode = Op.getOpcode(); + unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? cast(Op)->getAddressingMode() : cast(Op)->getAddressingMode(); @@ -379,7 +379,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N, } -bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::SUB) { @@ -429,9 +429,9 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { - unsigned Opcode = Op.getOpcode(); + unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? 
cast(Op)->getAddressingMode() : cast(Op)->getAddressingMode(); @@ -451,14 +451,14 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode4(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Mode) { Addr = N; Mode = CurDAG->getTargetConstant(0, MVT::i32); return true; } -bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset) { if (N.getOpcode() != ISD::ADD) { Base = N; @@ -506,7 +506,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, SDValue &Opc, SDValue &Align) { Addr = N; @@ -518,7 +518,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); @@ -530,10 +530,10 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset){ // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = Op->getDebugLoc(); if (N.getOpcode() != ISD::ADD) { ConstantSDNode *NC = dyn_cast(N); if (!NC || NC->getZExtValue() != 0) @@ -549,7 +549,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, } bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, +ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm, SDValue &Offset) { if (Scale == 4) { @@ -605,25 +605,25 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset) { return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset); } -bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset) { return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset); } -bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset) { return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset); } -bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); @@ -659,7 +659,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &Opc) { ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); @@ -679,7 +679,7 @@ bool 
ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R + imm12 operands. @@ -729,7 +729,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R - imm8 operands. if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) { @@ -753,9 +753,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm){ - unsigned Opcode = Op.getOpcode(); + unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? cast(Op)->getAddressingMode() : cast(Op)->getAddressingMode(); @@ -772,7 +772,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::ADD) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { @@ -798,7 +798,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. @@ -854,8 +854,8 @@ static inline SDValue getAL(SelectionDAG *CurDAG) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); } -SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { - LoadSDNode *LD = cast(Op); +SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) return NULL; @@ -866,23 +866,23 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { unsigned Opcode = 0; bool Match = false; if (LoadedVT == MVT::i32 && - SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; Match = true; } else if (LoadedVT == MVT::i16 && - SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { if (LD->getExtensionType() == ISD::SEXTLOAD) { - if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; } } else { - if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; Opcode = isPre ? 
ARM::LDRB_PRE : ARM::LDRB_POST; } @@ -894,15 +894,15 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, Ops, 6); } return NULL; } -SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { - LoadSDNode *LD = cast(Op); +SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) return NULL; @@ -913,7 +913,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); unsigned Opcode = 0; bool Match = false; - if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) { + if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { switch (LoadedVT.getSimpleVT().SimpleTy) { case MVT::i32: Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; @@ -942,20 +942,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, Ops, 5); } return NULL; } -SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - EVT VT = Op.getValueType(); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - SDValue Align = Op.getOperand(2); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); + SDValue Size = N->getOperand(1); + SDValue Align = N->getOperand(2); SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); int32_t AlignVal = cast(Align)->getSExtValue(); if (AlignVal < 0) @@ -1030,15 +1029,14 @@ static EVT GetNEONSubregVT(EVT VT) { } } -SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, +SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1124,15 +1122,14 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, return NULL; } -SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, +SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); - SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1225,16 +1222,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, return NULL; } -SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue 
Op, bool IsLoad, +SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); - SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1324,38 +1320,38 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, return NULL; } -SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, +SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc) { if (!Subtarget->hasV6T2Ops()) return NULL; unsigned Shl_imm = 0; - if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) { + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); unsigned Srl_imm = 0; - if (isInt32Immediate(Op.getOperand(1), Srl_imm)) { + if (isInt32Immediate(N->getOperand(1), Srl_imm)) { assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); unsigned Width = 32 - Srl_imm; int LSB = Srl_imm - Shl_imm; if (LSB < 0) return NULL; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { Op.getOperand(0).getOperand(0), + SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } return NULL; } SDNode *ARMDAGToDAGISel:: -SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { SDValue CPTmp0; SDValue CPTmp1; - if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) { + if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) { unsigned SOVal = cast(CPTmp1)->getZExtValue(); unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); unsigned Opc = 0; @@ -1372,27 +1368,27 @@ SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); } return 0; } SDNode *ARMDAGToDAGISel:: -SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { SDValue CPTmp0; SDValue CPTmp1; SDValue CPTmp2; - if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7); } return 0; } SDNode *ARMDAGToDAGISel:: -SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, 
SDValue CCR, SDValue InFlag) { ConstantSDNode *T = dyn_cast(TrueVal); if (!T) @@ -1402,14 +1398,14 @@ SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), + return CurDAG->SelectNodeTo(N, ARM::t2MOVCCi, MVT::i32, Ops, 5); } return 0; } SDNode *ARMDAGToDAGISel:: -SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { ConstantSDNode *T = dyn_cast(TrueVal); if (!T) @@ -1419,19 +1415,19 @@ SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), + return CurDAG->SelectNodeTo(N, ARM::MOVCCi, MVT::i32, Ops, 5); } return 0; } -SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { - EVT VT = Op.getValueType(); - SDValue FalseVal = Op.getOperand(0); - SDValue TrueVal = Op.getOperand(1); - SDValue CC = Op.getOperand(2); - SDValue CCR = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); +SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue CC = N->getOperand(2); + SDValue CCR = N->getOperand(3); + SDValue InFlag = N->getOperand(4); assert(CC.getOpcode() == ISD::Constant); assert(CCR.getOpcode() == ISD::Register); ARMCC::CondCodes CCVal = @@ -1445,18 +1441,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { SDValue CPTmp1; SDValue CPTmp2; if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectT2CMOVShiftOp(Op, TrueVal, FalseVal, + Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; } else { - SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal, + Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; @@ -1469,18 +1465,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) // Pattern complexity = 10 cost = 1 size = 0 if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal, + Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; } else { - SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal, + Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; @@ -1514,11 +1510,10 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { Opc = ARM::VMOVDcc; break; } - return 
CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } -SDNode *ARMDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) @@ -1569,7 +1564,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, Ops, 6); } - ReplaceUses(Op, SDValue(ResNode, 0)); + ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); return NULL; } @@ -1593,28 +1588,28 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } } case ARMISD::DYN_ALLOC: - return SelectDYN_ALLOC(Op); + return SelectDYN_ALLOC(N); case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(Op, + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) return I; break; case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(Op, + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) return I; break; case ISD::MUL: if (Subtarget->isThumb1Only()) break; - if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) { unsigned RHSV = C->getZExtValue(); if (!RHSV) break; if (isPowerOf2_32(RHSV-1)) { // 2^n+1? unsigned ShImm = Log2_32(RHSV-1); if (ShImm >= 32) break; - SDValue V = Op.getOperand(0); + SDValue V = N->getOperand(0); ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); @@ -1630,7 +1625,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { unsigned ShImm = Log2_32(RHSV+1); if (ShImm >= 32) break; - SDValue V = Op.getOperand(0); + SDValue V = N->getOperand(0); ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); @@ -1650,7 +1645,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { // are entirely contributed by c2 and lower 16-bits are entirely contributed // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). // Select it to: "movt x, ((c1 & 0xffff) >> 16) - EVT VT = Op.getValueType(); + EVT VT = N->getValueType(0); if (VT != MVT::i32) break; unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) @@ -1658,7 +1653,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); if (!Opc) break; - SDValue N0 = Op.getOperand(0), N1 = Op.getOperand(1); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); if (!N1C) break; @@ -1683,18 +1678,18 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } case ARMISD::VMOVRRD: return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, - Op.getOperand(0), getAL(CurDAG), + N->getOperand(0), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { if (Subtarget->isThumb1Only()) break; if (Subtarget->isThumb()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); } else { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); @@ -1704,11 +1699,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { if (Subtarget->isThumb1Only()) break; if (Subtarget->isThumb()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); } else { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); @@ -1717,9 +1712,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case ISD::LOAD: { SDNode *ResNode = 0; if (Subtarget->isThumb() && Subtarget->hasThumb2()) - ResNode = SelectT2IndexedLoad(Op); + ResNode = SelectT2IndexedLoad(N); else - ResNode = SelectARMIndexedLoad(Op); + ResNode = SelectARMIndexedLoad(N); if (ResNode) return ResNode; // Other cases are autogenerated. @@ -1740,11 +1735,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { unsigned Opc = Subtarget->isThumb() ? ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; - SDValue Chain = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - SDValue N3 = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue InFlag = N->getOperand(4); assert(N1.getOpcode() == ISD::BasicBlock); assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); @@ -1756,23 +1751,23 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, MVT::Flag, Ops, 5); Chain = SDValue(ResNode, 0); - if (Op.getNode()->getNumValues() == 2) { + if (N->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); - ReplaceUses(SDValue(Op.getNode(), 1), InFlag); + ReplaceUses(SDValue(N, 1), InFlag); } - ReplaceUses(SDValue(Op.getNode(), 0), + ReplaceUses(SDValue(N, 0), SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } case ARMISD::CMOV: - return SelectCMOVOp(Op); + return SelectCMOVOp(N); case ARMISD::CNEG: { - EVT VT = Op.getValueType(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - SDValue N3 = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue InFlag = N->getOperand(4); assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); @@ -1791,7 +1786,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { Opc = ARM::VNEGDcc; break; } - return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } case ARMISD::VZIP: { @@ -1863,7 +1858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, ARM::VLD2d32, ARM::VLD2d64 }; unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; - return SelectVLD(Op, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { @@ -1871,7 +1866,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VLD3d32, ARM::VLD3d64 }; unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; - return SelectVLD(Op, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { @@ -1879,35 +1874,35 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VLD4d32, ARM::VLD4d64 }; unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; - return SelectVLD(Op, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; - return SelectVLDSTLane(Op, true, 2, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld3lane: { unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; - return SelectVLDSTLane(Op, 
true, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4lane: { unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; - return SelectVLDSTLane(Op, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2: { unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, ARM::VST2d32, ARM::VST2d64 }; unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; - return SelectVST(Op, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { @@ -1915,7 +1910,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VST3d32, ARM::VST3d64 }; unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; - return SelectVST(Op, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { @@ -1923,34 +1918,34 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VST4d32, ARM::VST4d64 }; unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; - return SelectVST(Op, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; - return SelectVLDSTLane(Op, false, 2, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst3lane: { unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; - return SelectVLDSTLane(Op, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4lane: { unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; - return SelectVLDSTLane(Op, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } } } - return SelectCode(Op); + return SelectCode(N); } bool ARMDAGToDAGISel:: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 334baaed00f3..7b62c00e1dbd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3130,6 +3130,9 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // exitMBB: // ... BB = exitMBB; + + MF->DeleteMachineInstr(MI); // The instruction is gone now. 
+
   return BB;
 }

@@ -3140,7 +3143,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

   const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction *F = BB->getParent();
+  MachineFunction *MF = BB->getParent();
   MachineFunction::iterator It = BB;
   ++It;

@@ -3155,7 +3158,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
   case 1:
     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
-    strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
     break;
   case 2:
     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
@@ -3167,13 +3170,13 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
     break;
   }

-  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  F->insert(It, loopMBB);
-  F->insert(It, exitMBB);
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
   exitMBB->transferSuccessors(BB);

-  MachineRegisterInfo &RegInfo = F->getRegInfo();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
   unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
   unsigned scratch2 = (!BinOpcode) ? incr :
     RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
@@ -3216,7 +3219,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   //  ...
   BB = exitMBB;

-  F->DeleteMachineInstr(MI);   // The instruction is gone now.
+  MF->DeleteMachineInstr(MI);   // The instruction is gone now.

   return BB;
 }
@@ -4258,10 +4261,10 @@ std::pair<unsigned, const TargetRegisterClass*>
 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const {
   if (Constraint.size() == 1) {
-    // GCC RS6000 Constraint Letters
+    // GCC ARM Constraint Letters
     switch (Constraint[0]) {
     case 'l':
-      if (Subtarget->isThumb1Only())
+      if (Subtarget->isThumb())
         return std::make_pair(0U, ARM::tGPRRegisterClass);
       else
         return std::make_pair(0U, ARM::GPRRegisterClass);
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index da8b3733c9a3..f67e74a99ed6 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -127,8 +127,8 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
 def IsARM    : Predicate<"!Subtarget->isThumb()">;
 def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
-def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
+def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">;
+def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">;

 // FIXME: Eventually this will be just "hasV6T2Ops".
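Two separate correctness fixes sit in the ARMISelLowering.cpp hunks above: EmitAtomicCmpSwap now deletes the expanded pseudo-instruction, matching the DeleteMachineInstr call EmitAtomicBinary already makes, and the byte-sized Thumb2 path pairs t2LDREXB with t2STREXB where it previously reused the load-exclusive opcode for the store slot. A compilable sketch of the corrected pairing, with local enumerators standing in for the ARM::* opcodes:

#include <cassert>
#include <cstdio>

// Local enumerators standing in for the ARM::* opcode values used above.
enum Opcode { LDREXB, STREXB, t2LDREXB, t2STREXB,
              LDREXH, STREXH, t2LDREXH, t2STREXH,
              LDREX,  STREX,  t2LDREX,  t2STREX };

// Select a matched load-exclusive/store-exclusive pair for the operand size.
void pickExclusivePair(unsigned Size, bool isThumb2,
                       Opcode &ldrOpc, Opcode &strOpc) {
  switch (Size) {
  case 1:
    ldrOpc = isThumb2 ? t2LDREXB : LDREXB;
    strOpc = isThumb2 ? t2STREXB : STREXB;  // the bug used t2LDREXB here
    break;
  case 2:
    ldrOpc = isThumb2 ? t2LDREXH : LDREXH;
    strOpc = isThumb2 ? t2STREXH : STREXH;
    break;
  case 4:
    ldrOpc = isThumb2 ? t2LDREX : LDREX;
    strOpc = isThumb2 ? t2STREX : STREX;
    break;
  default:
    assert(0 && "unsupported size for atomic expansion");
  }
}

int main() {
  Opcode Ld, St;
  pickExclusivePair(1, /*isThumb2=*/true, Ld, St);
  std::printf("thumb2 byte pair: %d/%d\n", (int)Ld, (int)St);
}

With the old pairing the expansion emitted a second load-exclusive where the store-exclusive belongs, so the retry loop could never publish its result.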
def UseMovt : Predicate<"Subtarget->useMovt()">; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 34d7d8f6eff5..603ccf56180e 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -113,7 +113,7 @@ def t_addrmode_s1 : Operand, def t_addrmode_sp : Operand, ComplexPattern { let PrintMethod = "printThumbAddrModeSPOperand"; - let MIOperandInfo = (ops JustSP:$base, i32imm:$offsimm); + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } //===----------------------------------------------------------------------===// @@ -208,9 +208,8 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst", [(brind GPR:$dst)]>, - T1Special<{1,0,?,?}> { - // = pc - let Inst{7} = 1; + T1Special<{1,0,1,1}> { + // = Inst{7:2-0} = pc let Inst{2-0} = 0b111; } } @@ -342,16 +341,28 @@ def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, T1LdSt<0b100>; +def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, + "ldr", "\t$dst, $addr", + []>, + T1LdSt4Imm<{1,?,?}>; def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>, T1LdSt<0b110>; +def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, + "ldrb", "\t$dst, $addr", + []>, + T1LdSt1Imm<{1,?,?}>; def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>, T1LdSt<0b101>; +def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, + "ldrh", "\t$dst, $addr", + []>, + T1LdSt2Imm<{1,?,?}>; let AddedComplexity = 10 in def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, @@ -397,16 +408,28 @@ def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, "str", "\t$src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>, T1LdSt<0b000>; +def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, + "str", "\t$src, $addr", + []>, + T1LdSt4Imm<{0,?,?}>; def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, "strb", "\t$src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>, T1LdSt<0b010>; +def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, + "strb", "\t$src, $addr", + []>, + T1LdSt1Imm<{0,?,?}>; def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, "strh", "\t$src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>, T1LdSt<0b001>; +def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, + "strh", "\t$src, $addr", + []>, + T1LdSt2Imm<{0,?,?}>; def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, "str", "\t$src, $addr", @@ -748,7 +771,7 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. // 16-bit movcc in IT blocks for Thumb2. 
def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, - T1Special<{1,0,?,?}>; + T1Special<{1,0,1,1}>; def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6f20ed4e9302..769df7ed9160 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -360,8 +360,8 @@ multiclass T2I_bin_ii12rs op23_21, string opc, PatFrag opnode, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; - let Inst{24} = 1; let Inst{26-25} = 0b01; + let Inst{24} = 1; let Inst{23-21} = op23_21; let Inst{20} = 0; // The S bit. } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b13f98acb781..b78b95b22ebf 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -740,6 +740,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, /// isMemoryOp - Returns true if instruction is a memory operations (that this /// pass is capable of operating on). static bool isMemoryOp(const MachineInstr *MI) { + if (MI->hasOneMemOperand()) { + const MachineMemOperand *MMO = *MI->memoperands_begin(); + + // Don't touch volatile memory accesses - we may be changing their order. + if (MMO->isVolatile()) + return false; + + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not. + if (MMO->getAlignment() < 4) + return false; + } + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 9fbde815cdb5..d393e8d7e3e2 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -367,19 +367,6 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; -// Just the stack pointer (for tSTRspi and friends). -def JustSP : RegisterClass<"ARM", [i32], 32, [SP]> { - let MethodProtos = [{ - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - JustSPClass::iterator - JustSPClass::allocation_order_end(const MachineFunction &MF) const { - return allocation_order_begin(MF); - } - }]; -} - //===----------------------------------------------------------------------===// // Subregister Set Definitions... now that we have all of the pieces, define the // sub registers for each register. 
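Before the patch moves on to the assembler parser: the new early-outs in ARMLoadStoreOptimizer's isMemoryOp above keep the load/store-multiple merger away from accesses it could miscompile. Volatile operations must not be reordered, and, per the comment in the hunk, unaligned ldr/str may be emulated by the kernel while unaligned ldm/stm is not. A self-contained sketch of the same predicate, with a plain struct standing in for MachineMemOperand:

#include <cstdio>

// Plain stand-in for the single MachineMemOperand attached to a candidate.
struct MemOp {
  bool IsVolatile;
  unsigned Alignment;   // in bytes
};

// Mirrors the two new bail-outs: the merger only touches non-volatile,
// word-aligned accesses.
bool mergeableIntoLoadStoreMultiple(const MemOp &MMO) {
  if (MMO.IsVolatile)
    return false;       // merging may reorder accesses
  if (MMO.Alignment < 4)
    return false;       // ldm/stm require word alignment
  return true;
}

int main() {
  MemOp Aligned = {false, 4}, Unaligned = {false, 2};
  std::printf("%d %d\n", (int)mergeableIntoLoadStoreMultiple(Aligned),
              (int)mergeableIntoLoadStoreMultiple(Unaligned)); // prints: 1 0
}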
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ed4667ba1855..132738efdfb3 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmLexer.h"
 #include "llvm/MC/MCAsmParser.h"
+#include "llvm/MC/MCParsedAsmOperand.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -78,7 +79,7 @@ class ARMAsmParser : public TargetAsmParser {
   /// @name Auto-generated Match Functions
   /// {

-  bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
+  bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                         MCInst &Inst);

   /// MatchRegisterName - Match the given string to a register name and return
@@ -94,14 +95,15 @@ class ARMAsmParser : public TargetAsmParser {
   ARMAsmParser(const Target &T, MCAsmParser &_Parser)
     : TargetAsmParser(T), Parser(_Parser) {}

-  virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

   virtual bool ParseDirective(AsmToken DirectiveID);
 };

 /// ARMOperand - Instances of this class represent a parsed ARM machine
 /// instruction.
-struct ARMOperand {
+struct ARMOperand : public MCParsedAsmOperand {
   enum {
     Token,
     Register,
@@ -515,9 +517,10 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
 }

 /// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
-                                    MCInst &Inst) {
-  struct ARMOperand Op0 = Operands[0];
+bool ARMAsmParser::
+MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                 MCInst &Inst) {
+  ARMOperand &Op0 = *(ARMOperand*)Operands[0];
   assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
   const StringRef &Mnemonic = Op0.getToken();
   if (Mnemonic == "add" ||
@@ -578,33 +581,27 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
 }

 /// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
-  SmallVector Operands;
-
-  Operands.push_back(ARMOperand::CreateToken(Name));
+bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  Operands.push_back(new ARMOperand(ARMOperand::CreateToken(Name)));

   SMLoc Loc = getLexer().getTok().getLoc();
   if (getLexer().isNot(AsmToken::EndOfStatement)) {

     // Read the first operand.
-    Operands.push_back(ARMOperand());
-    if (ParseOperand(Operands.back()))
-      return true;
+    ARMOperand Op;
+    if (ParseOperand(Op)) return true;
+    Operands.push_back(new ARMOperand(Op));

     while (getLexer().is(AsmToken::Comma)) {
       getLexer().Lex();  // Eat the comma.

       // Parse and remember the operand.
- Operands.push_back(ARMOperand()); - if (ParseOperand(Operands.back())) - return true; + if (ParseOperand(Op)) return true; + Operands.push_back(new ARMOperand(Op)); } } - if (!MatchInstruction(Operands, Inst)) - return false; - - Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); - return true; + return false; } /// ParseDirective parses the arm specific directives diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 931d8df0317b..2d135337b66a 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -175,16 +175,16 @@ namespace { printDataDirective(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast(MCPV); - std::string Name; + SmallString<128> TmpNameStr; if (ACPV->isLSDA()) { - SmallString<16> LSDAName; - raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + raw_svector_ostream(TmpNameStr) << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); - Name = LSDAName.str(); + O << TmpNameStr.str(); } else if (ACPV->isBlockAddress()) { - Name = GetBlockAddressSymbol(ACPV->getBlockAddress())->getName(); + O << GetBlockAddressSymbol(ACPV->getBlockAddress())->getName(); } else if (ACPV->isGlobalValue()) { + std::string Name; GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); @@ -201,16 +201,16 @@ namespace { GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) : MMIMachO.getGVStubEntry(Sym); if (StubSym == 0) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TmpNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TmpNameStr.str()); } } + O << Name; } else { assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); - Name = Mang->makeNameProper(ACPV->getSymbol()); + Mang->getNameWithPrefix(TmpNameStr, ACPV->getSymbol()); + OutContext.GetOrCreateSymbol(TmpNameStr.str())->print(O, MAI); } - O << Name; if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; if (ACPV->getPCAdjustment() != 0) { @@ -392,9 +392,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - std::string Name = Mang->makeNameProper(MO.getSymbolName()); - - O << Name; + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, MO.getSymbolName()); + OutContext.GetOrCreateSymbol(NameStr.str())->print(O, MAI); + if (isCallOp && Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) O << "(PLT)"; diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 5b0a89d32d65..eaefef9c8b47 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -157,7 +157,7 @@ namespace { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
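The Alpha hunks beginning above repeat the interface migration already seen in the ARM selector: Select and its helpers now take the SDNode* being selected instead of an SDValue. An SDValue is only a (node, result number) pair, and instruction selection is a per-node operation, so the wrapper added nothing; where a particular result matters it is now written out as SDValue(N, ResNo), as in the ReplaceUses calls. A toy model of the before/after shapes, using minimal local types rather than LLVM's:

#include <cstdio>

// Minimal local stand-ins: an SDValue is a node pointer plus a result index.
struct SDNode  { unsigned Opcode; };
struct SDValue {
  SDNode  *Node;
  unsigned ResNo;
  SDValue(SDNode *N, unsigned R) : Node(N), ResNo(R) {}
  unsigned getOpcode() const { return Node->Opcode; }  // forwarded to the node
};

// Old shape: every hook took the wrapper and immediately unwrapped it.
SDNode *SelectOld(SDValue Op) {
  SDNode *N = Op.Node;   // the boilerplate line this patch deletes everywhere
  return N;
}

// New shape: the node is passed directly; a result is named only when needed,
// e.g. ReplaceUses(SDValue(N, 0), Result) in the hunks above.
SDNode *SelectNew(SDNode *N) {
  return N;
}

int main() {
  SDNode N = { 42 };
  std::printf("%u %u\n", SelectOld(SDValue(&N, 0))->Opcode,
              SelectNew(&N)->Opcode);  // prints: 42 42
}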
@@ -202,7 +202,7 @@ namespace { SDNode *getGlobalBaseReg(); SDNode *getGlobalRetAddr(); - void SelectCALL(SDValue Op); + void SelectCALL(SDNode *Op); }; } @@ -232,8 +232,7 @@ void AlphaDAGToDAGISel::InstructionSelect() { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. -SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { return NULL; // Already selected. } @@ -242,7 +241,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; case AlphaISD::CALL: - SelectCALL(Op); + SelectCALL(N); return NULL; case ISD::FrameIndex: { @@ -258,9 +257,9 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { case AlphaISD::DivCall: { SDValue Chain = CurDAG->getEntryNode(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1, SDValue(0,0)); Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2, @@ -287,7 +286,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { if (uval == 0) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Alpha::R31, MVT::i64); - ReplaceUses(Op, Result); + ReplaceUses(SDValue(N, 0), Result); return NULL; } @@ -415,13 +414,12 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { } - return SelectCode(Op); + return SelectCode(N); } -void AlphaDAGToDAGISel::SelectCALL(SDValue Op) { +void AlphaDAGToDAGISel::SelectCALL(SDNode *N) { //TODO: add flag stuff to prevent nondeturministic breakage! - SDNode *N = Op.getNode(); SDValue Chain = N->getOperand(0); SDValue Addr = N->getOperand(1); SDValue InFlag = N->getOperand(N->getNumOperands() - 1); @@ -442,8 +440,8 @@ void AlphaDAGToDAGISel::SelectCALL(SDValue Op) { } InFlag = Chain.getValue(1); - ReplaceUses(Op.getValue(0), Chain); - ReplaceUses(Op.getValue(1), InFlag); + ReplaceUses(SDValue(N, 0), Chain); + ReplaceUses(SDValue(N, 1), InFlag); } diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp index 917f7f54fb1e..0bd94d489c85 100644 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp @@ -25,12 +25,14 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -179,9 +181,12 @@ void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << Mang->getMangledName(MO.getGlobal()); printOffset(MO.getOffset()); break; - case MachineOperand::MO_ExternalSymbol: - O << Mang->makeNameProper(MO.getSymbolName()); + case MachineOperand::MO_ExternalSymbol: { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, MO.getSymbolName()); + OutContext.GetOrCreateSymbol(NameStr.str())->print(O, MAI); break; + } case MachineOperand::MO_ConstantPoolIndex: O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); diff 
--git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 2217af4f2d44..e1b60086e970 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -51,8 +51,8 @@ namespace { #include "BlackfinGenDAGISel.inc" private: - SDNode *Select(SDValue Op); - bool SelectADDRspii(SDValue Op, SDValue Addr, + SDNode *Select(SDNode *N); + bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); // Walk the DAG after instruction selection, fixing register class issues. @@ -82,8 +82,7 @@ void BlackfinDAGToDAGISel::InstructionSelect() { FixRegisterClasses(*CurDAG); } -SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) return NULL; // Already selected. @@ -99,10 +98,10 @@ SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) { } } - return SelectCode(Op); + return SelectCode(N); } -bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Op, +bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 1ab3c0a2405e..0fd975cffd8f 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -25,6 +25,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/InlineAsm.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantsScanner.h" #include "llvm/Analysis/FindUsedTypes.h" @@ -34,6 +35,7 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/Transforms/Scalar.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CallSite.h" @@ -341,6 +343,15 @@ namespace { char CWriter::ID = 0; + +static std::string Mangle(const std::string &S) { + std::string Result; + raw_string_ostream OS(Result); + MCSymbol::printMangledName(S, OS, 0); + return OS.str(); +} + + /// This method inserts names for any unnamed structure types that are used by /// the program, and removes names from structure types that are not used by the /// program. @@ -1431,8 +1442,11 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { std::string CWriter::GetValueName(const Value *Operand) { // Mangle globals with the standard mangler interface for LLC compatibility. - if (const GlobalValue *GV = dyn_cast(Operand)) - return Mang->getMangledName(GV); + if (const GlobalValue *GV = dyn_cast(Operand)) { + SmallString<128> Str; + Mang->getNameWithPrefix(Str, GV, false); + return Mangle(Str.str().str()); + } std::string Name = Operand->getName(); @@ -1857,7 +1871,6 @@ bool CWriter::doInitialization(Module &M) { // Ensure that all structure types have names... Mang = new Mangler(M); - Mang->markCharUnacceptable('.'); // Keep track of which functions are static ctors/dtors so they can have // an attribute added to their prototypes. @@ -2210,7 +2223,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // Print out forward declarations for structure types before anything else! 
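Backing up to the top of the CBackend.cpp diff: makeNameProper is gone, replaced by a local Mangle() helper that routes through MCSymbol::printMangledName, which is also why doInitialization no longer marks '.' as unacceptable by hand. Below is a standalone approximation of the kind of quoting such a helper must perform; the escape scheme is illustrative and is not the one MCSymbol implements.

#include <cctype>
#include <cstdio>
#include <string>

// Turn an arbitrary LLVM name (e.g. "l_struct.anon") into a valid C
// identifier by escaping every byte that may not appear in one.
std::string mangleForC(const std::string &S) {
  std::string Result;
  for (std::string::size_type i = 0; i != S.size(); ++i) {
    unsigned char C = (unsigned char)S[i];
    if (std::isalnum(C) || C == '_') {
      Result += (char)C;
    } else {
      char Buf[8];
      std::snprintf(Buf, sizeof(Buf), "_%02X_", C);  // escape the byte as hex
      Result += Buf;
    }
  }
  return Result;
}

int main() {
  // '.' (0x2E) is exactly the character the old markCharUnacceptable call
  // existed for; the quoting handles it without per-character registration.
  std::printf("%s\n", mangleForC("l_struct.anon").c_str()); // l_struct_2E_anon
}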
Out << "/* Structure forward decls */\n"; for (; I != End; ++I) { - std::string Name = "struct l_" + Mang->makeNameProper(I->first); + std::string Name = "struct " + Mangle("l_"+I->first); Out << Name << ";\n"; TypeNames.insert(std::make_pair(I->second, Name)); } @@ -2221,7 +2234,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // for struct or opaque types. Out << "/* Typedefs */\n"; for (I = TST.begin(); I != End; ++I) { - std::string Name = "l_" + Mang->makeNameProper(I->first); + std::string Name = Mangle("l_"+I->first); Out << "typedef "; printType(Out, I->second, false, Name); Out << ";\n"; @@ -2921,7 +2934,6 @@ void CWriter::lowerIntrinsics(Function &F) { case Intrinsic::setjmp: case Intrinsic::longjmp: case Intrinsic::prefetch: - case Intrinsic::dbg_stoppoint: case Intrinsic::powi: case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_cmp_ps: @@ -3178,20 +3190,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << "0; *((void**)&" << GetValueName(&I) << ") = __builtin_stack_save()"; return true; - case Intrinsic::dbg_stoppoint: { - // If we use writeOperand directly we get a "u" suffix which is rejected - // by gcc. - DbgStopPointInst &SPI = cast(I); - std::string dir; - GetConstantStringInfo(SPI.getDirectory(), dir); - std::string file; - GetConstantStringInfo(SPI.getFileName(), file); - Out << "\n#line " - << SPI.getLine() - << " \"" - << dir << '/' << file << "\"\n"; - return true; - } case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_cmp_ps: case Intrinsic::x86_sse2_cmp_sd: diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index c69a7514fdb6..80693e1801e1 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -277,10 +277,9 @@ namespace { return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); } - SDNode *emitBuildVector(SDValue build_vec) { - EVT vecVT = build_vec.getValueType(); + SDNode *emitBuildVector(SDNode *bvNode) { + EVT vecVT = bvNode->getValueType(0); EVT eltVT = vecVT.getVectorElementType(); - SDNode *bvNode = build_vec.getNode(); DebugLoc dl = bvNode->getDebugLoc(); // Check to see if this vector can be represented as a CellSPU immediate @@ -296,13 +295,13 @@ namespace { ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(build_vec); + return Select(bvNode); // No, need to emit a constant pool spill: std::vector CV; - for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast (build_vec.getOperand(i)); + for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { + ConstantSDNode *V = dyn_cast (bvNode->getOperand(i)); CV.push_back(const_cast (V->getConstantIntValue())); } @@ -312,49 +311,49 @@ namespace { SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl, + return SelectCode(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, PseudoSourceValue::getConstantPool(), 0, - false, Alignment)); + false, Alignment).getNode()); } /// Select - Convert the specified operand from a target-independent to a /// target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); //! 
Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDValue &Op, EVT OpVT); + SDNode *SelectSHLi64(SDNode *N, EVT OpVT); //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDValue &Op, EVT OpVT); + SDNode *SelectSRLi64(SDNode *N, EVT OpVT); //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDValue &Op, EVT OpVT); + SDNode *SelectSRAi64(SDNode *N, EVT OpVT); //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDValue &Op, EVT OpVT, DebugLoc dl); + SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl); //! Alternate instruction emit sequence for loading i64 constants SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl); //! Returns true if the address N is an A-form (local store) address - bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); //! D-form address predicate - bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); /// Alternate D-form address using i7 offset predicate - bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, + bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base); /// D-form address selection workhorse - bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp, + bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base, int minOffset, int maxOffset); //! Address predicate if N can be expressed as an indexed [r+r] operation. - bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for @@ -366,13 +365,13 @@ namespace { switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) - SelectXFormAddr(Op, Op, Op0, Op1); + if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) + && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) + SelectXFormAddr(Op.getNode(), Op, Op0, Op1); break; case 'o': // offsetable - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) { + if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) + && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) { Op0 = Op; Op1 = getSmallIPtrImm(0); } @@ -429,7 +428,7 @@ SPUDAGToDAGISel::InstructionSelect() \arg Index The base address index */ bool -SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { // These match the addr256k operand type: EVT OffsVT = MVT::i16; @@ -479,7 +478,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, } bool -SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, +SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { const int minDForm2Offset = -(1 << 7); const int maxDForm2Offset = (1 << 7) - 1; @@ -500,7 +499,7 @@ SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, to non-empty SDValue instances. 
*/ bool -SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return DFormAddressPredicate(Op, N, Base, Index, SPUFrameInfo::minFrameOffset(), @@ -508,7 +507,7 @@ SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, } bool -SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index, int minOffset, int maxOffset) { unsigned Opc = N.getOpcode(); @@ -618,7 +617,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, Index = N; return true; } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) { - unsigned OpOpc = Op.getOpcode(); + unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { // Direct load/store without getelementptr @@ -630,7 +629,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, else Addr = N; // Register - Offs = ((OpOpc == ISD::STORE) ? Op.getOperand(3) : Op.getOperand(2)); + Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2)); if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { if (Offs.getOpcode() == ISD::UNDEF) @@ -667,7 +666,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, (r)(r) X-form address. */ bool -SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { if (!SelectAFormAddr(Op, N, Base, Index) && !SelectDFormAddr(Op, N, Base, Index)) { @@ -685,12 +684,11 @@ SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, /*! */ SDNode * -SPUDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SPUDAGToDAGISel::Select(SDNode *N) { unsigned Opc = N->getOpcode(); int n_ops = -1; unsigned NewOpc; - EVT OpVT = Op.getValueType(); + EVT OpVT = N->getValueType(0); SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); @@ -700,8 +698,8 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (Opc == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); - SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); + SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0)); if (FI < 128) { NewOpc = SPU::AIr32; @@ -710,9 +708,9 @@ SPUDAGToDAGISel::Select(SDValue Op) { n_ops = 2; } else { NewOpc = SPU::Ar32; - Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType()); + Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, - Op.getValueType(), TFI, Imm0), + N->getValueType(0), TFI, Imm0), 0); n_ops = 2; } @@ -720,10 +718,10 @@ SPUDAGToDAGISel::Select(SDValue Op) { // Catch the i64 constants that end up here. Note: The backend doesn't // attempt to legalize the constant (it's useless because DAGCombiner // will insert 64-bit constants and we can't stop it). 
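A note on the interface change this file and every other target repeat: Select() and the ComplexPattern hooks now take the SDNode* directly instead of an SDValue, since selection always acted on the node and each target began by unwrapping Op.getNode(). A standalone mock of the two shapes (SDNodeMock/SDValueMock are illustrative stand-ins, not LLVM types):

#include <cstdio>

// Illustrative stand-ins: an SDValue is a (node, result-number) pair,
// and instruction selection only ever needs the node.
struct SDNodeMock { unsigned Opcode; };
struct SDValueMock {
  SDNodeMock *Node;
  unsigned ResNo;
  SDNodeMock *getNode() const { return Node; }
};

// Old shape: every Select() began by unwrapping the node by hand.
SDNodeMock *selectOld(SDValueMock Op) {
  SDNodeMock *N = Op.getNode(); // boilerplate repeated in every target
  return N;                     // ... select based on N ...
}

// New shape: the caller passes the node; the dead ResNo disappears.
SDNodeMock *selectNew(SDNodeMock *N) {
  return N; // ... select based on N ...
}

int main() {
  SDNodeMock N = {42};
  SDValueMock Op = {&N, 0};
  std::printf("%u %u\n", selectOld(Op)->Opcode, selectNew(&N)->Opcode);
}

The result number carried by the SDValue was dead weight in all of these signatures, which is why the mechanical .getNode() insertions appear at each call site in the hunks around here.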
- return SelectI64Constant(Op, OpVT, Op.getDebugLoc()); + return SelectI64Constant(N, OpVT, N->getDebugLoc()); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { - SDValue Op0 = Op.getOperand(0); + SDValue Op0 = N->getOperand(0); EVT Op0VT = Op0.getValueType(); EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), Op0VT, (128 / Op0VT.getSizeInBits())); @@ -760,9 +758,10 @@ SPUDAGToDAGISel::Select(SDValue Op) { break; } - SDNode *shufMaskLoad = emitBuildVector(shufMask); + SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); SDNode *PromoteScalar = - SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0)); + SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, + Op0VecVT, Op0).getNode()); SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, @@ -773,32 +772,32 @@ SPUDAGToDAGISel::Select(SDValue Op) { // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we // re-use it in the VEC2PREFSLOT selection without needing to explicitly // call SelectCode (it's already done for us.) - SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle)); + SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode()); return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle)); + zextShuffle).getNode()); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0)).getNode()); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0)).getNode()); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0)).getNode()); } else if (Opc == ISD::TRUNCATE) { - SDValue Op0 = Op.getOperand(0); + SDValue Op0 = N->getOperand(0); if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) && OpVT == MVT::i32 && Op0.getValueType() == MVT::i64) { @@ -834,22 +833,22 @@ SPUDAGToDAGISel::Select(SDValue Op) { } } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) { - return SelectSHLi64(Op, OpVT); + return SelectSHLi64(N, OpVT); } } else if (Opc == ISD::SRL) { if (OpVT == MVT::i64) { - return SelectSRLi64(Op, OpVT); + return SelectSRLi64(N, OpVT); } } else if (Opc == ISD::SRA) { if (OpVT == MVT::i64) { - return SelectSRAi64(Op, OpVT); + return SelectSRAi64(N, OpVT); } } else if (Opc == ISD::FNEG && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); // Check if the pattern is a special form of DFNMS: // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) - SDValue Op0 = Op.getOperand(0); + SDValue Op0 = 
N->getOperand(0); if (Op0.getOpcode() == ISD::FSUB) { SDValue Op00 = Op0.getOperand(0); if (Op00.getOpcode() == ISD::FMUL) { @@ -869,28 +868,28 @@ SPUDAGToDAGISel::Select(SDValue Op) { unsigned Opc = SPU::XORfneg64; if (OpVT == MVT::f64) { - signMask = SelectI64Constant(negConst, MVT::i64, dl); + signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl); } else if (OpVT == MVT::v2f64) { Opc = SPU::XORfnegvec; signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, - negConst, negConst)); + negConst, negConst).getNode()); } return CurDAG->getMachineNode(Opc, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + N->getOperand(0), SDValue(signMask, 0)); } else if (Opc == ISD::FABS) { if (OpVT == MVT::f64) { SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + N->getOperand(0), SDValue(signMask, 0)); } else if (OpVT == MVT::v2f64) { SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, absConst, absConst); - SDNode *signMask = emitBuildVector(absVec); + SDNode *signMask = emitBuildVector(absVec.getNode()); return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + N->getOperand(0), SDValue(signMask, 0)); } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT @@ -925,7 +924,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { // SPUInstrInfo catches the following patterns: // (SPUindirect (SPUhi ...), (SPUlo ...)) // (SPUindirect $sp, imm) - EVT VT = Op.getValueType(); + EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); RegisterSDNode *RN; @@ -952,7 +951,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { else return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); } else - return SelectCode(Op); + return SelectCode(N); } /*! 
@@ -968,15 +967,15 @@ SPUDAGToDAGISel::Select(SDValue Op) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) { - SDValue Op0 = Op.getOperand(0); +SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { + SDValue Op0 = N->getOperand(0); EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = Op.getOperand(1); + SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; SDValue SelMaskVal; - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); @@ -1034,14 +1033,14 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) { - SDValue Op0 = Op.getOperand(0); +SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { + SDValue Op0 = N->getOperand(0); EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = Op.getOperand(1); + SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); SDNode *VecOp0, *Shift = 0; - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); @@ -1101,16 +1100,16 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) { +SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { // Promote Op0 to vector EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = Op.getOperand(1); + SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); SDNode *VecOp0 = - CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0)); + CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0)); SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); SDNode *SignRot = @@ -1170,9 +1169,9 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) { /*! Do the necessary magic necessary to load a i64 constant */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, EVT OpVT, +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl) { - ConstantSDNode *CN = cast(Op.getNode()); + ConstantSDNode *CN = cast(N); return SelectI64Constant(CN->getZExtValue(), OpVT, dl); } @@ -1192,7 +1191,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ReplaceUses(i64vec, Op0); return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(Op0), 0)); + SDValue(emitBuildVector(Op0.getNode()), 0)); } else if (i64vec.getOpcode() == SPUISD::SHUFB) { SDValue lhs = i64vec.getOperand(0); SDValue rhs = i64vec.getOperand(1); @@ -1205,7 +1204,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() ? lhs.getNode() - : emitBuildVector(lhs)); + : emitBuildVector(lhs.getNode())); if (rhs.getOpcode() == ISD::BIT_CONVERT) { ReplaceUses(rhs, rhs.getOperand(0)); @@ -1214,7 +1213,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() ? 
rhs.getNode() - : emitBuildVector(rhs)); + : emitBuildVector(rhs.getNode())); if (shufmask.getOpcode() == ISD::BIT_CONVERT) { ReplaceUses(shufmask, shufmask.getOperand(0)); @@ -1223,18 +1222,18 @@ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() ? shufmask.getNode() - : emitBuildVector(shufmask)); + : emitBuildVector(shufmask.getNode())); SDNode *shufNode = Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0))); + SDValue(shufMaskNode, 0)).getNode()); return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(shufNode, 0)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(i64vec), 0)); + SDValue(emitBuildVector(i64vec.getNode()), 0)); } else { llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4d407695190d..4eec757297d8 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -146,12 +146,12 @@ namespace { private: DenseMap<SDNode*, SDNode*> RMWStores; void PreprocessForRMW(); - SDNode *Select(SDValue Op); - SDNode *SelectIndexedLoad(SDValue Op); - SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2, + SDNode *Select(SDNode *N); + SDNode *SelectIndexedLoad(SDNode *Op); + SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16); - bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); + bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp); #ifndef NDEBUG unsigned Indent; @@ -283,7 +283,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference.
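Background for the SelectIndexedLoad/SelectIndexedBinOp selectors below: MSP430's autoincrement addressing mode (@Rn+) loads through a register and advances it by the access size, so a load whose pointer feeds an add of 1 (byte) or 2 (word) can become a single post-increment load. A hedged standalone illustration of the source pattern being targeted (plain C++, not backend code):

#include <cstdint>
#include <cstdio>

// The `*p++` idiom is the pattern the selector looks for: a load through
// a pointer that is then advanced by the access size (2 bytes for a
// word). MSP430's @Rn+ mode performs both in one instruction, e.g.
// "mov.w @r14+, r12".
uint16_t sumWords(const uint16_t *p, unsigned n) {
  uint16_t s = 0;
  while (n--)
    s = static_cast<uint16_t>(s + *p++); // load, then post-increment
  return s;
}

int main() {
  uint16_t a[4] = {1, 2, 3, 40};
  std::printf("%u\n", sumWords(a, 4)); // prints 46
}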
-bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, +bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp) { MSP430ISelAddressMode AM; @@ -326,7 +326,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectAddr(Op, Op, Op0, Op1)) + if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) return true; break; } @@ -627,8 +627,8 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) { return true; } -SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op); +SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); if (!isValidIndexedLoad(LD)) return NULL; @@ -646,17 +646,17 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) { return NULL; } - return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), VT, MVT::i16, MVT::Other, LD->getBasePtr(), LD->getChain()); } -SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op, +SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) { + IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; @@ -667,7 +667,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op, MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand(); SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() }; SDNode *ResNode = - CurDAG->SelectNodeTo(Op.getNode(), Opc, + CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0, 3); cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -707,9 +707,8 @@ void MSP430DAGToDAGISel::InstructionSelect() { RMWStores.clear(); } -SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { - SDNode *Node = Op.getNode(); - DebugLoc dl = Op.getDebugLoc(); +SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs().indent(Indent) << "Selecting: "); @@ -730,7 +729,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { switch (Node->getOpcode()) { default: break; case ISD::FrameIndex: { - assert(Op.getValueType() == MVT::i16); + assert(Node->getValueType(0) == MVT::i16); int FI = cast<FrameIndexSDNode>(Node)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16); if (Node->hasOneUse()) @@ -740,18 +739,18 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { TFI, CurDAG->getTargetConstant(0, MVT::i16)); } case ISD::LOAD: - if (SDNode *ResNode = SelectIndexedLoad(Op)) + if (SDNode *ResNode = SelectIndexedLoad(Node)) return ResNode; // Other cases are autogenerated.
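The guard in SelectIndexedBinOp above is worth spelling out: a load is only folded into its user when it has a single use and IsLegalAndProfitableToFold confirms the merge cannot create a cycle through the chain. A toy version of the one-use half of that check (mock type, illustrative only):

#include <cstdio>

// Mock node with just enough state for the profitability question.
struct NodeMock {
  unsigned NumUses;
  bool hasOneUse() const { return NumUses == 1; }
};

// Fold a load into its single consumer only: if other users remain, the
// load must be kept anyway, and folding would read memory twice. (The
// real check also proves the fold cannot create a dependence cycle.)
bool shouldFoldLoad(const NodeMock &Load) {
  return Load.hasOneUse();
}

int main() {
  NodeMock shared = {2}, single = {1};
  std::printf("%d %d\n", shouldFoldLoad(shared), shouldFoldLoad(single));
}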
break; case ISD::ADD: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) return ResNode; @@ -759,8 +758,8 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::SUB: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::SUB8rm_POST, MSP430::SUB16rm_POST)) return ResNode; @@ -768,12 +767,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::AND: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::AND8rm_POST, MSP430::AND16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::AND8rm_POST, MSP430::AND16rm_POST)) return ResNode; @@ -781,12 +780,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::OR: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::OR8rm_POST, MSP430::OR16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::OR8rm_POST, MSP430::OR16rm_POST)) return ResNode; @@ -794,12 +793,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::XOR: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) return ResNode; @@ -808,11 +807,11 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { } // Select the default instruction - SDNode *ResNode = SelectCode(Op); + SDNode *ResNode = SelectCode(Node); DEBUG(errs() << std::string(Indent-2, ' ') << "=> "); - if (ResNode == NULL || ResNode == Op.getNode()) - DEBUG(Op.getNode()->dump(CurDAG)); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5fe9b20bd369..d3dce4b4ba2f 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -660,16 +660,16 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: TCC = MSP430CC::COND_E; // aka COND_Z - // Minor optimization: if RHS is a constant, swap operands, then the + // Minor optimization: if LHS is a constant, swap operands, then the // constant can be folded into comparison. 
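The hunk below fixes an inverted test: the point of the swap is to move a constant to the right-hand side, where it can be folded into the comparison as an immediate, so the code must check whether the left operand is the constant. Swapping without touching the condition code is only sound for SETEQ/SETNE; ordered predicates would also need the condition reversed. A minimal standalone sketch of that rule (toy types, not the backend's):

#include <cassert>
#include <utility>

enum Cond { EQ, NE, ULT, UGT };
struct Operand {
  bool IsConst;
  int Val;
};

// Canonicalize a compare so the constant ends up on the RHS, where most
// ISAs can encode it as an immediate. Operand order is irrelevant to
// == and !=, so EQ/NE may swap freely; ordered predicates would also
// have to reverse the condition (ULT <-> UGT), so this sketch leaves
// them alone.
void canonicalizeCmp(Cond C, Operand &LHS, Operand &RHS) {
  if ((C == EQ || C == NE) && LHS.IsConst && !RHS.IsConst)
    std::swap(LHS, RHS);
}

int main() {
  Operand L = {true, 5}, R = {false, 0};
  canonicalizeCmp(EQ, L, R);
  assert(!L.IsConst && R.IsConst && R.Val == 5); // constant moved right
}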
- if (RHS.getOpcode() == ISD::Constant) + if (LHS.getOpcode() == ISD::Constant) std::swap(LHS, RHS); break; case ISD::SETNE: TCC = MSP430CC::COND_NE; // aka COND_NZ - // Minor optimization: if RHS is a constant, swap operands, then the + // Minor optimization: if LHS is a constant, swap operands, then the // constant can be folded into comparison. - if (RHS.getOpcode() == ISD::Constant) + if (LHS.getOpcode() == ISD::Constant) std::swap(LHS, RHS); break; case ISD::SETULE: @@ -1014,8 +1014,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // BB: // cmp 0, N // je RemBB - BuildMI(BB, dl, TII.get(MSP430::CMP8ir)) - .addImm(0).addReg(ShiftAmtSrcReg); + BuildMI(BB, dl, TII.get(MSP430::CMP8ri)) + .addReg(ShiftAmtSrcReg).addImm(0); BuildMI(BB, dl, TII.get(MSP430::JCC)) .addMBB(RemBB) .addImm(MSP430CC::COND_E); @@ -1045,6 +1045,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, .addReg(SrcReg).addMBB(BB) .addReg(ShiftReg2).addMBB(LoopBB); + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return RemBB; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index d67ba90a4caa..022d171f35b7 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -819,38 +819,40 @@ def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), // Integer comparisons let Defs = [SRW] in { def CMP8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2), - "cmp.b\t{$src1, $src2}", + "cmp.b\t{$src2, $src1}", [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>; def CMP16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2), - "cmp.w\t{$src1, $src2}", + "cmp.w\t{$src2, $src1}", [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>; -def CMP8ir : Pseudo<(outs), (ins i8imm:$src1, GR8:$src2), - "cmp.b\t{$src1, $src2}", - [(MSP430cmp imm:$src1, GR8:$src2), (implicit SRW)]>; -def CMP16ir : Pseudo<(outs), (ins i16imm:$src1, GR16:$src2), - "cmp.w\t{$src1, $src2}", - [(MSP430cmp imm:$src1, GR16:$src2), (implicit SRW)]>; +def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2), + "cmp.b\t{$src2, $src1}", + [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>; +def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2), + "cmp.w\t{$src2, $src1}", + [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>; -def CMP8im : Pseudo<(outs), (ins i8imm:$src1, memsrc:$src2), - "cmp.b\t{$src1, $src2}", - [(MSP430cmp (i8 imm:$src1), (load addr:$src2)), (implicit SRW)]>; -def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2), - "cmp.w\t{$src1, $src2}", - [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>; +def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2), + "cmp.b\t{$src2, $src1}", + [(MSP430cmp (load addr:$src1), + (i8 imm:$src2)), (implicit SRW)]>; +def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2), + "cmp.w\t{$src2, $src1}", + [(MSP430cmp (load addr:$src1), + (i16 imm:$src2)), (implicit SRW)]>; def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2), - "cmp.b\t{$src1, $src2}", + "cmp.b\t{$src2, $src1}", [(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>; def CMP16rm : Pseudo<(outs), (ins GR16:$src1, memsrc:$src2), - "cmp.w\t{$src1, $src2}", + "cmp.w\t{$src2, $src1}", [(MSP430cmp GR16:$src1, (load addr:$src2)), (implicit SRW)]>; def CMP8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2), - "cmp.b\t{$src1, $src2}", + "cmp.b\t{$src2, $src1}", [(MSP430cmp (load addr:$src1), GR8:$src2), (implicit SRW)]>; def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), - "cmp.w\t{$src1, $src2}", + 
"cmp.w\t{$src2, $src1}", [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ede111d5090b..a53e9185f701 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -84,14 +84,14 @@ class MipsDAGToDAGISel : public SelectionDAGISel { } SDNode *getGlobalBaseReg(); - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); // Complex Pattern. - bool SelectAddr(SDValue Op, SDValue N, + bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - SDNode *SelectLoadFp64(SDValue N); - SDNode *SelectStoreFp64(SDValue N); + SDNode *SelectLoadFp64(SDNode *N); + SDNode *SelectStoreFp64(SDNode *N); // getI32Imm - Return a target constant with the specified // value, of type i32. @@ -132,7 +132,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: -SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) +SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { @@ -199,19 +199,19 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) return true; } -SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { +SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { MVT::SimpleValueType NVT = - N.getNode()->getValueType(0).getSimpleVT().SimpleTy; + N->getValueType(0).getSimpleVT().SimpleTy; if (!Subtarget.isMips1() || NVT != MVT::f64) return NULL; - if (!Predicate_unindexedload(N.getNode()) || - !Predicate_load(N.getNode())) + if (!Predicate_unindexedload(N) || + !Predicate_load(N)) return NULL; - SDValue Chain = N.getOperand(0); - SDValue N1 = N.getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); SDValue Offset0, Offset1, Base; if (!SelectAddr(N, N1, Offset0, Base) || @@ -220,7 +220,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); - DebugLoc dl = N.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); // The second load should start after for 4 bytes. 
if (ConstantSDNode *C = dyn_cast(Offset0)) @@ -255,27 +255,27 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, MVT::f64, I0, SDValue(LD1, 0)); - ReplaceUses(N, I1); - ReplaceUses(N.getValue(1), Chain); + ReplaceUses(SDValue(N, 0), I1); + ReplaceUses(SDValue(N, 1), Chain); cast(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1); cast(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1); return I1.getNode(); } -SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { +SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { if (!Subtarget.isMips1() || - N.getOperand(1).getValueType() != MVT::f64) + N->getOperand(1).getValueType() != MVT::f64) return NULL; - SDValue Chain = N.getOperand(0); + SDValue Chain = N->getOperand(0); - if (!Predicate_unindexedstore(N.getNode()) || - !Predicate_store(N.getNode())) + if (!Predicate_unindexedstore(N) || + !Predicate_store(N)) return NULL; - SDValue N1 = N.getOperand(1); - SDValue N2 = N.getOperand(2); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); SDValue Offset0, Offset1, Base; if (!SelectAddr(N, N2, Offset0, Base) || @@ -285,7 +285,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); - DebugLoc dl = N.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); // Get the even and odd part from the f64 register SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, @@ -318,14 +318,13 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { MVT::Other, Ops1, 4), 0); cast(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - ReplaceUses(N.getValue(0), Chain); + ReplaceUses(SDValue(N, 0), Chain); return Chain.getNode(); } /// Select instructions not customized! Used for /// expanded, promoted and normal instructions -SDNode* MipsDAGToDAGISel::Select(SDValue N) { - SDNode *Node = N.getNode(); +SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); @@ -379,7 +378,7 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, SDValue(Carry,0), RHS); - return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag, + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Flag, LHS, SDValue(AddCarry,0)); } @@ -405,11 +404,11 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { InFlag = SDValue(Lo,1); SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); - if (!N.getValue(0).use_empty()) - ReplaceUses(N.getValue(0), SDValue(Lo,0)); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(Lo,0)); - if (!N.getValue(1).use_empty()) - ReplaceUses(N.getValue(1), SDValue(Hi,0)); + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(Hi,0)); return NULL; } @@ -460,23 +459,23 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { return getGlobalBaseReg(); case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast(N); - if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) { + ConstantFPSDNode *CN = dyn_cast(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); - ReplaceUses(N, Zero); + ReplaceUses(SDValue(Node, 0), Zero); return Zero.getNode(); } break; } case ISD::LOAD: - if (SDNode *ResNode = SelectLoadFp64(N)) + if (SDNode *ResNode = SelectLoadFp64(Node)) return ResNode; // Other cases are autogenerated. 
break; case ISD::STORE: - if (SDNode *ResNode = SelectStoreFp64(N)) + if (SDNode *ResNode = SelectStoreFp64(Node)) return ResNode; // Other cases are autogenerated. break; @@ -523,11 +522,11 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { } // Select the default instruction - SDNode *ResNode = SelectCode(N); + SDNode *ResNode = SelectCode(Node); DEBUG(errs().indent(Indent-2) << "=> "); - if (ResNode == NULL || ResNode == N.getNode()) - DEBUG(N.getNode()->dump(CurDAG)); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index e13e6cd065fc..82197aebdb39 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -36,7 +36,7 @@ void PIC16DAGToDAGISel::InstructionSelect() { /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. -SDNode* PIC16DAGToDAGISel::Select(SDValue N) { +SDNode* PIC16DAGToDAGISel::Select(SDNode *N) { // Select the default instruction. SDNode *ResNode = SelectCode(N); @@ -47,7 +47,7 @@ SDNode* PIC16DAGToDAGISel::Select(SDValue N) { // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. -bool PIC16DAGToDAGISel::SelectDirectAddr(SDValue Op, SDValue N, +bool PIC16DAGToDAGISel::SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address) { // Return true if TGA or ES. if (N.getOpcode() == ISD::TargetGlobalAddress diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index d9172f2b3622..813a540fb8a3 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -52,10 +52,10 @@ class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { // Include the pieces autogenerated from the target description. #include "PIC16GenDAGISel.inc" - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); // Match direct address complex pattern. - bool SelectDirectAddr(SDValue Op, SDValue N, SDValue &Address); + bool SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address); }; diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index aae4607f43dd..d505d381306d 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -49,6 +50,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -57,27 +59,42 @@ namespace { class PPCAsmPrinter : public AsmPrinter { protected: struct FnStubInfo { - std::string Stub, LazyPtr, AnonSymbol; + MCSymbol *Stub, *LazyPtr, *AnonSymbol; - FnStubInfo() {} + FnStubInfo() { + Stub = LazyPtr = AnonSymbol = 0; + } - void Init(const GlobalValue *GV, Mangler *Mang) { + void Init(const GlobalValue *GV, Mangler *Mang, MCContext &Ctx) { // Already initialized. 
- if (!Stub.empty()) return; - Stub = Mang->getMangledName(GV, "$stub", true); - LazyPtr = Mang->getMangledName(GV, "$lazy_ptr", true); - AnonSymbol = Mang->getMangledName(GV, "$stub$tmp", true); + if (Stub != 0) return; + + // Get the names. + SmallString<128> TmpStr; + Mang->getNameWithPrefix(TmpStr, GV, true); + MakeSymbols(TmpStr, Ctx); } - void Init(const std::string &GV, Mangler *Mang) { - // Already initialized. - if (!Stub.empty()) return; - Stub = Mang->makeNameProper(GV + "$stub", - Mangler::Private); - LazyPtr = Mang->makeNameProper(GV + "$lazy_ptr", - Mangler::Private); - AnonSymbol = Mang->makeNameProper(GV + "$stub$tmp", - Mangler::Private); + void Init(StringRef GVName, Mangler *Mang, MCContext &Ctx) { + assert(!GVName.empty() && "external symbol name shouldn't be empty"); + if (Stub != 0) return; // Already initialized. + // Get the names for the external symbol name. + SmallString<128> TmpStr; + Mang->getNameWithPrefix(TmpStr, GVName, Mangler::Private); + MakeSymbols(TmpStr, Ctx); + } + + void MakeSymbols(SmallString<128> &TmpStr, MCContext &Ctx) { + TmpStr += "$stub"; + Stub = Ctx.GetOrCreateSymbol(TmpStr.str()); + TmpStr.erase(TmpStr.end()-5, TmpStr.end()); // Remove $stub + + TmpStr += "$lazy_ptr"; + LazyPtr = Ctx.GetOrCreateSymbol(TmpStr.str()); + TmpStr.erase(TmpStr.end()-9, TmpStr.end()); // Remove $lazy_ptr + + TmpStr += "$stub$tmp"; + AnonSymbol = Ctx.GetOrCreateSymbol(TmpStr.str()); } }; @@ -224,15 +241,17 @@ namespace { if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. FnStubInfo &FnInfo = FnStubs[Mang->getMangledName(GV)]; - FnInfo.Init(GV, Mang); - O << FnInfo.Stub; + FnInfo.Init(GV, Mang, OutContext); + FnInfo.Stub->print(O, MAI); return; } } if (MO.getType() == MachineOperand::MO_ExternalSymbol) { - FnStubInfo &FnInfo =FnStubs[Mang->makeNameProper(MO.getSymbolName())]; - FnInfo.Init(MO.getSymbolName(), Mang); - O << FnInfo.Stub; + SmallString<128> MangledName; + Mang->getNameWithPrefix(MangledName, MO.getSymbolName()); + FnStubInfo &FnInfo = FnStubs[MangledName.str()]; + FnInfo.Init(MO.getSymbolName(), Mang, OutContext); + FnInfo.Stub->print(O, MAI); return; } } @@ -550,50 +569,49 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); // Check for slwi/srwi mnemonics. 
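The rewritten block that follows recognizes when an RLWINM (rotate left word immediate then AND with mask) is just a shift in disguise: a rotate by SH masked to bits 0..31-SH is slwi SH, and a rotate masked to bits 32-SH..31 is srwi 32-SH. A standalone classifier for exactly those two predicates (a sketch mirroring the conditions in the hunk, not the printer itself):

#include <cassert>

enum Mnemonic { RLWINM, SLWI, SRWI };

// rlwinm rD,rS,SH,MB,ME rotates rS left by SH and keeps bits MB..ME.
// Two mask shapes are plain shifts:
//   MB == 0     and ME == 31-SH  ->  slwi rD,rS,SH
//   MB == 32-SH and ME == 31     ->  srwi rD,rS,32-SH
Mnemonic classify(unsigned SH, unsigned MB, unsigned ME, unsigned &Amt) {
  if (SH <= 31 && MB == 0 && ME == 31 - SH)  { Amt = SH;      return SLWI; }
  if (SH <= 31 && MB == 32 - SH && ME == 31) { Amt = 32 - SH; return SRWI; }
  return RLWINM;
}

int main() {
  unsigned Amt = 0;
  assert(classify(4, 0, 27, Amt) == SLWI && Amt == 4);  // shift left 4
  assert(classify(28, 4, 31, Amt) == SRWI && Amt == 4); // shift right 4
  assert(classify(3, 5, 20, Amt) == RLWINM);            // a real mask
}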
+ bool useSubstituteMnemonic = false; if (MI->getOpcode() == PPC::RLWINM) { - bool FoundMnemonic = false; unsigned char SH = MI->getOperand(2).getImm(); unsigned char MB = MI->getOperand(3).getImm(); unsigned char ME = MI->getOperand(4).getImm(); if (SH <= 31 && MB == 0 && ME == (31-SH)) { - O << "\tslwi "; FoundMnemonic = true; + O << "\tslwi "; useSubstituteMnemonic = true; } if (SH <= 31 && MB == (32-SH) && ME == 31) { - O << "\tsrwi "; FoundMnemonic = true; + O << "\tsrwi "; useSubstituteMnemonic = true; SH = 32-SH; } - if (FoundMnemonic) { + if (useSubstituteMnemonic) { printOperand(MI, 0); O << ", "; printOperand(MI, 1); - O << ", " << (unsigned int)SH << '\n'; - return; + O << ", " << (unsigned int)SH; } } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) { if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + useSubstituteMnemonic = true; O << "\tmr "; printOperand(MI, 0); O << ", "; printOperand(MI, 1); - O << '\n'; - return; } } else if (MI->getOpcode() == PPC::RLDICR) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char ME = MI->getOperand(3).getImm(); // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { + useSubstituteMnemonic = true; O << "\tsldi "; printOperand(MI, 0); O << ", "; printOperand(MI, 1); - O << ", " << (unsigned int)SH << '\n'; - return; + O << ", " << (unsigned int)SH; } } - printInstruction(MI); - + if (!useSubstituteMnemonic) + printInstruction(MI); + if (VerboseAsm) EmitComments(*MI); O << '\n'; @@ -1038,27 +1056,38 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText()); - for (StringMap::iterator I = FnStubs.begin(), E = FnStubs.end(); + for (StringMap::iterator I = FnStubs.begin(), E = FnStubs.end(); I != E; ++I) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); const FnStubInfo &Info = I->second; - O << Info.Stub << ":\n"; + Info.Stub->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << "\tmflr r0\n"; - O << "\tbcl 20,31," << Info.AnonSymbol << '\n'; - O << Info.AnonSymbol << ":\n"; + O << "\tbcl 20,31,"; + Info.AnonSymbol->print(O, MAI); + O << '\n'; + Info.AnonSymbol->print(O, MAI); + O << ":\n"; O << "\tmflr r11\n"; - O << "\taddis r11,r11,ha16(" << Info.LazyPtr << "-" << Info.AnonSymbol; + O << "\taddis r11,r11,ha16("; + Info.LazyPtr->print(O, MAI); + O << '-'; + Info.AnonSymbol->print(O, MAI); O << ")\n"; O << "\tmtlr r0\n"; O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16("; - O << Info.LazyPtr << "-" << Info.AnonSymbol << ")(r11)\n"; + Info.LazyPtr->print(O, MAI); + O << '-'; + Info.AnonSymbol->print(O, MAI); + O << ")(r11)\n"; O << "\tmtctr r12\n"; O << "\tbctr\n"; OutStreamer.SwitchSection(LSPSection); - O << Info.LazyPtr << ":\n"; + Info.LazyPtr->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n"; } @@ -1074,15 +1103,20 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); const FnStubInfo &Info = I->second; - O << Info.Stub << ":\n"; + Info.Stub->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; - O << "\tlis r11,ha16(" << Info.LazyPtr << ")\n"; + O << "\tlis r11,ha16("; + Info.LazyPtr->print(O, MAI); + O << ")\n"; O << (isPPC64 ? 
"\tldu" : "\tlwzu") << " r12,lo16("; - O << Info.LazyPtr << ")(r11)\n"; + Info.LazyPtr->print(O, MAI); + O << ")(r11)\n"; O << "\tmtctr r12\n"; O << "\tbctr\n"; OutStreamer.SwitchSection(LSPSection); - O << Info.LazyPtr << ":\n"; + Info.LazyPtr->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n"; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index e7334b54d473..32c1879cf713 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -95,7 +95,7 @@ namespace { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); SDNode *SelectBitfieldInsert(SDNode *N); @@ -105,7 +105,7 @@ namespace { /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. - bool SelectAddrImm(SDValue Op, SDValue N, SDValue &Disp, + bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG); } @@ -113,7 +113,7 @@ namespace { /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Because preinc imms have already been validated, just /// accept it. - bool SelectAddrImmOffs(SDValue Op, SDValue N, SDValue &Out) const { + bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const { Out = N; return true; } @@ -121,14 +121,14 @@ namespace { /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. - bool SelectAddrIdx(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); } /// SelectAddrIdxOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. - bool SelectAddrIdxOnly(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } @@ -136,7 +136,7 @@ namespace { /// SelectAddrImmShift - Returns true if the address N can be represented by /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable /// for use by STD and friends. - bool SelectAddrImmShift(SDValue Op, SDValue N, SDValue &Disp, + bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } @@ -180,7 +180,7 @@ namespace { #include "PPCGenDAGISel.inc" private: - SDNode *SelectSETCC(SDValue Op); + SDNode *SelectSETCC(SDNode *N); }; } @@ -635,8 +635,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { return 0; } -SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; ISD::CondCode CC = cast(N->getOperand(2))->get(); @@ -756,9 +755,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. 
-SDNode *PPCDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); - DebugLoc dl = Op.getDebugLoc(); +SDNode *PPCDAGToDAGISel::Select(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) return NULL; // Already selected. @@ -841,18 +839,18 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { } case ISD::SETCC: - return SelectSETCC(Op); + return SelectSETCC(N); case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); case ISD::FrameIndex: { int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); - unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8; + SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); + unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI, + return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI, getSmallIPtrImm(0)); - return CurDAG->getMachineNode(Opc, dl, Op.getValueType(), TFI, + return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, getSmallIPtrImm(0)); } @@ -899,7 +897,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { case ISD::LOAD: { // Handle preincrement loads. - LoadSDNode *LD = cast(Op); + LoadSDNode *LD = cast(N); EVT LoadedVT = LD->getMemoryVT(); // Normal loads are handled by code generated from the .td file. @@ -1092,7 +1090,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { } } - return SelectCode(Op); + return SelectCode(N); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8fe151ad229a..842f8ee08977 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -430,9 +430,7 @@ let isCall = 1, PPC970_Unit = 7, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, - CR0,CR1,CR5,CR6,CR7, - CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, - CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in { + CR0,CR1,CR5,CR6,CR7,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, @@ -457,9 +455,7 @@ let isCall = 1, PPC970_Unit = 7, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, - CR0,CR1,CR5,CR6,CR7, - CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, - CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in { + CR0,CR1,CR5,CR6,CR7,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index be6e51e0ddb4..daf4ec6d012a 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -308,6 +308,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, // Rewrite the stub with an unconditional branch to the target, for any users // who took the address of the stub. 
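The InvalidateInstructionCache calls added in the PPCJITInfo hunks below matter because PowerPC has split instruction and data caches: stores that overwrite code are not seen by the fetch unit until the affected lines are flushed and invalidated. A hedged standalone sketch of the same discipline using the GCC/Clang builtin (buffer and byte values are illustrative; real JIT code patches executable pages):

#include <cstddef>
#include <cstring>

// After rewriting machine code in place, the modified range must be
// flushed from the data cache and invalidated in the instruction cache
// before execution; PowerPC's split caches give no coherence for free.
// __builtin___clear_cache is the GCC/Clang spelling of that barrier
// (a no-op on coherent targets like x86).
void patchCode(char *code, const char *newInsns, std::size_t len) {
  std::memcpy(code, newInsns, len);          // overwrite the old stub
  __builtin___clear_cache(code, code + len); // make fetch see the stores
}

int main() {
  char buf[28] = {};                // stands in for a 7-instruction stub
  const char insns[28] = {1, 2, 3}; // illustrative bytes, not opcodes
  patchCode(buf, insns, sizeof buf);
}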
EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); + sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4); // Put the address of the target function to call and the address to return to // after calling the target function in a place that is easy to get on the @@ -441,4 +442,5 @@ void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Old, 7*4); } diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index c87879b2a332..ee6deb5c5090 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -22,6 +22,7 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { if (!is64Bit) Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. AssemblerDialect = 1; // New-Style mnemonics. + SupportsDebugInformation= true; // Debug information. } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index f5e50fc808a8..060d6a5c5c2b 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -5,6 +5,39 @@ TODO: * implement do-loop -> bdnz transform * lmw/stmw pass a la arm load store optimizer for prolog/epilog +===-------------------------------------------------------------------------=== + +On PPC64, this: + +long f2 (long x) { return 0xfffffff000000000UL; } +long f3 (long x) { return 0x1ffffffffUL; } + +could compile into: + +_f2: + li r3,-1 + rldicr r3,r3,0,27 + blr +_f3: + li r3,-1 + rldicl r3,r3,0,31 + blr + +we produce: + +_f2: + lis r2, 4095 + ori r2, r2, 65535 + sldi r3, r2, 36 + blr +_f3: + li r2, 1 + sldi r2, r2, 32 + oris r2, r2, 65535 + ori r3, r2, 65535 + blr + + ===-------------------------------------------------------------------------=== Support 'update' load/store instructions. These are cracked on the G5, but are diff --git a/lib/Target/README.txt b/lib/Target/README.txt index a6e05fadefd9..69da35f1c7af 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -282,19 +282,6 @@ this requires TBAA. //===---------------------------------------------------------------------===// -This should be optimized to one 'and' and one 'or', from PR4216: - -define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp { -entry: - %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; [#uses=1] - %0 = and i32 %bf.prev.low, -65536 ; [#uses=1] - %1 = and i32 %bf.prev.lo.cleared10, 40186 ; [#uses=1] - %2 = or i32 %1, %0 ; [#uses=1] - ret i32 %2 -} - -//===---------------------------------------------------------------------===// - This isn't recognized as bswap by instcombine (yes, it really is bswap): unsigned long reverse(unsigned v) { @@ -1661,38 +1648,9 @@ would delete the or instruction for us. //===---------------------------------------------------------------------===// -FunctionAttrs is not marking this function as readnone (just readonly): -$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs - -int t(int a, int b, int c) { - int *p; - if (a) - p = &a; - else - p = &c; - return *p; -} - -This is because we codegen this to: - -define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp { -entry: - %a.addr = alloca i32 ; [#uses=3] - %c.addr = alloca i32 ; [#uses=2] -... 
- -if.end: - %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] - %tmp2 = load i32* %p.0 ; [#uses=1] - ret i32 %tmp2 -} - -And functionattrs doesn't realize that the p.0 load points to function local -memory. - -Also, functionattrs doesn't know about memcpy/memset. This function should be -marked readnone, since it only twiddles local memory, but functionattrs doesn't -handle memset/memcpy/memmove aggressively: +functionattrs doesn't know much about memcpy/memset. This function should be +marked readnone rather than readonly, since it only twiddles local memory, but +functionattrs doesn't handle memset/memcpy/memmove aggressively: struct X { int *p; int *q; }; int foo() { diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index b41917e20285..e1b32998b23f 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -43,11 +43,11 @@ class SparcDAGToDAGISel : public SelectionDAGISel { TM(tm) { } - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); // Complex Pattern Selectors. - bool SelectADDRrr(SDValue Op, SDValue N, SDValue &R1, SDValue &R2); - bool SelectADDRri(SDValue Op, SDValue N, SDValue &Base, + bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for @@ -87,7 +87,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr, +bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); @@ -128,7 +128,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr, return true; } -bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr, +bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr, SDValue &R1, SDValue &R2) { if (Addr.getOpcode() == ISD::FrameIndex) return false; if (Addr.getOpcode() == ISD::TargetExternalSymbol || @@ -152,8 +152,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr, return true; } -SDNode *SparcDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *SparcDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) return NULL; // Already selected. @@ -199,7 +198,7 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) { } } - return SelectCode(Op); + return SelectCode(N); } @@ -213,8 +212,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectADDRrr(Op, Op, Op0, Op1)) - SelectADDRri(Op, Op, Op0, Op1); + if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1)) + SelectADDRri(Op.getNode(), Op, Op0, Op1); break; } diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 590574ef3981..7cc4fd1d2637 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/SubtargetFeature.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" #include @@ -355,7 +356,7 @@ void SubtargetFeatures::print(raw_ostream &OS) const { /// dump - Dump feature info. 
/// void SubtargetFeatures::dump() const { - print(errs()); + print(dbgs()); } /// getDefaultSubtargetFeatures - Return a string listing diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index d64611d7c07d..7096c0e0d8ea 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -128,23 +128,23 @@ namespace { #include "SystemZGenDAGISel.inc" private: - bool SelectAddrRI12Only(SDValue Op, SDValue& Addr, + bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp); - bool SelectAddrRI12(SDValue Op, SDValue& Addr, + bool SelectAddrRI12(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp, bool is12BitOnly = false); - bool SelectAddrRI(SDValue Op, SDValue& Addr, + bool SelectAddrRI(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp); - bool SelectAddrRRI12(SDValue Op, SDValue Addr, + bool SelectAddrRRI12(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectAddrRRI20(SDValue Op, SDValue Addr, + bool SelectAddrRRI20(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectLAAddr(SDValue Op, SDValue Addr, + bool SelectLAAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *Node); - bool TryFoldLoad(SDValue P, SDValue N, + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index); bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, @@ -367,12 +367,12 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM, /// Returns true if the address can be represented by a base register plus /// an unsigned 12-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp) { return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true); } -bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp, bool is12BitOnly) { SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true); @@ -422,7 +422,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr, /// Returns true if the address can be represented by a base register plus /// a signed 20-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp) { SystemZRRIAddressMode AM(/*isRI*/true); bool Done = false; @@ -465,7 +465,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr, /// Returns true if the address can be represented by a base register plus /// index register plus an unsigned 12-bit displacement [base + idx + imm]. -bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM20, AM12; bool Done = false; @@ -514,7 +514,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr, /// Returns true if the address can be represented by a base register plus /// index register plus a signed 20-bit displacement [base + idx + imm]. 
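These SystemZ selectors split into two families because the ISA does: classic base+index+displacement forms encode an unsigned 12-bit displacement, while the long-displacement forms encode a signed 20-bit one, and SelectAddrRI12Only exists for instructions with no 20-bit variant. Range checks equivalent to what the matcher must enforce (standalone sketch, not SystemZ code):

#include <cassert>
#include <cstdint>

// z/Architecture D(X,B) addressing: the short forms carry an unsigned
// 12-bit displacement, the long-displacement forms a signed 20-bit one.
bool fitsU12(int64_t D) { return D >= 0 && D <= 0xFFF; }
bool fitsS20(int64_t D) { return D >= -(1 << 19) && D < (1 << 19); }

int main() {
  assert(fitsU12(4095) && !fitsU12(4096) && !fitsU12(-1));
  assert(fitsS20(-524288) && fitsS20(524287) && !fitsS20(524288));
  // Every U12 displacement is also S20, so the 12-bit-only selector is
  // the general matcher with the wider encodings rejected.
  assert(fitsS20(0) && fitsS20(4095));
}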
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM; bool Done = false; @@ -558,7 +558,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr, /// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LA/LAY instruction. -bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM; @@ -591,11 +591,11 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr, return false; } -bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, +bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + IsLegalAndProfitableToFold(N.getNode(), P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } @@ -612,10 +612,9 @@ void SystemZDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { - SDNode *Node = Op.getNode(); +SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = Node->getDebugLoc(); unsigned Opcode = Node->getOpcode(); // Dump information about the Node being selected @@ -643,20 +642,20 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { EVT ResVT; bool is32Bit = false; switch (NVT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported VT!"); - case MVT::i32: - Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; - ResVT = MVT::v2i64; - is32Bit = true; - break; - case MVT::i64: - Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; - ResVT = MVT::v2i64; - break; + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; + ResVT = MVT::v2i64; + is32Bit = true; + break; + case MVT::i64: + Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; + ResVT = MVT::v2i64; + break; } SDValue Tmp0, Tmp1, Tmp2; - bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2); // Prepare the dividend SDNode *Dividend; @@ -677,16 +676,16 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { SDValue DivVal = SDValue(Dividend, 0); if (foldedLoad) { SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; - Result = CurDAG->getMachineNode(MOpc, dl, ResVT, + Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other, Ops, array_lengthof(Ops)); // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + ReplaceUses(N1.getValue(1), SDValue(Result, 1)); } else { Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1); } // Copy the division (odd subreg) result, if it is needed. - if (!Op.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? 
subreg_odd32 : subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, @@ -694,14 +693,14 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. - if (!Op.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, @@ -709,7 +708,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); @@ -729,22 +728,22 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { bool is32Bit = false; switch (NVT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported VT!"); - case MVT::i32: - Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; - ClrOpc = SystemZ::MOV64Pr0_even; - ResVT = MVT::v2i32; - is32Bit = true; - break; - case MVT::i64: - Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; - ClrOpc = SystemZ::MOV128r0_even; - ResVT = MVT::v2i64; - break; + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; + ClrOpc = SystemZ::MOV64Pr0_even; + ResVT = MVT::v2i32; + is32Bit = true; + break; + case MVT::i64: + Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; + ClrOpc = SystemZ::MOV128r0_even; + ResVT = MVT::v2i64; + break; } SDValue Tmp0, Tmp1, Tmp2; - bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2); // Prepare the dividend SDNode *Dividend = N0.getNode(); @@ -767,37 +766,37 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { SDNode *Result; if (foldedLoad) { SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; - Result = CurDAG->getMachineNode(MOpc, dl,ResVT, + Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other, Ops, array_lengthof(Ops)); // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + ReplaceUses(N1.getValue(1), SDValue(Result, 1)); } else { Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1); } // Copy the division (odd subreg) result, if it is needed. - if (!Op.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. - if (!Op.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? 
subreg_even32 : subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); @@ -812,11 +811,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { } // Select the default instruction - SDNode *ResNode = SelectCode(Op); + SDNode *ResNode = SelectCode(Node); DEBUG(errs().indent(Indent-2) << "=> "; - if (ResNode == NULL || ResNode == Op.getNode()) - Op.getNode()->dump(CurDAG); + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); else ResNode->dump(CurDAG); errs() << "\n"; diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index cddf49e5154e..f5c969ae133d 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -34,7 +34,7 @@ char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) { } LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) { - return unwrap(TD)->isLittleEndian(); + return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian; } unsigned LLVMPointerSize(LLVMTargetDataRef TD) { diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index f887523c5b71..70e8008eb44a 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -21,11 +21,13 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; @@ -492,16 +494,15 @@ getELFKindForNamedSection(const char *Name, SectionKind K) { } -static unsigned -getELFSectionType(const char *Name, SectionKind K) { +static unsigned getELFSectionType(StringRef Name, SectionKind K) { - if (strcmp(Name, ".init_array") == 0) + if (Name == ".init_array") return MCSectionELF::SHT_INIT_ARRAY; - if (strcmp(Name, ".fini_array") == 0) + if (Name == ".fini_array") return MCSectionELF::SHT_FINI_ARRAY; - if (strcmp(Name, ".preinit_array") == 0) + if (Name == ".preinit_array") return MCSectionELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) @@ -577,10 +578,16 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // into a 'uniqued' section name, create and return the section now. if (GV->isWeakForLinker()) { const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - std::string Name = Mang->makeNameProper(GV->getNameStr()); - - return getELFSection((Prefix+Name).c_str(), - getELFSectionType((Prefix+Name).c_str(), Kind), + SmallString<128> Name, MangledName; + Name.append(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + + raw_svector_ostream OS(MangledName); + MCSymbol::printMangledName(Name, OS, 0); + OS.flush(); + + return getELFSection(MangledName.str(), + getELFSectionType(MangledName.str(), Kind), getELFSectionFlags(Kind), Kind); } @@ -922,7 +929,7 @@ const MCSection * TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. 
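For context on the isReadOnlyWithRel case added immediately below: it covers constants that are logically read-only but embed an address the dynamic linker has to fix up, so on Mach-O they must go to the data segment rather than the text segment. At the C level (hypothetical globals, for illustration):

extern int X;
int *const P = &X; // constant data, but &X still needs a load-time relocation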
- if (Kind.isDataRel()) + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) return ConstDataSection; if (Kind.isMergeableConst4()) @@ -983,7 +990,7 @@ TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(const char *Name, bool isDirective, SectionKind Kind) const { +getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { // Create the map if it doesn't already exist. if (UniquingMap == 0) UniquingMap = new MachOUniqueMapTy(); @@ -1078,8 +1085,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // into a 'uniqued' section name, create and return the section now. if (GV->isWeakForLinker()) { const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - std::string Name = Mang->makeNameProper(GV->getNameStr()); - return getCOFFSection((Prefix+Name).c_str(), false, Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getCOFFSection(Name.str(), false, Kind); } if (Kind.isText()) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c357b4d0dee1..c4ae5d220b32 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" @@ -15,6 +16,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" @@ -46,7 +48,7 @@ class X86ATTAsmParser : public TargetAsmParser { /// @name Auto-generated Match Functions /// { - bool MatchInstruction(SmallVectorImpl &Operands, + bool MatchInstruction(const SmallVectorImpl &Operands, MCInst &Inst); /// MatchRegisterName - Match the given string to a register name, or 0 if @@ -59,7 +61,8 @@ class X86ATTAsmParser : public TargetAsmParser { X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; @@ -71,7 +74,7 @@ namespace { /// X86Operand - Instances of this class represent a parsed X86 machine /// instruction. -struct X86Operand { +struct X86Operand : public MCParsedAsmOperand { enum { Token, Register, @@ -400,10 +403,11 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) { return false; } -bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { - SmallVector Operands; +bool X86ATTAsmParser:: +ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl &Operands) { - Operands.push_back(X86Operand::CreateToken(Name)); + Operands.push_back(new X86Operand(X86Operand::CreateToken(Name))); SMLoc Loc = getLexer().getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -411,31 +415,27 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) { getLexer().Lex(); // Eat the star. 
- Operands.push_back(X86Operand::CreateToken("*")); + Operands.push_back(new X86Operand(X86Operand::CreateToken("*"))); } // Read the first operand. - Operands.push_back(X86Operand()); - if (ParseOperand(Operands.back())) + X86Operand Op; + if (ParseOperand(Op)) return true; + Operands.push_back(new X86Operand(Op)); + while (getLexer().is(AsmToken::Comma)) { getLexer().Lex(); // Eat the comma. // Parse and remember the operand. - Operands.push_back(X86Operand()); - if (ParseOperand(Operands.back())) + if (ParseOperand(Op)) return true; + Operands.push_back(new X86Operand(Op)); } } - if (!MatchInstruction(Operands, Inst)) - return false; - - // FIXME: We should give nicer diagnostics about the exact failure. - - Error(Loc, "unrecognized instruction"); - return true; + return false; } bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index b88063f9ce72..70c6dd03eb19 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -201,6 +201,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { + SmallString<128> TempNameStr; switch (MO.getType()) { default: llvm_unreachable("unknown symbol type!"); case MachineOperand::MO_JumpTableIndex: @@ -236,41 +237,38 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$non_lazy_ptr"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo().getGVStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$non_lazy_ptr"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo().getHiddenGVStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$stub"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol 
*&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } @@ -285,24 +283,32 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { break; } case MachineOperand::MO_ExternalSymbol: { - std::string Name = Mang->makeNameProper(MO.getSymbolName()); + const MCSymbol *SymToPrint; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - Name += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); + Mang->getNameWithPrefix(TempNameStr, + StringRef(MO.getSymbolName())+"$stub"); + const MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { - Name.erase(Name.end()-5, Name.end()); - StubSym = OutContext.GetOrCreateSymbol(StringRef(Name)); + TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } + SymToPrint = StubSym; + } else { + Mang->getNameWithPrefix(TempNameStr, MO.getSymbolName()); + SymToPrint = OutContext.GetOrCreateSymbol(TempNameStr.str()); } // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. - if (Name[0] == '$') - O << '(' << Name << ')'; - else - O << Name; + if (SymToPrint->getName()[0] != '$') + SymToPrint->print(O, MAI); + else { + O << '('; + SymToPrint->print(O, MAI); + O << ')'; + } break; } } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 1015b6924734..9ee118cdfd93 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/DebugInfo.h" using namespace llvm; @@ -399,6 +400,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(X86::MOVZX32rm16); lower_subreg32(&OutMI, 0); break; + case X86::MOV16r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; + case X86::MOV64r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; } } @@ -412,6 +421,25 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case TargetInstrInfo::GC_LABEL: printLabel(MI); return; + case TargetInstrInfo::DEBUG_VALUE: { + if (!VerboseAsm) + return; + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIVariable etc. do not take const operands for some reason + DIVariable V((MDNode*)(MI->getOperand(2).getMetadata())); + O << V.getName(); + O << " <- "; + if (MI->getOperand(0).getType()==MachineOperand::MO_Register) + printOperand(MI, 0); + else { + assert(MI->getOperand(0).getType()==MachineOperand::MO_Immediate); + int64_t imm = MI->getOperand(0).getImm(); + O << '[' << ((imm<0) ?
"EBP" : "ESP+") << imm << ']'; + } + O << "+"; + printOperand(MI, 1); + return; + } case TargetInstrInfo::INLINEASM: printInlineAsm(MI); return; diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 71ad51c7984e..0f3e44b52899 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -916,3 +916,23 @@ cheaper to do fld1 than load from a constant pool for example, so "load, add 1.0, store" is better done in the fp stack, etc. //===---------------------------------------------------------------------===// + +The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to +"cmpsd". For example, this code: + +double d1(double x) { return x == x ? x : x + x; } + +Compiles into: + +_d1: + ucomisd %xmm0, %xmm0 + jnp LBB1_2 + addsd %xmm0, %xmm0 + ret +LBB1_2: + ret + +Also, the 'ret's should be shared. This is PR6032. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index afd9f53ea6bb..aa7bb3d97889 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -530,7 +530,7 @@ We should inline lrintf and probably other libc functions. //===---------------------------------------------------------------------===// -Start using the flags more. For example, compile: +Use the FLAGS values from arithmetic instructions more. For example, compile: int add_zf(int *x, int y, int a, int b) { if ((*x += y) == 0) @@ -554,31 +554,8 @@ _add_zf: movl %ecx, %eax ret -and: - -int add_zf(int *x, int y, int a, int b) { - if ((*x + y) < 0) - return a; - else - return b; -} - -to: - -add_zf: - addl (%rdi), %esi - movl %edx, %eax - cmovns %ecx, %eax - ret - -instead of: - -_add_zf: - addl (%rdi), %esi - testl %esi, %esi - cmovs %edx, %ecx - movl %ecx, %eax - ret +As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll +without a test instruction. //===---------------------------------------------------------------------===// @@ -685,55 +662,6 @@ Though this probably isn't worth it. //===---------------------------------------------------------------------===// -We need to teach the codegen to convert two-address INC instructions to LEA -when the flags are dead (likewise dec). For example, on X86-64, compile: - -int foo(int A, int B) { - return A+1; -} - -to: - -_foo: - leal 1(%edi), %eax - ret - -instead of: - -_foo: - incl %edi - movl %edi, %eax - ret - -Another example is: - -;; X's live range extends beyond the shift, so the register allocator -;; cannot coalesce it with Y. Because of this, a copy needs to be -;; emitted before the shift to save the register value before it is -;; clobbered. However, this copy is not needed if the register -;; allocator turns the shift into an LEA. This also occurs for ADD. - -; Check that the shift gets turned into an LEA. -; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \ -; RUN: not grep {mov E.X, E.X} - -@G = external global i32 ; [#uses=3] - -define i32 @test1(i32 %X, i32 %Y) { - %Z = add i32 %X, %Y ; [#uses=1] - volatile store i32 %Y, i32* @G - volatile store i32 %Z, i32* @G - ret i32 %X -} - -define i32 @test2(i32 %X) { - %Z = add i32 %X, 1 ; [#uses=1] - volatile store i32 %Z, i32* @G - ret i32 %X -} - -//===---------------------------------------------------------------------===// - Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with a neg instead of a sub instruction. 
Consider: @@ -852,11 +780,6 @@ __Z11no_overflowjj: ret -//===---------------------------------------------------------------------===// - -Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the -condition register is dead. xor reg reg is shorter than mov reg, #0. - //===---------------------------------------------------------------------===// The following code: diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a6e1ca3128ee..7919559058b1 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -23,6 +23,7 @@ include "llvm/Target/Target.td" def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", @@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add">; +def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", + "HasVectorUAMem", "true", + "Allow unaligned memory operands on vector/SIMD instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 4892e1746079..828e872cacbf 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -135,7 +135,7 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DEBUG(errs() << "JITTing function '" + DEBUG(dbgs() << "JITTing function '" << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); @@ -477,7 +477,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, template void Emitter::emitInstruction(const MachineInstr &MI, const TargetInstrDesc *Desc) { - DEBUG(errs() << MI); + DEBUG(dbgs() << MI); MCE.processDebugLoc(MI.getDebugLoc(), true); @@ -618,11 +618,11 @@ void Emitter::emitInstruction(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(CurOp++); - DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n"); - DEBUG(errs() << "isMBB " << MO.isMBB() << "\n"); - DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n"); - DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n"); - DEBUG(errs() << "isImm " << MO.isImm() << "\n"); + DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(dbgs() << "isImm " << MO.isImm() << "\n"); if (MO.isMBB()) { emitPCRelativeBlockAddress(MO.getMBB()); @@ -843,7 +843,7 @@ void Emitter::emitInstruction(const MachineInstr &MI, if (!Desc->isVariadic() && CurOp != NumOps) { #ifndef NDEBUG - errs() << "Cannot encode all operands of: " << MI << "\n"; + dbgs() << "Cannot encode all operands of: " << MI << "\n"; #endif llvm_unreachable(0); } @@ -1082,9 +1082,9 @@ class X86MCCodeEmitter : public MCCodeEmitter { } if (!OK) { - errs() << "couldn't convert inst '"; + dbgs() << "couldn't convert inst '"; MI.dump(); - errs() << "' to machine instr:\n"; + dbgs() << "' to machine instr:\n"; Instr->dump(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 431c120f8f0d..7e02d59c1bca 100644 --- 
a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType() == Type::getInt8Ty(I->getContext()) && - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) { + if (I->getType()->isInteger(8) && + I->getOperand(0)->getType()->isInteger(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -948,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType() == Type::getInt8Ty(I->getContext())) { + if (I->getType()->isInteger(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType() == Type::getInt16Ty(I->getContext())) { + } else if (I->getType()->isInteger(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType() == Type::getInt32Ty(I->getContext())) { + } else if (I->getType()->isInteger(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType() == Type::getInt64Ty(I->getContext())) { + } else if (I->getType()->isInteger(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1230,8 +1230,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) { CC != CallingConv::X86_FastCall) return false; - // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't - // handle this for now. + // fastcc with -tailcallopt is intended to provide a guaranteed + // tail call optimization. Fastisel doesn't know how to do that. if (CC == CallingConv::Fast && PerformTailCallOpt) return false; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 044bd4be322a..503ac146d27a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -75,12 +75,12 @@ namespace { unsigned StackTop; // The current top of the FP stack. void dumpStack() const { - errs() << "Stack contents:"; + dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { - errs() << " FP" << Stack[i]; + dbgs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } - errs() << "\n"; + dbgs() << "\n"; } private: /// isStackEmpty - Return true if the FP stack is empty. @@ -246,7 +246,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { PrevMI = prior(I); ++NumFP; // Keep track of # of pseudo instrs - DEBUG(errs() << "\nFPInst:\t" << *MI); + DEBUG(dbgs() << "\nFPInst:\t" << *MI); // Get dead variables list now because the MI pointer may be deleted as part // of processing! 
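The errs()-to-dbgs() substitutions in this file and its neighbors route all debugging output through llvm::dbgs(), which in this revision of Support/Debug.h is meant to act like errs() in release builds while permitting the debug stream to be buffered in debug builds. A minimal usage sketch; the DEBUG_TYPE name is made up:

#define DEBUG_TYPE "fp-stack-example" // hypothetical pass name
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

static void traceCount(unsigned NumFP) {
  // DEBUG(...) compiles away in NDEBUG builds; otherwise it prints when
  // -debug (or -debug-only=fp-stack-example) is passed.
  DEBUG(llvm::dbgs() << "FP pseudo instructions seen: " << NumFP << '\n');
}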
@@ -273,7 +273,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { unsigned Reg = DeadRegs[i]; if (Reg >= X86::FP0 && Reg <= X86::FP6) { - DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); + DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); } } @@ -282,13 +282,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DEBUG( MachineBasicBlock::iterator PrevI(PrevMI); if (I == PrevI) { - errs() << "Just deleted pseudo instruction\n"; + dbgs() << "Just deleted pseudo instruction\n"; } else { MachineBasicBlock::iterator Start = I; // Rewind to first instruction newly inserted. while (Start != BB.begin() && prior(Start) != PrevI) --Start; - errs() << "Inserted instructions:\n\t"; - Start->print(errs(), &MF.getTarget()); + dbgs() << "Inserted instructions:\n\t"; + Start->print(dbgs(), &MF.getTarget()); while (++Start != llvm::next(I)) {} } dumpStack(); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index cb8238377858..e2a53d1118b8 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -113,37 +113,37 @@ namespace { } void dump() { - errs() << "X86ISelAddressMode " << this << '\n'; - errs() << "Base.Reg "; + dbgs() << "X86ISelAddressMode " << this << '\n'; + dbgs() << "Base.Reg "; if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); else - errs() << "nul"; - errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << "nul"; + dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - errs() << "nul"; - errs() << " Disp " << Disp << '\n' + dbgs() << "nul"; + dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) GV->dump(); else - errs() << "nul"; - errs() << " CP "; + dbgs() << "nul"; + dbgs() << " CP "; if (CP) CP->dump(); else - errs() << "nul"; - errs() << '\n' + dbgs() << "nul"; + dbgs() << '\n' << "ES "; if (ES) - errs() << ES; + dbgs() << ES; else - errs() << "nul"; - errs() << " JT" << JT << " Align" << Align << '\n'; + dbgs() << "nul"; + dbgs() << " JT" << JT << " Align" << Align << '\n'; } }; } @@ -190,7 +190,7 @@ namespace { #include "X86GenDAGISel.inc" private: - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); @@ -201,19 +201,19 @@ namespace { bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); - bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDValue Op, SDValue Pred, + bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &InChain, SDValue &OutChain); - bool TryFoldLoad(SDValue P, SDValue N, + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, 
SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -310,6 +310,11 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, if (U == Root) switch (U->getOpcode()) { default: break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::AND: + case X86ISD::XOR: + case X86ISD::OR: case ISD::ADD: case ISD::ADDC: case ISD::ADDE: @@ -675,12 +680,12 @@ void X86DAGToDAGISel::InstructionSelect() { // Codegen the basic block. #ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(dbgs() << "===== Instruction selection begins:\n"); Indent = 0; #endif SelectRoot(*CurDAG); #ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); #endif CurDAG->RemoveDeadNodes(); @@ -850,7 +855,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ - errs() << "MatchAddress: "; + dbgs() << "MatchAddress: "; AM.dump(); }); // Limit recursion. @@ -1268,7 +1273,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. -bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; @@ -1291,7 +1296,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, @@ -1302,7 +1307,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) { + IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { LoadSDNode *LD = cast(InChain); if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; @@ -1333,7 +1338,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. -bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, +bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { X86ISelAddressMode AM; @@ -1395,10 +1400,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, } /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 
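Backing up to the IsLegalAndProfitableToFold change at the top of this hunk: listing X86ISD::ADD/SUB/AND/XOR/OR alongside the generic arithmetic opcodes extends the existing load-folding profitability checks to the X86-specific, EFLAGS-producing forms of those operations. The kind of source that benefits, in the spirit of the README examples (illustrative only):

int add_and_test(int *x, int y) {
  // With the fold, the load and the add become one instruction,
  // addl (%rdi), %esi, whose EFLAGS result feeds the compare directly.
  return (*x + y) == 0;
}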
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op.getOpcode() == X86ISD::TLSADDR); + assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast(N); @@ -1421,13 +1426,13 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, } -bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, +bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { if (ISD::isNON_EXTLoad(N.getNode()) && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + IsLegalAndProfitableToFold(N.getNode(), P, P)) return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); return false; } @@ -1454,7 +1459,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); @@ -1480,7 +1485,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return 0; bool isInc = false, isDec = false, isSub = false, isCN = false; @@ -1678,8 +1683,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } -SDNode *X86DAGToDAGISel::Select(SDValue N) { - SDNode *Node = N.getNode(); +SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); @@ -1687,9 +1691,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent, ' ') << "Selecting: "; + dbgs() << std::string(Indent, ' ') << "Selecting: "; Node->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent += 2; #endif @@ -1697,9 +1701,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { if (Node->isMachineOpcode()) { #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "== "; + dbgs() << std::string(Indent-2, ' ') << "== "; Node->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent -= 2; #endif @@ -1767,10 +1771,10 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); // Multiply is commmutative. if (!foldedLoad) { - foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); if (foldedLoad) std::swap(N0, N1); } @@ -1793,21 +1797,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } // Copy the low half of the result, if it is needed. 
- if (!N.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + ReplaceUses(SDValue(Node, 0), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } // Copy the high half of the result, if it is needed. - if (!N.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { SDValue Result; if (HiReg == X86::AH && Subtarget->is64Bit()) { // Prevent use of AH in a REX instruction by referencing AX instead. @@ -1826,12 +1830,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { HiReg, NVT, InFlag); InFlag = Result.getValue(2); } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(SDValue(Node, 1), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } @@ -1869,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { unsigned LoReg, HiReg, ClrReg; unsigned ClrOpcode, SExtOpcode; - EVT ClrVT = NVT; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: @@ -1879,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32; + ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: @@ -1889,13 +1892,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = ~0U; // NOT USED. + ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; @@ -1903,7 +1906,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, @@ -1928,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode; - - if (NVT.getSimpleVT() == MVT::i64) { - ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), - 0); - // We just did a 32-bit clear, insert it into a 64-bit register to - // clear the whole 64-bit reg. 
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); - SDValue SubRegNo = - CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); - ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, - MVT::i64, Zero, ClrNode, SubRegNo), - 0); - } else { - ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0); - } - + SDValue ClrNode = + SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -1966,21 +1953,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } // Copy the division (low) result, if it is needed. - if (!N.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + ReplaceUses(SDValue(Node, 0), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } // Copy the remainder (high) result, if it is needed. - if (!N.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { SDValue Result; if (HiReg == X86::AH && Subtarget->is64Bit()) { // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2000,12 +1987,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { HiReg, NVT, InFlag); InFlag = Result.getValue(2); } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(SDValue(Node, 1), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } @@ -2124,16 +2111,16 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } } - SDNode *ResNode = SelectCode(N); + SDNode *ResNode = SelectCode(Node); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == N.getNode()) - N.getNode()->dump(CurDAG); + dbgs() << std::string(Indent-2, ' ') << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); else ResNode->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent -= 2; #endif @@ -2150,7 +2137,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, case 'v': // not offsetable ?? 
default: return true; case 'm': // memory - if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4)) + if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c722fbf648b4..228ec9f2d63d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -978,6 +978,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); @@ -2077,10 +2078,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, assert(((Callee.getOpcode() == ISD::Register && (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX || - cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) || + cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress) && - "Expecting an global address, external symbol, or register"); + "Expecting a global address, external symbol, or scratch register"); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); @@ -5610,13 +5611,21 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // because a TEST instruction will be better. bool NonFlagUse = false; for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::BRCOND && - UI->getOpcode() != ISD::SELECT && - UI->getOpcode() != ISD::SETCC) { + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look past the truncate. + UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { NonFlagUse = true; break; } + } if (!NonFlagUse) break; } @@ -5680,6 +5689,56 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } +/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node +/// if it's possible. +static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) { + SDValue LHS, RHS; + if (Op0.getOperand(1).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op010C = + dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) + if (Op010C->getZExtValue() == 1) { + LHS = Op0.getOperand(0); + RHS = Op0.getOperand(1).getOperand(1); + } + } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op000C = + dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) + if (Op000C->getZExtValue() == 1) { + LHS = Op0.getOperand(1); + RHS = Op0.getOperand(0).getOperand(1); + } + } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); + SDValue AndLHS = Op0.getOperand(0); + if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { + LHS = AndLHS.getOperand(0); + RHS = AndLHS.getOperand(1); + } + } + + if (LHS.getNode()) { + // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // instruction. Since the shift amount is in-range-or-undefined, we know + // that doing a bittest on the i16 value is ok.
We extend to i32 because // the encoding for the i16 version is larger than the i32 version. + if (LHS.getValueType() == MVT::i8) + LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. + if (LHS.getValueType() != RHS.getValueType()) + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); + + SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); + unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(Cond, MVT::i8), BT); + } + + return SDValue(); +} + SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); @@ -5687,6 +5746,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // Optimize to BT if possible. // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). @@ -5695,48 +5755,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(Op1)->getZExtValue() == 0 && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); - } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); - } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); - if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { - LHS = AndLHS.getOperand(0); - RHS = AndLHS.getOperand(1); - } - } - - if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT - // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because - // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) - LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - - // If the operand types disagree, extend the shift amount to match. Since - // BT ignores high bits (like shifts) we can use anyextend. - if (LHS.getValueType() != RHS.getValueType()) - RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); - - SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); - unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, MVT::i8), BT); - } + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; } bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); @@ -5935,6 +5956,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { } } + if (addTest) { + // Look past the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero.
Try to match // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); @@ -6092,6 +6130,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { } } + if (addTest) { + // Look past the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); @@ -7524,8 +7579,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1 == Type::getInt32Ty(Ty1->getContext()) && - Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit(); + return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7749,7 +7803,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, for (int i=0; i < 2 + X86AddrNumOperands; ++i) argOpers[i] = &bInstr->getOperand(i+2); - // x86 address has 4 operands: base, index, scale, and displacement + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -7777,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg()) .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); - unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); - unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); + // The subsequent operations should be using the destination registers of + // the PHI instructions.
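Stepping back to the LowerToBT helper used by LowerSETCC, LowerSELECT, and LowerBRCOND above: it recognizes single-bit tests and turns them into one BT instruction plus a flag read. In plain C++ terms (bit index assumed in range; names are illustrative):

#include <cstdint>

bool bit_clear(uint32_t x, unsigned n) {
  return (x & (1u << n)) == 0; // BT then SETAE: true when CF == 0
}

bool bit_set(uint32_t x, unsigned n) {
  return ((x >> n) & 1u) != 0; // BT then SETB: true when CF == 1
}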
if (invSrc) { - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2); + t1 = F->getRegInfo().createVirtualRegister(RC); + t2 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg()); } else { - tt1 = t1; - tt2 = t2; + t1 = dest1Oper.getReg(); + t2 = dest2Oper.getReg(); } int valArgIndx = lastAddrIndx + 1; @@ -7798,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, else MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5); if (regOpcL != X86::MOV32rr) - MIB.addReg(tt1); + MIB.addReg(t1); (*MIB).addOperand(*argOpers[valArgIndx]); assert(argOpers[valArgIndx + 1]->isReg() == argOpers[valArgIndx]->isReg()); @@ -7809,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, else MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6); if (regOpcH != X86::MOV32rr) - MIB.addReg(tt2); + MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); @@ -9108,6 +9164,64 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT != MVT::i64 || !Subtarget->is64Bit()) + return SDValue(); + + // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + + SDValue ShAmt0 = N0.getOperand(1); + if (ShAmt0.getValueType() != MVT::i8) + return SDValue(); + SDValue ShAmt1 = N1.getOperand(1); + if (ShAmt1.getValueType() != MVT::i8) + return SDValue(); + if (ShAmt0.getOpcode() == ISD::TRUNCATE) + ShAmt0 = ShAmt0.getOperand(0); + if (ShAmt1.getOpcode() == ISD::TRUNCATE) + ShAmt1 = ShAmt1.getOperand(0); + + DebugLoc DL = N->getDebugLoc(); + unsigned Opc = X86ISD::SHLD; + SDValue Op0 = N0.getOperand(0); + SDValue Op1 = N1.getOperand(0); + if (ShAmt0.getOpcode() == ISD::SUB) { + Opc = X86ISD::SHRD; + std::swap(Op0, Op1); + std::swap(ShAmt0, ShAmt1); + } + + if (ShAmt1.getOpcode() == ISD::SUB) { + SDValue Sum = ShAmt1.getOperand(0); + if (ConstantSDNode *SumC = dyn_cast(Sum)) { + if (SumC->getSExtValue() == 64 && + ShAmt1.getOperand(1) == ShAmt0) + return DAG.getNode(Opc, DL, VT, + Op0, Op1, + DAG.getNode(ISD::TRUNCATE, DL, + MVT::i8, ShAmt0)); + } + } else if (ConstantSDNode *ShAmt1C = dyn_cast(ShAmt1)) { + ConstantSDNode *ShAmt0C = dyn_cast(ShAmt0); + if (ShAmt0C && + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + return DAG.getNode(Opc, DL, VT, + N0.getOperand(0), N1.getOperand(0), + DAG.getNode(ISD::TRUNCATE, DL, + MVT::i8, ShAmt0)); + } + + return SDValue(); +} + /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. 
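The fold PerformOrCombine implements, written out on plain 64-bit values (c must stay within 1..63 here, since c == 0 would shift by 64):

#include <cstdint>

uint64_t shld64(uint64_t x, uint64_t y, unsigned c) {
  return (x << c) | (y >> (64 - c)); // what a single SHLD computes
}

The mirrored SRL/SHL form maps to SHRD the same way. Doing this as a DAG combine supersedes the SHLD/SHRD selection patterns deleted from X86Instr64bit.td further down, and also catches shift amounts hidden behind a TRUNCATE or expressed as 64 minus the other amount.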
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -9370,6 +9484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); @@ -9423,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { std::string AsmStr = IA->getAsmString(); // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" - std::vector<std::string> AsmPieces; + SmallVector<StringRef, 4> AsmPieces; SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? switch (AsmPieces.size()) { @@ -9445,7 +9560,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType() == Type::getInt16Ty(CI->getContext()) && + if (CI->getType()->isInteger(16) && AsmPieces.size() == 3 && AsmPieces[0] == "rorw" && AsmPieces[1] == "$$8," && @@ -9455,12 +9570,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: - if (CI->getType() == Type::getInt64Ty(CI->getContext()) && + if (CI->getType()->isInteger(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 - std::vector<std::string> Words; + SmallVector<StringRef, 4> Words; SplitString(AsmPieces[0], Words, " \t"); if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { Words.clear(); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 65fbbdae9a7f..08e1dd1e060d 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; } // isTwoAddress def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // Alias Instructions //===----------------------------------------------------------------------===// -// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's -// equivalent due to implicit zero-extending, and it sometimes has a smaller -// encoding. +// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a +// smaller encoding, but doing so at isel time interferes with rematerialization +// in the current register allocator. For now, this is rewritten when the +// instruction is lowered to an MCInst. // FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove // when we have a better way to specify isel priority.
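// (Illustrative note, assuming the usual x86-64 encoding rules: after MCInst
// lowering this pseudo can be printed as "xorl %eax, %eax", which still clears
// all of RAX because 32-bit results are implicitly zero-extended, and which
// avoids the REX.W prefix byte that "xorq %rax, %rax" would need.)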
-let AddedComplexity = 1 in -def : Pat<(i64 0), - (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>; +let Defs = [EFLAGS], + AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), + "", + [(set GR64:$dst, 0)]>; - -// Materialize i64 constant where top 32-bits are zero. +// Materialize an i64 constant whose top 32 bits are zero. This could theoretically +// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension; however, +// that would make it more difficult to rematerialize. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)]>; @@ -1683,6 +1687,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, TB; @@ -1962,6 +1967,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000), def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; +// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it +// has an immediate with at least 32 bits of leading zeros, to avoid needing to +// materialize that immediate in a register first. +def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), + imm:$imm), + x86_subreg_32bit)>; + // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; @@ -2028,7 +2044,7 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; -def : Pat<(srl_su GR16:$src, (i8 8)), +def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), @@ -2098,24 +2114,7 @@ def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c) -def : Pat<(or (srl GR64:$src1, CL:$amt), - (shl GR64:$src2, (sub 64, CL:$amt))), - (SHRD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt), - (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), - (SHRD64mrCL addr:$dst, GR64:$src2)>; - -def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))), - (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - (SHRD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), - (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - addr:$dst), - (SHRD64mrCL addr:$dst, GR64:$src2)>; - +// Double shift patterns def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; @@ -2123,24 +2122,6 @@ def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) -def : Pat<(or (shl GR64:$src1, CL:$amt), - (srl GR64:$src2, (sub 64, CL:$amt))), - (SHLD64rrCL GR64:$src1, GR64:$src2)>; - -def :
Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt), - (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), - (SHLD64mrCL addr:$dst, GR64:$src2)>; - -def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))), - (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - (SHLD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), - (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - addr:$dst), - (SHLD64mrCL addr:$dst, GR64:$src2)>; - def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; @@ -2148,6 +2129,19 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. +let AddedComplexity = 5 in { // Try this before selecting to OR +def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (ADD64rr GR64:$src1, GR64:$src2)>; +} // AddedComplexity + // X86 specific add which produces a flag. def : Pat<(addc GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e555cd176cdf..7b39fb311cba 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -711,6 +712,62 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } } +bool +X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { + switch (MI.getOpcode()) { + default: break; + case X86::MOVSX16rr8: + case X86::MOVZX16rr8: + case X86::MOVSX32rr8: + case X86::MOVZX32rr8: + case X86::MOVSX64rr8: + case X86::MOVZX64rr8: + if (!TM.getSubtarget<X86Subtarget>().is64Bit()) + // It's not always legal to reference the low 8 bits of the larger + // register in 32-bit mode. + return false; + case X86::MOVSX32rr16: + case X86::MOVZX32rr16: + case X86::MOVSX64rr16: + case X86::MOVZX64rr16: + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: { + if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) + // Be conservative. + return false; + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + switch (MI.getOpcode()) { + default: + llvm_unreachable(0); + break; + case X86::MOVSX16rr8: + case X86::MOVZX16rr8: + case X86::MOVSX32rr8: + case X86::MOVZX32rr8: + case X86::MOVSX64rr8: + case X86::MOVZX64rr8: + SubIdx = 1; + break; + case X86::MOVSX32rr16: + case X86::MOVZX32rr16: + case X86::MOVSX64rr16: + case X86::MOVZX64rr16: + SubIdx = 3; + break; + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: + SubIdx = 4; + break; + } + return true; + } + } + return false; +} + /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, @@ -1018,12 +1075,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, switch (Opc) { default: break; case X86::MOV8r0: - case X86::MOV32r0: { + case X86::MOV16r0: + case X86::MOV32r0: + case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; + case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri; break; } Clone = false; } @@ -2290,8 +2351,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV32r0) + if (MI->getOpcode() == X86::MOV64r0) + NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); + else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + else if (MI->getOpcode() == X86::MOV16r0) + NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2354,7 +2419,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // No fusion if (PrintFailedFusing) - errs() << "We failed to fuse operand " << i << " in " << *MI; + dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } @@ -2559,7 +2624,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: + case X86::MOV16r0: case X86::MOV32r0: + case X86::MOV64r0: return true; default: break; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index b83441d89eff..0ab85f4f45b2 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -448,6 +448,16 @@ class X86InstrInfo : public TargetInstrInfoImpl { unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" + /// extension instruction. That is, it's like a copy where it's legal for the + /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns + /// true, then it's expected the pre-extension value is available as a subreg + /// of the result register. This also returns the sub-register index in + /// SubIdx. + virtual bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. 
This uses a heuristic so it isn't diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4d922a54ec2c..396cb53502ef 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInFlag]>; -def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>; +def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, + [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; -def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>; +def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, + [SDNPCommutative]>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; -def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>; -def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>; -def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>; +def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, + [SDNPCommutative]>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -487,6 +493,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); }]>; +// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + else { + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; + } +}]>; + // 'shld' and 'shrd' instruction patterns. Note that even though these have // the srl and shl in their patterns, the C++ code must still check for them, // because predicates are tested before children nodes are explored. @@ -3700,18 +3721,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "xor{b}\t$dst, $dst", [(set GR8:$dst, 0)]>; + +// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller +// encoding and sometimes avoids a partial-register update, but doing so +// at isel time interferes with rematerialization in the current register +// allocator. For now, this is rewritten when the instruction is lowered +// to an MCInst. +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", + [(set GR16:$dst, 0)]>, OpSize; def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "xor{l}\t$dst, $dst", [(set GR32:$dst, 0)]>; } -// Use xorl instead of xorw since we don't care about the high 16 bits, -// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in -def : Pat<(i16 0), - (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>; - //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // @@ -3792,7 +3816,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), +def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; @@ -3858,6 +3882,7 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), "cmpxchg8b\t$dst", []>, TB; @@ -4466,7 +4491,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; -def : Pat<(srl_su GR16:$src, (i8 8)), +def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), @@ -4640,6 +4665,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. +let AddedComplexity = 5 in { // Try this before selecting to OR +def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (ADD32rr GR32:$src1, GR32:$src2)>; +} // AddedComplexity + //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b26e50869205..94b9b5543066 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr), // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to -// be naturally aligned on some targets but not on others. -// FIXME: Actually implement support for targets that don't require the -// alignment. This probably wants a subtarget predicate. +// be naturally aligned on some targets but not on others. If the subtarget +// allows unaligned accesses, match any load, though this may require +// setting a feature bit in the processor (on startup, for example). +// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() >= 16; + return Subtarget->hasVectorUAMem() + || cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index c69cc83df6bb..f363903d9316 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -348,7 +348,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { #endif #if 0 - DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr + DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr << " ESP=" << (void*)StackPtr << ": Resolving call to function: " << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index d96aafda603a..9bd96af6c750 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -591,6 +591,15 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + // DEBUG_VALUE has a special representation, and is only robust enough to + // represent SP (or BP) +- offset addressing modes. We rewrite the + // FrameIndex to be a constant; positive constants are implicitly relative + // to ESP and negative ones to EBP. + if (MI.getOpcode() == TargetInstrInfo::DEBUG_VALUE) { + MI.getOperand(i).ChangeToImmediate(getFrameIndexOffset(MF, FrameIndex)); + return 0; + } + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 75cdbada1b5a..2039be7c9b3d 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) + , HasVectorUAMem(false) , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? @@ -317,7 +318,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (Is64Bit) HasX86_64 = true; - DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel + DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index ef6dbafac346..618dd102f32e 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -78,6 +78,10 @@ class X86Subtarget : public TargetSubtarget { /// IsBTMemSlow - True if BT (bit test) instructions on memory operands are slow. bool IsBTMemSlow; + /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. + /// This may require setting a feature bit in the processor. + bool HasVectorUAMem; + /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform.
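The new HasVectorUAMem flag is what the revised memop fragment above queries. As a minimal C++ sketch of the check it enables (the helper name here is illustrative, not part of the patch):

    // Mirrors the updated memop predicate: an SSE memory operand may be
    // folded if it is 16-byte aligned, or if the subtarget (e.g. AMD
    // family 10h and later) tolerates unaligned SIMD memory operands.
    static bool canFoldSSEMemOperand(const LoadSDNode *LD,
                                     const X86Subtarget &ST) {
      return ST.hasVectorUAMem() || LD->getAlignment() >= 16;
    }
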
@@ -142,6 +146,7 @@ class X86Subtarget : public TargetSubtarget { bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } + bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } @@ -169,7 +174,7 @@ class X86Subtarget : public TargetSubtarget { p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; - else if (isTargetCygMing() || isTargetWindows()) + else if (isTargetMingw() || isTargetWindows()) p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index da2fb047be2d..383fd91d2ef7 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -49,7 +49,7 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); /// getI32Imm - Return a target constant with the specified value, of type /// i32. @@ -58,11 +58,11 @@ } // Complex Pattern Selectors. - bool SelectADDRspii(SDValue Op, SDValue Addr, SDValue &Base, + bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRdpii(SDValue Op, SDValue Addr, SDValue &Base, + bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base, + bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); virtual void InstructionSelect(); @@ -83,7 +83,7 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) { return new XCoreDAGToDAGISel(TM); } -bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr, +bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { FrameIndexSDNode *FIN = 0; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) { @@ -105,7 +105,7 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr, return false; } -bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr, +bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) { Base = Addr.getOperand(0); @@ -126,7 +126,7 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr, return false; } -bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr, +bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) { Base = Addr.getOperand(0); @@ -156,8 +156,7 @@ void XCoreDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT NVT = N->getValueType(0); if (NVT == MVT::i32) { @@ -185,7 +184,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { // FIXME fold addition into the macc instruction SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, CurDAG->getTargetConstant(0, MVT::i32)), 0); - SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) }; + SDValue Ops[] = { Zero, Zero, N->getOperand(0), N->getOperand(1) }; SDNode *ResNode =
CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, Ops, 4); ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); @@ -196,7 +195,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { // FIXME fold addition into the macc / lmul instruction SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, CurDAG->getTargetConstant(0, MVT::i32)), 0); - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Zero, Zero }; SDNode *ResNode = CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, Ops, 4); @@ -205,19 +204,19 @@ return NULL; } case XCoreISD::LADD: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, Ops, 3); } case XCoreISD::LSUB: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, Ops, 3); } // Other cases are autogenerated. } } - return SelectCode(Op); + return SelectCode(N); } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index dd5a6d868c57..d8190a43e10d 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -147,7 +147,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); if (const StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { - DEBUG(errs() << "argpromotion disable promoting argument '" + DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); } else { @@ -409,7 +409,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // to do.
if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { - DEBUG(errs() << "argpromotion not promoting argument '" + DEBUG(dbgs() << "argpromotion not promoting argument '" << Arg->getName() << "' because it would require adding more " << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number @@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->copyAttributesFrom(F); - DEBUG(errs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for @@ -808,7 +808,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); - DEBUG(errs() << "*** Promoted load of argument '" << I->getName() + DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); @@ -835,7 +835,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NewName += ".val"; TheArg->setName(NewName); - DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName() + DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index a3db8369e232..1749b1eff3a8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -425,7 +425,7 @@ void DAE::SurveyFunction(Function &F) { return; } - DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; @@ -488,7 +488,7 @@ void DAE::SurveyFunction(Function &F) { for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); - DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); // Now, check all of our arguments. unsigned i = 0; @@ -530,7 +530,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// mark any values that are used as this function's parameters or by its return /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { - DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. @@ -551,7 +551,7 @@ void DAE::MarkLive(const RetOrArg &RA) { if (!LiveValues.insert(RA).second) return; // We were already marked Live.
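// (Illustration, not from the patch: once a return value or argument is marked
// live here, PropagateLiveness below also marks live every value the Uses map
// records as flowing into it, e.g. the matching argument at each call site.)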
- DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n"); + DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n"); PropagateLiveness(RA); } @@ -616,7 +616,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; - DEBUG(errs() << "DAE - Removing return value " << i << " from " + DEBUG(dbgs() << "DAE - Removing return value " << i << " from " << F->getName() << "\n"); } } @@ -626,7 +626,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RetTypes.push_back(RetTy); NewRetIdxs[0] = 0; } else { - DEBUG(errs() << "DAE - Removing return value from " << F->getName() + DEBUG(dbgs() << "DAE - Removing return value from " << F->getName() << "\n"); ++NumRetValsEliminated; } @@ -681,7 +681,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); } else { ++NumArgumentsEliminated; - DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName() + DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() << ") from " << F->getName() << "\n"); } } @@ -915,7 +915,7 @@ bool DAE::runOnModule(Module &M) { // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates // information computed while surveying other functions. - DEBUG(errs() << "DAE - Deleting dead varargs\n"); + DEBUG(dbgs() << "DAE - Deleting dead varargs\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { Function &F = *I++; if (F.getFunctionType()->isVarArg()) @@ -926,7 +926,7 @@ bool DAE::runOnModule(Module &M) { // We assume all arguments are dead unless proven otherwise (allowing us to // determine that dead arguments passed into recursive functions are dead). // - DEBUG(errs() << "DAE - Determining liveness\n"); + DEBUG(dbgs() << "DAE - Determining liveness\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) SurveyFunction(*I); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index a16d335ef50f..64a6d7809649 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -79,16 +79,47 @@ Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } /// memory that is local to the function. Global constants are considered /// local to all functions. bool FunctionAttrs::PointsToLocalMemory(Value *V) { - V = V->getUnderlyingObject(); - // An alloca instruction defines local memory. - if (isa(V)) - return true; - // A global constant counts as local memory for our purposes. - if (GlobalVariable *GV = dyn_cast(V)) - return GV->isConstant(); - // Could look through phi nodes and selects here, but it doesn't seem - // to be useful in practice. - return false; + SmallVector Worklist; + unsigned MaxLookup = 8; + + Worklist.push_back(V); + + do { + V = Worklist.pop_back_val()->getUnderlyingObject(); + + // An alloca instruction defines local memory. + if (isa(V)) + continue; + + // A global constant counts as local memory for our purposes. + if (GlobalVariable *GV = dyn_cast(V)) { + if (!GV->isConstant()) + return false; + continue; + } + + // If both select values point to local memory, then so does the select. + if (SelectInst *SI = dyn_cast(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to local memory, then so does + // the phi. 
+ if (PHINode *PN = dyn_cast<PHINode>(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) + return false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + return false; + } while (!Worklist.empty() && --MaxLookup); + + return Worklist.empty(); } /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. @@ -136,6 +167,21 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode*> &SCC) { // Ignore calls to functions in the same SCC. if (SCCNodes.count(CS.getCalledFunction())) continue; + // Ignore intrinsics that only access local memory. + if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) + if (AliasAnalysis::getModRefBehavior(id) == + AliasAnalysis::AccessesArguments) { + // Check that all pointer arguments point to local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg)) + // Writes memory. Just give up. + return false; + } + // Only reads and writes local memory. + continue; + } } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { // Ignore loads from local memory. if (PointsToLocalMemory(LI->getPointerOperand())) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 1793bbf48ef2..ee260e9488e9 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -544,7 +544,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -771,14 +771,14 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { } if (Changed) { - DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); ++NumGlobUses; } // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. if (AllNonStoreUsesGone) { - DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); + DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); CleanupConstantGlobalUsers(GV, 0); if (GV->use_empty()) { GV->eraseFromParent(); @@ -815,7 +815,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, const Type *AllocTy, Value* NElems, TargetData* TD) { - DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); + DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); @@ -1268,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value* NElems, TargetData *TD) { - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); + DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); @@ -1600,7 +1600,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) return false; - DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), @@ -1681,7 +1681,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->removeDeadConstantUsers(); if (GV->use_empty()) { - DEBUG(errs() << "GLOBAL DEAD: " << *GV); + DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); GV->eraseFromParent(); ++NumDeleted; return true; @@ -1689,26 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!AnalyzeGlobal(GV, GS, PHIUsers)) { #if 0 - DEBUG(errs() << "Global: " << *GV); - DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(errs() << " StoredType = "); + DEBUG(dbgs() << "Global: " << *GV); + DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); + DEBUG(dbgs() << " StoredType = "); switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n"); + case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; + case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); break; - case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break; + case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; + case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; } if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); + DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName() + DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName() << "\n"); - DEBUG(errs() << " HasMultipleAccessingFunctions = " + DEBUG(dbgs() << " HasMultipleAccessingFunctions = " << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(errs() << " HasNonInstructionUser = " + DEBUG(dbgs() << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"); - DEBUG(errs() << "\n"); + DEBUG(dbgs() << "\n"); #endif // If this is a first class global and has only one accessing function @@ -1726,7 +1726,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GS.AccessingFunction->getName() == "main" && GS.AccessingFunction->hasExternalLinkage() && GV->getType()->getAddressSpace() == 0) { - DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV); + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); const Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment @@ -1743,7 +1743,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.isLoaded) { - DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); // Delete any stores we can find to the global. We may not be able to // make it completely dead though. @@ -1758,7 +1758,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return Changed; } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(errs() << "MARKING CONSTANT: " << *GV); + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); GV->setConstant(true); // Clean up any obviously simplifiable users now. 
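For reference, a hypothetical input (not taken from the patch) that triggers the two messages logged above:

    // "GLOBAL NEVER LOADED": only stored to, so the stores and then the
    // global itself are deleted.
    static int counter;
    void bump(void) { counter = 1; }

    // "MARKING CONSTANT": never stored beyond its initializer, so it is
    // marked constant and its loads can then fold away.
    static int limit = 42;
    int get_limit(void) { return limit; }
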
@@ -1766,7 +1766,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is dead now, just nuke it. if (GV->use_empty()) { - DEBUG(errs() << " *** Marking constant allowed us to simplify " + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; @@ -1794,7 +1794,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, CleanupConstantGlobalUsers(GV, GV->getInitializer()); if (GV->use_empty()) { - DEBUG(errs() << " *** Substituting initializer allowed us to " + DEBUG(dbgs() << " *** Substituting initializer allowed us to " << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; @@ -1925,11 +1925,11 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0; + !STy->getElementType(0)->isInteger(32)) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || + if (!FTy || !FTy->getReturnType()->isVoidTy() || FTy->isVarArg() || FTy->getNumParams() != 0) return 0; @@ -2091,8 +2091,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, return Val; } + std::vector<Constant*> Elts; if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { - std::vector<Constant*> Elts; // Break up the constant into its elements. if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { @@ -2120,28 +2120,38 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, STy->isPacked()); } else { ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); - const ArrayType *ATy = cast<ArrayType>(Init->getType()); + const SequentialType *InitTy = cast<SequentialType>(Init->getType()); + uint64_t NumElts; + if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast<VectorType>(InitTy)->getNumElements(); + + // Break up the array into elements.
- std::vector<Constant*> Elts; if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) Elts.push_back(cast<Constant>(*i)); + } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); } else if (isa<ConstantAggregateZero>(Init)) { - Constant *Elt = Constant::getNullValue(ATy->getElementType()); - Elts.assign(ATy->getNumElements(), Elt); - } else if (isa<UndefValue>(Init)) { - Constant *Elt = UndefValue::get(ATy->getElementType()); - Elts.assign(ATy->getNumElements(), Elt); + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); } else { - llvm_unreachable("This code is out of sync with " + assert(isa<UndefValue>(Init) && "This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); } - assert(CI->getZExtValue() < ATy->getNumElements()); + assert(CI->getZExtValue() < NumElts); Elts[CI->getZExtValue()] = EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - return ConstantArray::get(ATy, Elts); + + if (isa<ArrayType>(Init->getType())) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + else + return ConstantVector::get(&Elts[0], Elts.size()); } } @@ -2153,13 +2163,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GV->setInitializer(Val); return; } - + ConstantExpr *CE = cast<ConstantExpr>(Addr); GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - - Constant *Init = GV->getInitializer(); - Init = EvaluateStoreInto(Init, Val, CE, 2); - GV->setInitializer(Init); + GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); } /// ComputeLoadResult - Return the value that would be computed by a load from @@ -2402,7 +2409,7 @@ static bool EvaluateStaticConstructor(Function *F) { MutatedMemory, AllocaTmps); if (EvalSuccess) { // We succeeded at evaluation: commit the result. - DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << MutatedMemory.size() << " stores.\n"); for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 6918fe87c696..5725db1b7451 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -147,7 +147,7 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success!
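// (Illustration, not from the patch: if two calls inlined into the same caller
// each brought in a fixed-size buffer whose lifetimes do not overlap, the
// second alloca is RAUW'd to the first here, so both inlined bodies end up
// sharing a single stack slot.)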
- DEBUG(errs() << " ***MERGED ALLOCA: " << *AI); + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI); AI->replaceAllUsesWith(AvailableAlloca); AI->eraseFromParent(); @@ -178,13 +178,13 @@ bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); if (IC.isAlways()) { - DEBUG(errs() << " Inlining: cost=always" + DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); return true; } if (IC.isNever()) { - DEBUG(errs() << " NOT Inlining: cost=never" + DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); return false; } @@ -200,7 +200,7 @@ bool Inliner::shouldInline(CallSite CS) { float FudgeFactor = getInlineFudgeFactor(CS); if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { - DEBUG(errs() << " NOT Inlining: cost=" << Cost + DEBUG(dbgs() << " NOT Inlining: cost=" << Cost << ", Call: " << *CS.getInstruction() << "\n"); return false; } @@ -263,14 +263,14 @@ bool Inliner::shouldInline(CallSite CS) { if (outerCallsFound && someOuterCallWouldNotBeInlined && TotalSecondaryCost < Cost) { - DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() << + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << Cost << ", outer Cost = " << TotalSecondaryCost << '\n'); return false; } } - DEBUG(errs() << " Inlining: cost=" << Cost + DEBUG(dbgs() << " Inlining: cost=" << Cost << ", Call: " << *CS.getInstruction() << '\n'); return true; } @@ -280,11 +280,11 @@ bool Inliner::runOnSCC(std::vector &SCC) { const TargetData *TD = getAnalysisIfAvailable(); SmallPtrSet SCCFunctions; - DEBUG(errs() << "Inliner visiting SCC:"); + DEBUG(dbgs() << "Inliner visiting SCC:"); for (unsigned i = 0, e = SCC.size(); i != e; ++i) { Function *F = SCC[i]->getFunction(); if (F) SCCFunctions.insert(F); - DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE")); + DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); } // Scan through and identify all call sites ahead of time so that we only @@ -314,7 +314,7 @@ bool Inliner::runOnSCC(std::vector &SCC) { } } - DEBUG(errs() << ": " << CallSites.size() << " call sites.\n"); + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. @@ -346,7 +346,7 @@ bool Inliner::runOnSCC(std::vector &SCC) { // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (isInstructionTriviallyDead(CS.getInstruction())) { - DEBUG(errs() << " -> Deleting dead call: " + DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. CG[Caller]->removeCallEdgeFor(CS); @@ -377,7 +377,7 @@ bool Inliner::runOnSCC(std::vector &SCC) { // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. 
CG[Callee]->getNumReferences() == 0) { - DEBUG(errs() << " -> Deleting dead function: " + DEBUG(dbgs() << " -> Deleting dead function: " << Callee->getName() << "\n"); CallGraphNode *CalleeNode = CG[Callee]; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 20ae0d585766..3d319320862d 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -131,7 +131,7 @@ bool InternalizePass::runOnModule(Module &M) { if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); Changed = true; ++NumFunctions; - DEBUG(errs() << "Internalizing func " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement @@ -160,7 +160,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; - DEBUG(errs() << "Internalized gvar " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); } // Mark all aliases that are not in the api as internal as well. @@ -171,7 +171,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; - DEBUG(errs() << "Internalized alias " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); } return Changed; diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b2bdabc0d05e..fa8845b84281 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -498,7 +498,7 @@ static void ThunkGToF(Function *F, Function *G) { CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); - if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) { + if (NewG->getReturnType()->isVoidTy()) { ReturnInst::Create(F->getContext(), BB); } else if (CI->getType() != NewG->getReturnType()) { Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); @@ -633,17 +633,17 @@ bool MergeFunctions::runOnModule(Module &M) { bool LocalChanged; do { LocalChanged = false; - DEBUG(errs() << "size: " << FnMap.size() << "\n"); + DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); for (std::map<unsigned long, std::vector<Function *> >::iterator I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector<Function *> &FnVec = I->second; - DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); + DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); for (int i = 0, e = FnVec.size(); i != e; ++i) { for (int j = i + 1; j != e; ++j) { bool isEqual = equals(FnVec[i], FnVec[j]); - DEBUG(errs() << " " << FnVec[i]->getName() + DEBUG(dbgs() << " " << FnVec[i]->getName() << (isEqual ?
" == " : " != ") << FnVec[j]->getName() << "\n"); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index b955b9743543..f40902ffa2fd 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -145,7 +145,7 @@ bool PartialInliner::runOnModule(Module& M) { worklist.reserve(M.size()); for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) if (!FI->use_empty() && !FI->isDeclaration()) - worklist.push_back(&*FI); + worklist.push_back(&*FI); bool changed = false; while (!worklist.empty()) { diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index 67fc9349c5f1..dda32d02c873 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -93,11 +93,10 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) return 0; - DEBUG(errs() << "SretPromotion: Looking at sret function " + DEBUG(dbgs() << "SretPromotion: Looking at sret function " << F->getName() << "\n"); - assert(F->getReturnType() == Type::getVoidTy(F->getContext()) && - "Invalid function return type"); + assert(F->getReturnType()->isVoidTy() && "Invalid function return type"); Function::arg_iterator AI = F->arg_begin(); const llvm::PointerType *FArgType = dyn_cast(AI->getType()); assert(FArgType && "Invalid sret parameter type"); @@ -107,12 +106,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { - DEBUG(errs() << "SretPromotion: Not all callers can be updated\n"); + DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); NumRejectedSRETUses++; return 0; } - DEBUG(errs() << "SretPromotion: sret argument will be promoted\n"); + DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); NumSRET++; // [1] Replace use of sret parameter AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", @@ -358,7 +357,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) { unsigned Num = STy->getNumElements(); for (unsigned i = 0; i < Num; i++) { const Type *Ty = STy->getElementType(i); - if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext())) + if (!Ty->isSingleValueType() && !Ty->isVoidTy()) return true; } return false; diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt new file mode 100644 index 000000000000..5b1ff3e23bb0 --- /dev/null +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -0,0 +1,17 @@ +add_llvm_library(LLVMInstCombine + InstructionCombining.cpp + InstCombineAddSub.cpp + InstCombineAndOrXor.cpp + InstCombineCalls.cpp + InstCombineCasts.cpp + InstCombineCompares.cpp + InstCombineLoadStoreAlloca.cpp + InstCombineMulDivRem.cpp + InstCombinePHI.cpp + InstCombineSelect.cpp + InstCombineShifts.cpp + InstCombineSimplifyDemanded.cpp + InstCombineVectorOps.cpp + ) + +target_link_libraries (LLVMInstCombine LLVMTransformUtils) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h new file mode 100644 index 000000000000..536790004e86 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -0,0 +1,349 @@ +//===- InstCombine.h - Main InstCombine pass definition -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef INSTCOMBINE_INSTCOMBINE_H +#define INSTCOMBINE_INSTCOMBINE_H + +#include "InstCombineWorklist.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/TargetFolder.h" + +namespace llvm { + class CallSite; + class TargetData; + class DbgDeclareInst; + class MemIntrinsic; + class MemSetInst; + +/// SelectPatternFlavor - We can match a variety of different patterns for +/// select operations. +enum SelectPatternFlavor { + SPF_UNKNOWN = 0, + SPF_SMIN, SPF_UMIN, + SPF_SMAX, SPF_UMAX + //SPF_ABS - TODO. +}; + +/// getComplexity: Assign a complexity or rank value to LLVM Values... +/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst +static inline unsigned getComplexity(Value *V) { + if (isa<Instruction>(V)) { + if (BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || + BinaryOperator::isNot(V)) + return 3; + return 4; + } + if (isa<Argument>(V)) return 3; + return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; +} + + +/// InstCombineIRInserter - This is an IRBuilder insertion helper that works +/// just like the normal insertion helper, but also adds any new instructions +/// to the instcombine worklist. +class VISIBILITY_HIDDEN InstCombineIRInserter + : public IRBuilderDefaultInserter<true> { + InstCombineWorklist &Worklist; +public: + InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} + + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock *BB, BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); + Worklist.Add(I); + } +}; + +/// InstCombiner - The -instcombine pass. +class VISIBILITY_HIDDEN InstCombiner + : public FunctionPass, + public InstVisitor<InstCombiner, Instruction*> { + TargetData *TD; + bool MustPreserveLCSSA; + bool MadeIRChange; +public: + /// Worklist - All of the instructions that need to be simplified. + InstCombineWorklist Worklist; + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. + typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; + BuilderTy *Builder; + + static char ID; // Pass identification, replacement for typeid + InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} + +public: + virtual bool runOnFunction(Function &F); + + bool DoOneIteration(Function &F, unsigned ItNum); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + TargetData *getTargetData() const { return TD; } + + // Visitation implementation - Implement instruction combining for different + // instruction types.
The semantics are as follows: + // Return Value: + // null - No change was made + // I - Change was made, I is still valid, I may be dead though + // otherwise - Change was made, replace I with returned instruction + // + Instruction *visitAdd(BinaryOperator &I); + Instruction *visitFAdd(BinaryOperator &I); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); + Instruction *visitSub(BinaryOperator &I); + Instruction *visitFSub(BinaryOperator &I); + Instruction *visitMul(BinaryOperator &I); + Instruction *visitFMul(BinaryOperator &I); + Instruction *visitURem(BinaryOperator &I); + Instruction *visitSRem(BinaryOperator &I); + Instruction *visitFRem(BinaryOperator &I); + bool SimplifyDivRemOfSelect(BinaryOperator &I); + Instruction *commonRemTransforms(BinaryOperator &I); + Instruction *commonIRemTransforms(BinaryOperator &I); + Instruction *commonDivTransforms(BinaryOperator &I); + Instruction *commonIDivTransforms(BinaryOperator &I); + Instruction *visitUDiv(BinaryOperator &I); + Instruction *visitSDiv(BinaryOperator &I); + Instruction *visitFDiv(BinaryOperator &I); + Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); + Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Instruction *visitAnd(BinaryOperator &I); + Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); + Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C); + Instruction *visitOr (BinaryOperator &I); + Instruction *visitXor(BinaryOperator &I); + Instruction *visitShl(BinaryOperator &I); + Instruction *visitAShr(BinaryOperator &I); + Instruction *visitLShr(BinaryOperator &I); + Instruction *commonShiftTransforms(BinaryOperator &I); + Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, + Constant *RHSC); + Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, + GlobalVariable *GV, CmpInst &ICI, + ConstantInt *AndCst = 0); + Instruction *visitFCmpInst(FCmpInst &I); + Instruction *visitICmpInst(ICmpInst &I); + Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHS, + ConstantInt *RHS); + Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); + Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred, Value *TheAdd); + Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, Instruction &I); + Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I); + Instruction *commonCastTransforms(CastInst &CI); + Instruction *commonPointerCastTransforms(CastInst &CI); + Instruction *visitTrunc(TruncInst &CI); + Instruction *visitZExt(ZExtInst &CI); + Instruction *visitSExt(SExtInst &CI); + Instruction *visitFPTrunc(FPTruncInst &CI); + Instruction *visitFPExt(CastInst &CI); + Instruction *visitFPToUI(FPToUIInst &FI); + Instruction *visitFPToSI(FPToSIInst &FI); + Instruction *visitUIToFP(CastInst &CI); + Instruction *visitSIToFP(CastInst &CI); + Instruction *visitPtrToInt(PtrToIntInst &CI); + Instruction *visitIntToPtr(IntToPtrInst &CI); + Instruction *visitBitCast(BitCastInst &CI); + Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI); + Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, 
+ Value *A, Value *B, Instruction &Outer, + SelectPatternFlavor SPF2, Value *C); + Instruction *visitSelectInst(SelectInst &SI); + Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); + Instruction *visitCallInst(CallInst &CI); + Instruction *visitInvokeInst(InvokeInst &II); + + Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); + Instruction *visitPHINode(PHINode &PN); + Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); + Instruction *visitAllocaInst(AllocaInst &AI); + Instruction *visitFree(Instruction &FI); + Instruction *visitLoadInst(LoadInst &LI); + Instruction *visitStoreInst(StoreInst &SI); + Instruction *visitBranchInst(BranchInst &BI); + Instruction *visitSwitchInst(SwitchInst &SI); + Instruction *visitInsertElementInst(InsertElementInst &IE); + Instruction *visitExtractElementInst(ExtractElementInst &EI); + Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); + Instruction *visitExtractValueInst(ExtractValueInst &EV); + + // visitInstruction - Specify what to return for unhandled instructions... + Instruction *visitInstruction(Instruction &I) { return 0; } + +private: + bool ShouldChangeType(const Type *From, const Type *To) const; + Value *dyn_castNegVal(Value *V) const; + Value *dyn_castFNegVal(Value *V) const; + const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, + SmallVectorImpl &NewIndices); + Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); + + /// ValueRequiresCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated. It does not require codegen if V is + /// simple enough or if the cast can be folded into other casts. + bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); + + Instruction *visitCallSite(CallSite CS); + bool transformConstExprCastCall(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, + bool DoXform = true); + bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); + DbgDeclareInst *hasOneUsePlusDeclare(Value *V); + Value *EmitGEPOffset(User *GEP); + +public: + // InsertNewInstBefore - insert an instruction New before instruction Old + // in the program. Add the new instruction to the worklist. + // + Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { + assert(New && New->getParent() == 0 && + "New instruction already inserted into a basic block!"); + BasicBlock *BB = Old.getParent(); + BB->getInstList().insert(&Old, New); // Insert inst + Worklist.Add(New); + return New; + } + + // ReplaceInstUsesWith - This method is to be used when an instruction is + // found to be dead, replacable with another preexisting expression. Here + // we add all uses of I to the worklist, replace all uses of I with the new + // value, then return I, so that the inst combiner will know that I was + // modified. + // + Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + + // If we are replacing the instruction with itself, this must be in a + // segment of unreachable code, so just clobber the instruction. + if (&I == V) + V = UndefValue::get(I.getType()); + + I.replaceAllUsesWith(V); + return &I; + } + + // EraseInstFromFunction - When dealing with an instruction that has side + // effects or produces a void value, we can't rely on DCE to delete the + // instruction. Instead, visit methods should return the value returned by + // this function. 
+ Instruction *EraseInstFromFunction(Instruction &I) { + DEBUG(errs() << "IC: ERASE " << I << '\n'); + + assert(I.use_empty() && "Cannot erase instruction that is used!"); + // Make sure that we reprocess all operands now that we reduced their + // use counts. + if (I.getNumOperands() < 8) { + for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) + if (Instruction *Op = dyn_cast(*i)) + Worklist.Add(Op); + } + Worklist.Remove(&I); + I.eraseFromParent(); + MadeIRChange = true; + return 0; // Don't do anything with FI + } + + void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0) const { + return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + } + + bool MaskedValueIsZero(Value *V, const APInt &Mask, + unsigned Depth = 0) const { + return llvm::MaskedValueIsZero(V, Mask, TD, Depth); + } + unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { + return llvm::ComputeNumSignBits(Op, TD, Depth); + } + +private: + + /// SimplifyCommutative - This performs a few simplifications for + /// commutative operators. + bool SimplifyCommutative(BinaryOperator &I); + + /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value + /// based on the demanded bits. + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth=0); + + /// SimplifyDemandedInstructionBits - Inst is an integer instruction that + /// SimplifyDemandedBits knows about. See if the instruction has any + /// properties that allow us to simplify its operands. + bool SimplifyDemandedInstructionBits(Instruction &Inst); + + Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, + APInt& UndefElts, unsigned Depth = 0); + + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select + // which has a PHI node as operand #0, see if we can fold the instruction + // into the PHI (which is only possible if all operands to the PHI are + // constants). + // + // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms + // that would normally be unprofitable because they strongly encourage jump + // threading. + Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); + + // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" + // operator and they all are only used by the PHI, PHI together their + // inputs, and do the operation once, to the result of the PHI. 
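The ComputeMaskedBits/MaskedValueIsZero wrappers above drive many of the folds that follow. A minimal standalone illustration of the two facts they track (known-zero and known-one bits), independent of the LLVM APIs:

    #include <cassert>
    #include <cstdint>

    int main() {
      // For y = x & 0x0F the analysis would report KnownZero = 0xF0; for
      // y = x | 0xF0 it would report KnownOne = 0xF0. Exhaustive at 8 bits:
      for (unsigned x = 0; x < 256; ++x) {
        uint8_t a = x & 0x0F;
        uint8_t o = x | 0xF0;
        assert((a & 0xF0) == 0);       // MaskedValueIsZero(a, 0xF0)
        assert((o & 0xF0) == 0xF0);    // all mask bits known one
      }
    }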
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); + Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); + Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); + Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); + + + Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, + ConstantInt *AndRHS, BinaryOperator &TheAnd); + + Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, + bool isSub, Instruction &I); + Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside, Instruction &IB); + Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); + Instruction *MatchBSwap(BinaryOperator &I); + bool SimplifyStoreAtEndOfBlock(StoreInst &SI); + Instruction *SimplifyMemTransfer(MemIntrinsic *MI); + Instruction *SimplifyMemSet(MemSetInst *MI); + + + Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); + + unsigned GetOrEnforceKnownAlignment(Value *V, + unsigned PrefAlign = 0); + +}; + + + +} // end namespace llvm. + +#endif diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp new file mode 100644 index 000000000000..4891ff00e7b1 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -0,0 +1,740 @@ +//===- InstCombineAddSub.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for add, fadd, sub, and fsub. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// AddOne - Add one to a ConstantInt. +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + + +// dyn_castFoldableMul - If this value is a multiply that can be folded into +// other computations (because it has a constant operand), return the +// non-constant operand of the multiply, and set CST to point to the multiplier. +// Otherwise, return null. +// +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { + if (!V->hasOneUse() || !V->getType()->isInteger()) + return 0; + + Instruction *I = dyn_cast(V); + if (I == 0) return 0; + + if (I->getOpcode() == Instruction::Mul) + if ((CST = dyn_cast(I->getOperand(1)))) + return I->getOperand(0); + if (I->getOpcode() == Instruction::Shl) + if ((CST = dyn_cast(I->getOperand(1)))) { + // The multiplier is really 1 << CST. + uint32_t BitWidth = cast(V->getType())->getBitWidth(); + uint32_t CSTVal = CST->getLimitedValue(BitWidth); + CST = ConstantInt::get(V->getType()->getContext(), + APInt(BitWidth, 1).shl(CSTVal)); + return I->getOperand(0); + } + return 0; +} + + +/// WillNotOverflowSignedAdd - Return true if we can prove that: +/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) +/// This basically requires proving that the add in the original type would not +/// overflow to change the sign bit or have a carry out. 
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here are some simple + // ones. + + // Add has the property that adding any two 2's complement numbers can only + // have one carry bit which can change a sign. As such, if LHS and RHS each + // have at least two sign bits, we know that the addition of the two values + // will sign extend fine. + if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) + return true; + + + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. + + return false; +} + +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + + if (Constant *RHSC = dyn_cast(RHS)) { + if (ConstantInt *CI = dyn_cast(RHSC)) { + // X + (signbit) --> X ^ signbit + const APInt& Val = CI->getValue(); + uint32_t BitWidth = Val.getBitWidth(); + if (Val == APInt::getSignBit(BitWidth)) + return BinaryOperator::CreateXor(LHS, RHS); + + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? C + 1 : C + if (ZExtInst *ZI = dyn_cast(LHS)) + if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); + } + + if (isa(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + ConstantInt *XorRHS = 0; + Value *XorLHS = 0; + if (isa(RHSC) && + match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); + const APInt& RHSVal = cast(RHSC)->getValue(); + + uint32_t Size = TySizeBits / 2; + APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); + APInt CFF80Val(-C0080Val); + do { + if (TySizeBits > Size) { + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. + if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || + (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { + // This is a sign extend if the top bits are known zero. + if (!MaskedValueIsZero(XorLHS, + APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) + Size = 0; // Not a sign ext, but can't be any others either. + break; + } + } + Size >>= 1; + C0080Val = APIntOps::lshr(C0080Val, Size); + CFF80Val = APIntOps::ashr(CFF80Val, Size); + } while (Size >= 1); + + // FIXME: This shouldn't be necessary. When the backends can handle types + // with funny bit widths then this switch statement should be removed. It + // is just here to get the size of the "middle" type back up to something + // that the back ends can handle. 
+ const Type *MiddleType = 0; + switch (Size) { + default: break; + case 32: + case 16: + case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; + } + if (MiddleType) { + Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); + return new SExtInst(NewTrunc, I.getType(), I.getName()); + } + } + } + + if (I.getType()->isInteger(1)) + return BinaryOperator::CreateXor(LHS, RHS); + + if (I.getType()->isInteger()) { + // X + X --> X << 1 + if (LHS == RHS) + return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); + + if (Instruction *RHSI = dyn_cast(RHS)) { + if (RHSI->getOpcode() == Instruction::Sub) + if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B + return ReplaceInstUsesWith(I, RHSI->getOperand(0)); + } + if (Instruction *LHSI = dyn_cast(LHS)) { + if (LHSI->getOpcode() == Instruction::Sub) + if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B + return ReplaceInstUsesWith(I, LHSI->getOperand(0)); + } + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castNegVal(LHS)) { + if (LHS->getType()->isIntOrIntVector()) { + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); + } + } + + return BinaryOperator::CreateSub(RHS, LHSV); + } + + // A + -B --> A - B + if (!isa(RHS)) + if (Value *V = dyn_castNegVal(RHS)) + return BinaryOperator::CreateSub(LHS, V); + + + ConstantInt *C2; + if (Value *X = dyn_castFoldableMul(LHS, C2)) { + if (X == RHS) // X*C + X --> X * (C+1) + return BinaryOperator::CreateMul(RHS, AddOne(C2)); + + // X*C1 + X*C2 --> X * (C1+C2) + ConstantInt *C1; + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); + } + + // X + X*C --> X * (C+1) + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); + + // X + ~X --> -1 since ~X = -X-1 + if (match(LHS, m_Not(m_Specific(RHS))) || + match(RHS, m_Not(m_Specific(LHS)))) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); + + // A+B --> A|B iff A and B have no bits set in common. + if (const IntegerType *IT = dyn_cast(I.getType())) { + APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + if (LHSKnownZero != 0) { + APInt RHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // No bits in common -> bitwise or. 
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateOr(LHS, RHS); + } + } + + // W*X + Y*Z --> W * (X+Z) iff W == Y + if (I.getType()->isIntOrIntVector()) { + Value *W, *X, *Y, *Z; + if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && + match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { + if (W != Y) { + if (W == Z) { + std::swap(Y, Z); + } else if (Y == X) { + std::swap(W, X); + } else if (X == Z) { + std::swap(Y, Z); + std::swap(W, X); + } + } + + if (W == Y) { + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); + return BinaryOperator::CreateMul(W, NewAdd); + } + } + } + + if (ConstantInt *CRHS = dyn_cast(RHS)) { + Value *X = 0; + if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X + return BinaryOperator::CreateSub(SubOne(CRHS), X); + + // (X & FF00) + xx00 -> (X+xx00) & FF00 + if (LHS->hasOneUse() && + match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); + if (Anded == CRHS) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt &AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); + } + } + } + + // Try to fold constant add into select arguments. + if (SelectInst *SI = dyn_cast(LHS)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } + + // add (select X 0 (sub n A)) A --> select X A n + { + SelectInst *SI = dyn_cast(LHS); + Value *A = RHS; + if (!SI) { + SI = dyn_cast(RHS); + A = LHS; + } + if (SI && SI->hasOneUse()) { + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); + Value *N; + + // Can we fold the add into the argument of the select? + // We check both true and false select arguments for a matching subtract. + if (match(FV, m_Zero()) && + match(TV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the true select value. + return SelectInst::Create(SI->getCondition(), N, A); + if (match(TV, m_Zero()) && + match(FV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the false select value. + return SelectInst::Create(SI->getCondition(), A, N); + } + } + + // Check for (add (sext x), y), see if we can merge this into an + // integer add followed by a sext. + if (SExtInst *LHSConv = dyn_cast(LHS)) { + // (add (sext x), cst) --> (sext (add x, cst')) + if (ConstantInt *RHSC = dyn_cast(RHS)) { + Constant *CI = + ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSExt(CI, I.getType()) == RHSC && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new, smaller add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } + + // (add (sext x), (sext y)) --> (sext (add int x, y)) + if (SExtInst *RHSConv = dyn_cast(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of sexts), and if the + // integer add will not overflow. 
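The A+B --> A|B fold above is sound exactly because disjoint known bits rule out carries; an exhaustive 8-bit check:

    #include <cassert>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          if ((a & b) == 0)            // known-zero sets cover each other
            assert(a + b == (a | b));  // no carry anywhere, so add == or
    }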
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Constant *RHSC = dyn_cast(RHS)) { + // X + 0 --> X + if (ConstantFP *CFP = dyn_cast(RHSC)) { + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) + return ReplaceInstUsesWith(I, LHS); + } + + if (isa(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castFNegVal(LHS)) + return BinaryOperator::CreateFSub(RHS, LHSV); + + // A + -B --> A - B + if (!isa(RHS)) + if (Value *V = dyn_castFNegVal(RHS)) + return BinaryOperator::CreateFSub(LHS, V); + + // Check for X+0.0. Simplify it to X if we know X is not -0.0. + if (ConstantFP *CFP = dyn_cast(RHS)) + if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) + return ReplaceInstUsesWith(I, LHS); + + // Check for (add double (sitofp x), y), see if we can merge this into an + // integer add followed by a promotion. + if (SIToFPInst *LHSConv = dyn_cast(LHS)) { + // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) + // ... if the constant fits in the integer value. This is useful for things + // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer + // requires a constant pool load, and generally allows the add to be better + // instcombined. + if (ConstantFP *CFP = dyn_cast(RHS)) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + + // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) + if (SIToFPInst *RHSConv = dyn_cast(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0),"addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + } + + return Changed ? &I : 0; +} + + +/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the +/// code necessary to compute the offset from the base pointer (without adding +/// in the base pointer). Return the result as a signed integer of intptr size. 
+Value *InstCombiner::EmitGEPOffset(User *GEP) { + TargetData &TD = *getTargetData(); + gep_type_iterator GTI = gep_type_begin(GEP); + const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Value *Result = Constant::getNullValue(IntPtrTy); + + // Build a mask for high order bits. + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; + ++i, ++GTI) { + Value *Op = *i; + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; + if (ConstantInt *OpC = dyn_cast(Op)) { + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + + Result = Builder->CreateAdd(Result, + ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); + continue; + } + + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + Constant *OC = + ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale); + // Emit an add instruction. + Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); + continue; + } + // Convert to correct type. + if (Op->getType() != IntPtrTy) + Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); + if (Size != 1) { + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + // We'll let instcombine(mul) convert this to a shl if possible. + Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); + } + + // Emit an add instruction. + Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); + } + return Result; +} + + + + +/// Optimize pointer differences into the same array into a size. Consider: +/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer +/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. +/// +Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, + const Type *Ty) { + assert(TD && "Must have target data info for this"); + + // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize + // this. + bool Swapped = false; + GetElementPtrInst *GEP = 0; + ConstantExpr *CstGEP = 0; + + // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo". + // For now we require one side to be the base pointer "A" or a constant + // expression derived from it. + if (GetElementPtrInst *LHSGEP = dyn_cast(LHS)) { + // (gep X, ...) - X + if (LHSGEP->getOperand(0) == RHS) { + GEP = LHSGEP; + Swapped = false; + } else if (ConstantExpr *CE = dyn_cast(RHS)) { + // (gep X, ...) - (ce_gep X, ...) + if (CE->getOpcode() == Instruction::GetElementPtr && + LHSGEP->getOperand(0) == CE->getOperand(0)) { + CstGEP = CE; + GEP = LHSGEP; + Swapped = false; + } + } + } + + if (GetElementPtrInst *RHSGEP = dyn_cast(RHS)) { + // X - (gep X, ...) + if (RHSGEP->getOperand(0) == LHS) { + GEP = RHSGEP; + Swapped = true; + } else if (ConstantExpr *CE = dyn_cast(LHS)) { + // (ce_gep X, ...) - (gep X, ...) + if (CE->getOpcode() == Instruction::GetElementPtr && + RHSGEP->getOperand(0) == CE->getOperand(0)) { + CstGEP = CE; + GEP = RHSGEP; + Swapped = true; + } + } + } + + if (GEP == 0) + return 0; + + // Emit the offset of the GEP and an intptr_t. + Value *Result = EmitGEPOffset(GEP); + + // If we had a constant expression GEP on the other side offsetting the + // pointer, subtract it from the offset we have. 
+ if (CstGEP) { + Value *CstOffset = EmitGEPOffset(CstGEP); + Result = Builder->CreateSub(Result, CstOffset); + } + + + // If we have p - gep(p, ...) then we have to negate the result. + if (Swapped) + Result = Builder->CreateNeg(Result, "diff.neg"); + + return Builder->CreateIntCast(Result, Ty, true); +} + + +Instruction *InstCombiner::visitSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Op0 == Op1) // sub X, X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. + if (Value *V = dyn_castNegVal(Op1)) { + BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); + Res->setHasNoSignedWrap(I.hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return Res; + } + + if (isa(Op0)) + return ReplaceInstUsesWith(I, Op0); // undef - X -> undef + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op1); // X - undef -> undef + if (I.getType()->isInteger(1)) + return BinaryOperator::CreateXor(Op0, Op1); + + if (ConstantInt *C = dyn_cast(Op0)) { + // Replace (-1 - A) with (~A). + if (C->isAllOnesValue()) + return BinaryOperator::CreateNot(Op1); + + // C - ~X == X + (1+C) + Value *X = 0; + if (match(Op1, m_Not(m_Value(X)))) + return BinaryOperator::CreateAdd(X, AddOne(C)); + + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (C->isZero()) { + if (BinaryOperator *SI = dyn_cast(Op1)) { + if (SI->getOpcode() == Instruction::LShr) { + if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert AShr. + return BinaryOperator::Create(Instruction::AShr, + SI->getOperand(0), CU, SI->getName()); + } + } + } else if (SI->getOpcode() == Instruction::AShr) { + if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert LShr. + return BinaryOperator::CreateLShr( + SI->getOperand(0), CU, SI->getName()); + } + } + } + } + } + + // Try to fold constant sub into select arguments. + if (SelectInst *SI = dyn_cast(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + // C - zext(bool) -> bool ? C - 1 : C + if (ZExtInst *ZI = dyn_cast(Op1)) + if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); + } + + if (BinaryOperator *Op1I = dyn_cast(Op1)) { + if (Op1I->getOpcode() == Instruction::Add) { + if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(1), + I.getName()); + else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(0), + I.getName()); + else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { + if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) + // C1-(X+C2) --> (C1-C2)-X + return BinaryOperator::CreateSub( + ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); + } + } + + if (Op1I->hasOneUse()) { + // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression + // is not used by anyone else... + // + if (Op1I->getOpcode() == Instruction::Sub) { + // Swap the two operands of the subexpr... 
+ Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); + Op1I->setOperand(0, IIOp1); + Op1I->setOperand(1, IIOp0); + + // Create the new top level add instruction... + return BinaryOperator::CreateAdd(Op0, Op1); + } + + // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... + // + if (Op1I->getOpcode() == Instruction::And && + (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { + Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); + + Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); + return BinaryOperator::CreateAnd(Op0, NewNot); + } + + // 0 - (X sdiv C) -> (X sdiv -C) + if (Op1I->getOpcode() == Instruction::SDiv) + if (ConstantInt *CSI = dyn_cast(Op0)) + if (CSI->isZero()) + if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) + return BinaryOperator::CreateSDiv(Op1I->getOperand(0), + ConstantExpr::getNeg(DivRHS)); + + // X - X*C --> X * (1-C) + ConstantInt *C2 = 0; + if (dyn_castFoldableMul(Op1I, C2) == Op0) { + Constant *CP1 = + ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), + C2); + return BinaryOperator::CreateMul(Op0, CP1); + } + } + } + + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + if (Op0I->getOpcode() == Instruction::Add) { + if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(1)); + else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(0)); + } else if (Op0I->getOpcode() == Instruction::Sub) { + if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y + return BinaryOperator::CreateNeg(Op0I->getOperand(1), + I.getName()); + } + } + + ConstantInt *C1; + if (Value *X = dyn_castFoldableMul(Op0, C1)) { + if (X == Op1) // X*C - X --> X * (C-1) + return BinaryOperator::CreateMul(Op1, SubOne(C1)); + + ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); + } + + // Optimize pointer differences into the same array into a size. Consider: + // &A[10] - &A[0]: we should compile this to "10". + if (TD) { + Value *LHSOp, *RHSOp; + if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + + // trunc(p)-trunc(q) -> trunc(p-q) + if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + } + + return 0; +} + +Instruction *InstCombiner::visitFSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // If this is a 'B = x-(-A)', change to B = x+A... + if (Value *V = dyn_castFNegVal(Op1)) + return BinaryOperator::CreateFAdd(Op0, V); + + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp new file mode 100644 index 000000000000..af300fc3577b --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -0,0 +1,1990 @@ +//===- InstCombineAndOrXor.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitAnd, visitOr, and visitXor functions. 
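Several of the visitSub folds above are plain two's-complement identities; a quick standalone check (assuming the usual arithmetic right shift for signed ints, as on all mainstream targets):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t C = 37;
      for (int32_t X = -1000; X <= 1000; ++X) {
        assert(-1 - X == ~X);            // (-1 - A) == ~A
        assert(C - ~X == X + (C + 1));   // C - ~X == X + (1+C)
        // -(X >>u 31) == (X >>s 31): both 0 for non-negative X, else -1.
        uint32_t u = static_cast<uint32_t>(X);
        assert(-static_cast<int32_t>(u >> 31) == (X >> 31));
      }
    }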
+// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + + +/// AddOne - Add one to a ConstantInt. +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + +/// isFreeToInvert - Return true if the specified value is free to invert (apply +/// ~ to). This happens in cases where the ~ can be eliminated. +static inline bool isFreeToInvert(Value *V) { + // ~(~(X)) -> X. + if (BinaryOperator::isNot(V)) + return true; + + // Constants can be considered to be not'ed values. + if (isa(V)) + return true; + + // Compares can be inverted if they have a single use. + if (CmpInst *CI = dyn_cast(V)) + return CI->hasOneUse(); + + return false; +} + +static inline Value *dyn_castNotVal(Value *V) { + // If this is not(not(x)) don't return that this is a not: we want the two + // not's to be folded first. + if (BinaryOperator::isNot(V)) { + Value *Operand = BinaryOperator::getNotArgument(V); + if (!isFreeToInvert(Operand)) + return Operand; + } + + // Constants can be considered to be not'ed values... + if (ConstantInt *C = dyn_cast(V)) + return ConstantInt::get(C->getType(), ~C->getValue()); + return 0; +} + + +/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits +/// are carefully arranged to allow folding of expressions such as: +/// +/// (A < B) | (A > B) --> (A != B) +/// +/// Note that this is only valid if the first and second predicates have the +/// same sign. Is illegal to do: (A u< B) | (A s> B) +/// +/// Three bits are used to represent the condition, as follows: +/// 0 A > B +/// 1 A == B +/// 2 A < B +/// +/// <=> Value Definition +/// 000 0 Always false +/// 001 1 A > B +/// 010 2 A == B +/// 011 3 A >= B +/// 100 4 A < B +/// 101 5 A != B +/// 110 6 A <= B +/// 111 7 Always true +/// +static unsigned getICmpCode(const ICmpInst *ICI) { + switch (ICI->getPredicate()) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + llvm_unreachable("Invalid ICmp predicate!"); + return 0; + } +} + +/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp +/// predicate into a three bit mask. It also returns whether it is an ordered +/// predicate by reference. 
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { + isOrdered = false; + switch (CC) { + case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000 + case FCmpInst::FCMP_UNO: return 0; // 000 + case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001 + case FCmpInst::FCMP_UGT: return 1; // 001 + case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010 + case FCmpInst::FCMP_UEQ: return 2; // 010 + case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011 + case FCmpInst::FCMP_UGE: return 3; // 011 + case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100 + case FCmpInst::FCMP_ULT: return 4; // 100 + case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101 + case FCmpInst::FCMP_UNE: return 5; // 101 + case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110 + case FCmpInst::FCMP_ULE: return 6; // 110 + // True -> 7 + default: + // Not expecting FCMP_FALSE and FCMP_TRUE; + llvm_unreachable("Unexpected FCmp predicate!"); + return 0; + } +} + +/// getICmpValue - This is the complement of getICmpCode, which turns an +/// opcode and two operands into either a constant true or false, or a brand +/// new ICmp instruction. The sign is passed in to determine which kind +/// of predicate to use in the new icmp instruction. +static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) { + switch (Code) { + default: assert(0 && "Illegal ICmp code!"); + case 0: + return ConstantInt::getFalse(LHS->getContext()); + case 1: + if (Sign) + return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); + return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); + case 2: + return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); + case 3: + if (Sign) + return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); + return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); + case 4: + if (Sign) + return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); + return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); + case 5: + return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); + case 6: + if (Sign) + return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); + return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); + case 7: + return ConstantInt::getTrue(LHS->getContext()); + } +} + +/// getFCmpValue - This is the complement of getFCmpCode, which turns an +/// opcode and two operands into either a FCmp instruction. isordered is passed +/// in to determine which kind of predicate to use in the new fcmp instruction. 
+static Value *getFCmpValue(bool isordered, unsigned code, + Value *LHS, Value *RHS) { + switch (code) { + default: llvm_unreachable("Illegal FCmp code!"); + case 0: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); + case 1: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); + case 2: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); + case 3: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); + case 4: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); + case 5: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); + case 6: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); + case 7: return ConstantInt::getTrue(LHS->getContext()); + } +} + +/// PredicatesFoldable - Return true if both predicates match sign or if at +/// least one of them is an equality comparison (which is signless). +static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} + +// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where +// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is +// guaranteed to be a binary operator. +Instruction *InstCombiner::OptAndOp(Instruction *Op, + ConstantInt *OpRHS, + ConstantInt *AndRHS, + BinaryOperator &TheAnd) { + Value *X = Op->getOperand(0); + Constant *Together = 0; + if (!Op->isShift()) + Together = ConstantExpr::getAnd(AndRHS, OpRHS); + + switch (Op->getOpcode()) { + case Instruction::Xor: + if (Op->hasOneUse()) { + // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) + Value *And = Builder->CreateAnd(X, AndRHS); + And->takeName(Op); + return BinaryOperator::CreateXor(And, Together); + } + break; + case Instruction::Or: + if (Together == AndRHS) // (X | C) & C --> C + return ReplaceInstUsesWith(TheAnd, AndRHS); + + if (Op->hasOneUse() && Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Value *Or = Builder->CreateOr(X, Together); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); + } + break; + case Instruction::Add: + if (Op->hasOneUse()) { + // Adding a one to a single bit bit-field should be turned into an XOR + // of the bit. First thing to check is to see if this AND is with a + // single bit constant. + const APInt &AndRHSV = cast(AndRHS)->getValue(); + + // If there is only one bit set. + if (AndRHSV.isPowerOf2()) { + // Ok, at this point, we know that we are masking the result of the + // ADD down to exactly one bit. If the constant we are adding has + // no bits set below this bit, then we can eliminate the ADD. + const APInt& AddRHS = cast(OpRHS)->getValue(); + + // Check to see if any bits below the one bit set in AndRHSV are set. + if ((AddRHS & (AndRHSV-1)) == 0) { + // If not, the only thing that can effect the output of the AND is + // the bit specified by AndRHSV. If that bit is set, the effect of + // the XOR is to toggle the bit. 
If it is clear, then the ADD has
+            // no effect.
+            if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+              TheAnd.setOperand(0, X);
+              return &TheAnd;
+            } else {
+              // Pull the XOR out of the AND.
+              Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+              NewAnd->takeName(Op);
+              return BinaryOperator::CreateXor(NewAnd, AndRHS);
+            }
+          }
+        }
+      }
+    }
+    break;
+
+  case Instruction::Shl: {
+    // We know that the AND will not produce any of the bits shifted in, so if
+    // the anded constant includes them, clear them now!
+    //
+    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+    ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+                                       AndRHS->getValue() & ShlMask);
+
+    if (CI->getValue() == ShlMask) {
+      // Masking out bits that the shift already masks.
+      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
+    } else if (CI != AndRHS) {                  // Reducing bits set in and.
+      TheAnd.setOperand(1, CI);
+      return &TheAnd;
+    }
+    break;
+  }
+  case Instruction::LShr: {
+    // We know that the AND will not produce any of the bits shifted in, so if
+    // the anded constant includes them, clear them now!  This only applies to
+    // unsigned shifts, because a signed shr may bring in set bits!
+    //
+    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+    ConstantInt *CI = ConstantInt::get(Op->getContext(),
+                                       AndRHS->getValue() & ShrMask);
+
+    if (CI->getValue() == ShrMask) {
+      // Masking out bits that the shift already masks.
+      return ReplaceInstUsesWith(TheAnd, Op);
+    } else if (CI != AndRHS) {
+      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
+      return &TheAnd;
+    }
+    break;
+  }
+  case Instruction::AShr:
+    // Signed shr.
+    // See if this is shifting in some sign extension, then masking it out
+    // with an and.
+    if (Op->hasOneUse()) {
+      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+      Constant *C = ConstantInt::get(Op->getContext(),
+                                     AndRHS->getValue() & ShrMask);
+      if (C == AndRHS) {          // Masking out bits shifted in.
+        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+        // Make the argument unsigned.
+        Value *ShVal = Op->getOperand(0);
+        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+      }
+    }
+    break;
+  }
+  return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
+/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
+/// indicates whether to treat V, Lo and Hi as signed or not.  IB is the
+/// location to insert new instructions.
+Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+                                           bool isSigned, bool Inside,
+                                           Instruction &IB) {
+  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+         "Lo is not <= Hi in range emission code!");
+
+  if (Inside) {
+    if (Lo == Hi)  // Trivially false.
+      return new ICmpInst(ICmpInst::ICMP_NE, V, V);
+
+    // V >= Min && V < Hi --> V < Hi
+    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+      ICmpInst::Predicate pred = (isSigned ?
+        ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+      return new ICmpInst(pred, V, Hi);
+    }
+
+    // Emit V-Lo <u Hi-Lo
+    Constant *NegLo = ConstantExpr::getNeg(Lo);
+    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
+  }
+
+  if (Lo == Hi)  // Trivially true.
+ return new ICmpInst(ICmpInst::ICMP_EQ, V, V); + + // V < Min || V >= Hi -> V > Hi-1 + Hi = SubOne(cast(Hi)); + if (cast(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); + return new ICmpInst(pred, V, Hi); + } + + // Emit V-Lo >u Hi-1-Lo + // Note that Hi has already had one subtracted from it, above. + ConstantInt *NegLo = cast(ConstantExpr::getNeg(Lo)); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); +} + +// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with +// any number of 0s on either side. The 1s are allowed to wrap from LSB to +// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is +// not, since all 1s are not contiguous. +static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { + const APInt& V = Val->getValue(); + uint32_t BitWidth = Val->getType()->getBitWidth(); + if (!APIntOps::isShiftedMask(BitWidth, V)) return false; + + // look for the first zero bit after the run of ones + MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); + // look for the first non-zero bit + ME = V.getActiveBits(); + return true; +} + +/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, +/// where isSub determines whether the operator is a sub. If we can fold one of +/// the following xforms: +/// +/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask +/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// +/// return (A +/- B). +/// +Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, + ConstantInt *Mask, bool isSub, + Instruction &I) { + Instruction *LHSI = dyn_cast(LHS); + if (!LHSI || LHSI->getNumOperands() != 2 || + !isa(LHSI->getOperand(1))) return 0; + + ConstantInt *N = cast(LHSI->getOperand(1)); + + switch (LHSI->getOpcode()) { + default: return 0; + case Instruction::And: + if (ConstantExpr::getAnd(N, Mask) == Mask) { + // If the AndRHS is a power of two minus one (0+1+), this is simple. + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == + Mask->getValue().getBitWidth()) + break; + + // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ + // part, we don't need any explicit masks to take them out of A. If that + // is all N is, ignore it. + uint32_t MB = 0, ME = 0; + if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive + uint32_t BitWidth = cast(RHS->getType())->getBitWidth(); + APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); + if (MaskedValueIsZero(RHS, Mask)) + break; + } + } + return 0; + case Instruction::Or: + case Instruction::Xor: + // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() + && ConstantExpr::getAnd(N, Mask)->isNullValue()) + break; + return 0; + } + + if (isSub) + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); +} + +/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 
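InsertRangeTest's trick is the classic unsigned-wrap range check: V >= Lo && V < Hi becomes a single unsigned compare of V-Lo against Hi-Lo. Exhaustive at 8 bits (assuming Lo <= Hi, as the assert requires):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int Lo = -10, Hi = 50;   // any Lo <= Hi fitting in int8_t
      for (int v = -128; v <= 127; ++v) {
        bool twoCompares = (v >= Lo) && (v < Hi);
        bool oneCompare  = static_cast<uint8_t>(v - Lo) <
                           static_cast<uint8_t>(Hi - Lo);
        assert(twoCompares == oneCompare);
      }
    }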
+Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + + // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) + if (PredicatesFoldable(LHSCC, RHSCC)) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + Value *RV = getICmpValue(isSigned, Code, Op0, Op1); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value. + return ReplaceInstUsesWith(I, RV); + } + } + + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). + Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); + ConstantInt *LHSCst = dyn_cast(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast(RHS->getOperand(1)); + if (LHSCst == 0 || RHSCst == 0) return 0; + + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) + // where C is a power of 2 + if (LHSCC == ICmpInst::ICMP_ULT && + LHSCst->getValue().isPowerOf2()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + } + + // From here on, we only handle: + // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) & (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. + bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and and'ing the result + // together. Because of the above check, we know that we only have + // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. 
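The two same-constant folds near the top of FoldAndOfICmps can be checked directly; the ult form needs C to be a power of two so that "below C" means "no high bits set":

    #include <cassert>

    int main() {
      const unsigned C = 16;         // must be a power of two for the ult fold
      for (unsigned a = 0; a < 64; ++a)
        for (unsigned b = 0; b < 64; ++b) {
          assert(((a < C) && (b < C)) == ((a | b) < C));       // ult fold
          assert(((a == 0) && (b == 0)) == ((a | b) == 0));    // eq-zero fold
        }
    }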
We also know + // (from the icmp folding check above), that the two constants + // are not equal and that the larger constant is on the RHS + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false + case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 + case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 + case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 + return ReplaceInstUsesWith(I, LHS); + } + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_ULT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + break; // (X != 13 & X u< 15) -> no change + case ICmpInst::ICMP_SLT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + break; // (X != 13 & X s< 15) -> no change + case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 + case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_NE: + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, + ConstantInt::get(Add->getType(), 1)); + } + break; // (X != 13 & X != 15) -> no change + } + break; + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 + case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false + case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 + case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == 
AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+        return new ICmpInst(LHSCC, Val, RHSCst);
+      break;                        // (X u> 13 & X != 15) -> no change
+    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
+      return InsertRangeTest(Val, AddOne(LHSCst),
+                             RHSCst, false, true, I);
+    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
+      break;
+    }
+    break;
+  case ICmpInst::ICMP_SGT:
+    switch (RHSCC) {
+    default: llvm_unreachable("Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
+    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
+      return ReplaceInstUsesWith(I, RHS);
+    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
+      break;
+    case ICmpInst::ICMP_NE:
+      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+        return new ICmpInst(LHSCC, Val, RHSCst);
+      break;                        // (X s> 13 & X != 15) -> no change
+    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
+      return InsertRangeTest(Val, AddOne(LHSCst),
+                             RHSCst, true, true, I);
+    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
+      break;
+    }
+    break;
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
+                                          FCmpInst *RHS) {
+  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
+    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+        // If either of the constants are nans, then the whole thing returns
+        // false.
+        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return new FCmpInst(FCmpInst::FCMP_ORD,
+                            LHS->getOperand(0), RHS->getOperand(0));
+      }
+
+    // Handle vector zeros.  This occurs because the canonical form of
+    // "fcmp ord x,x" is "fcmp ord x, 0".
+    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+        isa<ConstantAggregateZero>(RHS->getOperand(1)))
+      return new FCmpInst(FCmpInst::FCMP_ORD,
+                          LHS->getOperand(0), RHS->getOperand(0));
+    return 0;
+  }
+
+  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+    // Swap RHS operands to match LHS.
+    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+    std::swap(Op1LHS, Op1RHS);
+  }
+
+  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
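+    // Illustrative instance of the ordered/unordered handling below
+    // (operands invented for this sketch, not part of the original change):
+    //   (fcmp ord %x, %y) & (fcmp olt %x, %y) --> fcmp olt %x, %y
+    // since ord && (ord && lt) is just olt.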
+ if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + + if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + if (Op0CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, LHS); + + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op1Pred == 0) { + std::swap(LHS, RHS); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> ueq + // ord && olt -> ord && (ord && lt) -> olt + if (Op0Ordered == Op1Ordered) + return ReplaceInstUsesWith(I, RHS); + + // uno && oeq -> uno && (ord && eq) -> false + // uno && ord -> false + if (!Op0Ordered) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + // ord && ueq -> ord && (uno || eq) -> oeq + return cast(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); + } + } + + return 0; +} + + +Instruction *InstCombiner::visitAnd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyAndInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + if (ConstantInt *AndRHS = dyn_cast(Op1)) { + const APInt &AndRHSMask = AndRHS->getValue(); + APInt NotAndRHS(~AndRHSMask); + + // Optimize a variety of ((val OP C1) & C2) combinations... + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + Value *Op0LHS = Op0I->getOperand(0); + Value *Op0RHS = Op0I->getOperand(1); + switch (Op0I->getOpcode()) { + default: break; + case Instruction::Xor: + case Instruction::Or: + // If the mask is only needed on one incoming arm, push it up. + if (!Op0I->hasOneUse()) break; + + if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { + // Not masking anything out for the LHS, move to RHS. + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); + } + if (!isa(Op0RHS) && + MaskedValueIsZero(Op0RHS, NotAndRHS)) { + // Not masking anything out for the RHS, move to LHS. + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); + } + + break; + case Instruction::Add: + // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes + break; + + case Instruction::Sub: + // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + + // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS + // has 1's for all bits that the subtraction with A might affect. + if (Op0I->hasOneUse()) { + uint32_t BitWidth = AndRHSMask.getBitWidth(); + uint32_t Zeros = AndRHSMask.countLeadingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); + + ConstantInt *A = dyn_cast(Op0LHS); + if (!(A && A->isZero()) && // avoid infinite recursion. + MaskedValueIsZero(Op0LHS, Mask)) { + Value *NewNeg = Builder->CreateNeg(Op0RHS); + return BinaryOperator::CreateAnd(NewNeg, AndRHS); + } + } + break; + + case Instruction::Shl: + case Instruction::LShr: + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + if (AndRHSMask == 1 && Op0LHS == AndRHS) { + Value *NewICmp = + Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + return new ZExtInst(NewICmp, I.getType()); + } + break; + } + + if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) + if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) + return Res; + } else if (CastInst *CI = dyn_cast(Op0)) { + // If this is an integer truncation or change from signed-to-unsigned, and + // if the source is an and/or with immediate, transform it. This + // frequently occurs for bitfield accesses. + if (Instruction *CastOp = dyn_cast(CI->getOperand(0))) { + if ((isa(CI) || isa(CI)) && + CastOp->getNumOperands() == 2) + if (ConstantInt *AndCI =dyn_cast(CastOp->getOperand(1))){ + if (CastOp->getOpcode() == Instruction::And) { + // Change: and (cast (and X, C1) to T), C2 + // into : and (cast X to T), trunc_or_bitcast(C1)&C2 + // This will fold the two constants together, which may allow + // other simplifications. + Value *NewCast = Builder->CreateTruncOrBitCast( + CastOp->getOperand(0), I.getType(), + CastOp->getName()+".shrunk"); + // trunc_or_bitcast(C1)&C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); + } else if (CastOp->getOpcode() == Instruction::Or) { + // Change: and (cast (or X, C1) to T), C2 + // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) + // trunc(C1)&C2 + return ReplaceInstUsesWith(I, AndRHS); + } + } + } + } + + // Try to fold constant and into select arguments. 
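+    // Sketch of the fold FoldOpIntoSelect performs here (constants invented
+    // for illustration):
+    //   and (select i1 %c, i32 12, i32 7), 1 --> select i1 %c, i32 0, i32 1
+    // Both arms fold to constants, so the 'and' disappears entirely.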
+ if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + + // (~A & ~B) == (~(A | B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(Or); + } + + { + Value *A = 0, *B = 0, *C = 0, *D = 0; + // (A|B) & ~(A&B) -> A^B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + // ~(A&B) & (A|B) -> A^B + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1) { // (A^B)&A -> A&(A^B) + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } else if (B == Op1) { // (A^B)&B -> B&(B^A) + cast(Op0)->swapOperands(); + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } + } + + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == Op0) { // B&(A^B) -> B&(B^A) + cast(Op1)->swapOperands(); + std::swap(A, B); + } + if (A == Op0) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); + } + + // (A&((~A)|B)) -> A&B + if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || + match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + return BinaryOperator::CreateAnd(A, Op1); + if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || + match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + return BinaryOperator::CreateAnd(A, Op0); + } + + if (ICmpInst *RHS = dyn_cast(Op1)) + if (ICmpInst *LHS = dyn_cast(Op0)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) + return Res; + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) + if (CastInst *Op0C = dyn_cast(Op0)) + if (CastInst *Op1C = dyn_cast(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && + // Only do this if the casts both really cause code to be generated. + ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + + // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast(Op1)) { + if (BinaryOperator *SI0 = dyn_cast(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } + } + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; + } + + return Changed ? 
&I : 0;
+}
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap.  The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some
+/// other value.  For example, if the current subexpression is
+/// "(shl i32 %X, 24)" then we know that the expression deposits the low byte
+/// of %X into the high byte of the bswap result and that all other bytes are
+/// zero.  If this expression is accepted, the high byte of ByteValues is set
+/// to %X to indicate a correct match.
+///
+/// This function returns true if the match was unsuccessful and false if it
+/// succeeded.  On entry to the function the "OverallLeftShift" is a signed
+/// integer value indicating the number of bytes that the subexpression is
+/// later shifted.  For example, if the expression is later right shifted by
+/// 16 bits, the OverallLeftShift value would be -2 on entry.  This is used to
+/// specify which byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user.  For example, in (X & 255), X will be
+/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
+/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+                              SmallVector<Value*, 8> &ByteValues) {
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If this is an or instruction, it may be an inner node of the bswap.
+    if (I->getOpcode() == Instruction::Or) {
+      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+                               ByteValues) ||
+             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+                               ByteValues);
+    }
+
+    // If this is a logical shift by a constant multiple of 8, recurse with
+    // OverallLeftShift and ByteMask adjusted.
+    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+      unsigned ShAmt =
+        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+      // Ensure the shift amount is defined and of a byte value.
+      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+        return true;
+
+      unsigned ByteShift = ShAmt >> 3;
+      if (I->getOpcode() == Instruction::Shl) {
+        // X << 2 -> collect(X, +2)
+        OverallLeftShift += ByteShift;
+        ByteMask >>= ByteShift;
+      } else {
+        // X >>u 2 -> collect(X, -2)
+        OverallLeftShift -= ByteShift;
+        ByteMask <<= ByteShift;
+        ByteMask &= (~0U >> (32-ByteValues.size()));
+      }
+
+      if (OverallLeftShift >= (int)ByteValues.size()) return true;
+      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+                               ByteValues);
+    }
+
+    // If this is a logical 'and' with a mask that clears bytes, clear the
+    // corresponding bytes in ByteMask.
+    if (I->getOpcode() == Instruction::And &&
+        isa<ConstantInt>(I->getOperand(1))) {
+      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+      unsigned NumBytes = ByteValues.size();
+      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+        // If this byte is masked out by a later operation, we don't care what
+        // the and mask is.
+        if ((ByteMask & (1 << i)) == 0)
+          continue;
+
+        // If the AndMask is all zeros for this byte, clear the bit.
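+        // For example (mask invented for illustration): and-ing an i32 with
+        // 0x00FFFFFF zeroes byte 3, so bit 3 of ByteMask is cleared and byte 3
+        // of the input no longer has to match a bswap source byte.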
+ APInt MaskB = AndMask & Byte; + if (MaskB == 0) { + ByteMask &= ~(1U << i); + continue; + } + + // If the AndMask is not all ones for this byte, it's not a bytezap. + if (MaskB != Byte) + return true; + + // Otherwise, this byte is kept. + } + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap. Some observations: 1) if more than one byte + // is demanded from this input, then it could not be successfully assembled + // into a byteswap. At least one of the two bytes would not be aligned with + // their ultimate destination. + if (!isPowerOf2_32(ByteMask)) return true; + unsigned InputByteNo = CountTrailingZeros_32(ByteMask); + + // 2) The input and ultimate destinations must line up: if byte 3 of an i32 + // is demanded, it needs to go into byte 0 of the result. This means that the + // byte needs to be shifted until it lands in the right byte bucket. The + // shift amount depends on the position: if the byte is coming from the high + // part of the value (e.g. byte 3) then it must be shifted right. If from the + // low part, it must be shifted left. + unsigned DestByteNo = InputByteNo + OverallLeftShift; + if (InputByteNo < ByteValues.size()/2) { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } else { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } + + // If the destination byte value is already defined, the values are or'd + // together, which isn't a bswap (unless it's an or of the same bits). + if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + return true; + ByteValues[DestByteNo] = V; + return false; +} + +/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. +/// If so, insert the new bswap intrinsic and return it. +Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { + const IntegerType *ITy = dyn_cast(I.getType()); + if (!ITy || ITy->getBitWidth() % 16 || + // ByteMask only allows up to 32-byte values. + ITy->getBitWidth() > 32*8) + return 0; // Can only bswap pairs of bytes. Can't do vectors. + + /// ByteValues - For each byte of the result, we keep track of which value + /// defines each byte. + SmallVector ByteValues; + ByteValues.resize(ITy->getBitWidth()/8); + + // Try to find all the pieces corresponding to the bswap. + uint32_t ByteMask = ~0U >> (32-ByteValues.size()); + if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) + return 0; + + // Check to see if all of the bytes come from the same value. + Value *V = ByteValues[0]; + if (V == 0) return 0; // Didn't find a byte? Must be zero. + + // Check to make sure that all of the bytes come from the same value. + for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) + if (ByteValues[i] != V) + return 0; + const Type *Tys[] = { ITy }; + Module *M = I.getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + return CallInst::Create(F, V); +} + +/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check +/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then +/// we can simplify this expression to "cond ? C : D or B". +static Instruction *MatchSelectFromAndOr(Value *A, Value *B, + Value *C, Value *D) { + // If A is not a select of -1/0, this cannot match. 
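+  // Concrete shape of one match (values invented for illustration): with
+  //   %A = select i1 %c, i32 -1, i32 0
+  //   %D = select i1 %c, i32 0, i32 -1
+  // the expression (%A & %C) | (%B & %D) simplifies to select i1 %c, %C, %B.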
+ Value *Cond = 0; + if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) + return 0; + + // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. + if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) + return SelectInst::Create(Cond, C, B); + if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, B); + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. + if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) + return SelectInst::Create(Cond, C, D); + if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + return SelectInst::Create(Cond, C, D); + return 0; +} + +/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. +Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + + // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) + if (PredicatesFoldable(LHSCC, RHSCC)) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + Value *RV = getICmpValue(isSigned, Code, Op0, Op1); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value. + return ReplaceInstUsesWith(I, RV); + } + } + + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). + Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); + ConstantInt *LHSCst = dyn_cast(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast(RHS->getOperand(1)); + if (LHSCst == 0 || RHSCst == 0) return 0; + + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + if (LHSCst == RHSCst && LHSCC == RHSCC && + LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + + // From here on, we only handle: + // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) | (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. + bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and or'ing the result + // together. Because of the above check, we know that we only have + // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the + // icmp folding check above), that the two constants are not + // equal. 
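+  // A worked instance of the switch below (constants illustrative only):
+  //   (X == 5 | X == 6) --> (X-5) <u 2, via the ICMP_EQ/ICMP_EQ arm.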
+  assert(LHSCst != RHSCst && "Compares not folded above?");
+
+  switch (LHSCC) {
+  default: llvm_unreachable("Unknown integer condition code!");
+  case ICmpInst::ICMP_EQ:
+    switch (RHSCC) {
+    default: llvm_unreachable("Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:
+      if (LHSCst == SubOne(RHSCst)) {
+        // (X == 13 | X == 14) -> X-13 <u 2
+        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
+      }
+      break;                        // (X == 13 | X == 15) -> no change
+    case ICmpInst::ICMP_UGT:        // (X == 13 | X u> 14) -> no change
+    case ICmpInst::ICMP_SGT:        // (X == 13 | X s> 14) -> no change
+      break;
+    case ICmpInst::ICMP_NE:         // (X == 13 | X != 15) -> X != 15
+    case ICmpInst::ICMP_ULT:        // (X == 13 | X u< 15) -> X u< 15
+    case ICmpInst::ICMP_SLT:        // (X == 13 | X s< 15) -> X s< 15
+      return ReplaceInstUsesWith(I, RHS);
+    }
+    break;
+  case ICmpInst::ICMP_NE:
+    switch (RHSCC) {
+    default: llvm_unreachable("Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:         // (X != 13 | X == 15) -> X != 13
+    case ICmpInst::ICMP_UGT:        // (X != 13 | X u> 15) -> X != 13
+    case ICmpInst::ICMP_SGT:        // (X != 13 | X s> 15) -> X != 13
+      return ReplaceInstUsesWith(I, LHS);
+    case ICmpInst::ICMP_NE:         // (X != 13 | X != 15) -> true
+    case ICmpInst::ICMP_ULT:        // (X != 13 | X u< 15) -> true
+    case ICmpInst::ICMP_SLT:        // (X != 13 | X s< 15) -> true
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+    }
+    break;
+  case ICmpInst::ICMP_ULT:
+    switch (RHSCC) {
+    default: llvm_unreachable("Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
+      break;
+    case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
+      // If RHSCst is [us]MAXINT, it is always false.  Not handling
+      // this can cause overflow.
+      if (RHSCst->isMaxValue(false))
+        return ReplaceInstUsesWith(I, LHS);
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
+                             false, false, I);
+    case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
+      break;
+    case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
+    case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
+      return ReplaceInstUsesWith(I, RHS);
+    case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
+      break;
+    }
+    break;
+  case ICmpInst::ICMP_SLT:
+    switch (RHSCC) {
+    default: llvm_unreachable("Unknown integer condition code!");
+    case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
+      break;
+    case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
+      // If RHSCst is [us]MAXINT, it is always false.  Not handling
+      // this can cause overflow.
+ if (RHSCst->isMaxValue(true)) + return ReplaceInstUsesWith(I, LHS); + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), + true, false, I); + case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 + case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 + case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true + case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change + break; + } + break; + } + return 0; +} + +Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + if (LHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // true. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + + // Otherwise, no need to compare the two constants, compare the + // rest. + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp uno x,x" is "fcmp uno x, 0". + if (isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 
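+    // Illustrative instance (operands invented for this sketch):
+    //   (fcmp olt %x, %y) | (fcmp ogt %x, %y) --> fcmp one %x, %y
+    // since OR-ing the predicate bit masks gives lt | gt == ne (ordered).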
+ if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, + Op0LHS, Op0RHS); + if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, LHS); + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op0Ordered == Op1Ordered) { + // If both are ordered or unordered, return a new fcmp with + // or'ed predicates. + Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value... + return ReplaceInstUsesWith(I, RV); + } + } + return 0; +} + +/// FoldOrWithConstants - This helper function folds: +/// +/// ((A | B) & C1) | (B & C2) +/// +/// into: +/// +/// (A & C1) | B +/// +/// when the XOR of the two constants is "all ones" (-1). +Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C) { + ConstantInt *CI1 = dyn_cast(C); + if (!CI1) return 0; + + Value *V1 = 0; + ConstantInt *CI2 = 0; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + + APInt Xor = CI1->getValue() ^ CI2->getValue(); + if (!Xor.isAllOnesValue()) return 0; + + if (V1 == A || V1 == B) { + Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); + return BinaryOperator::CreateOr(NewOp, V1); + } + + return 0; +} + +Instruction *InstCombiner::visitOr(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyOrInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + if (ConstantInt *RHS = dyn_cast(Op1)) { + ConstantInt *C1 = 0; Value *X = 0; + // (X & C1) | C2 --> (X | C2) & (C1|C2) + if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + Op0->hasOneUse()) { + Value *Or = Builder->CreateOr(X, RHS); + Or->takeName(Op0); + return BinaryOperator::CreateAnd(Or, + ConstantInt::get(I.getContext(), + RHS->getValue() | C1->getValue())); + } + + // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) + if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && + Op0->hasOneUse()) { + Value *Or = Builder->CreateOr(X, RHS); + Or->takeName(Op0); + return BinaryOperator::CreateXor(Or, + ConstantInt::get(I.getContext(), + C1->getValue() & ~RHS->getValue())); + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + Value *A = 0, *B = 0; + ConstantInt *C1 = 0, *C2 = 0; + + // (A | B) | C and A | (B | C) -> bswap if possible. + // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 
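+  // The classic idiom MatchBSwap recognizes, shown for a 16-bit value
+  // (example invented, not from the original change):
+  //   (shl i16 %x, 8) | (lshr i16 %x, 8) --> call i16 @llvm.bswap.i16(i16 %x)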
+ if (match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())) || + (match(Op0, m_Shift(m_Value(), m_Value())) && + match(Op1, m_Shift(m_Value(), m_Value())))) { + if (Instruction *BSwap = MatchBSwap(I)) + return BSwap; + } + + // (X^C)|Y -> (X|Y)^C iff Y&C == 0 + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op1, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op1); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // Y|(X^C) -> (X|Y)^C iff Y&C == 0 + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op0, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op0); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // (A & C)|(B & D) + Value *C = 0, *D = 0; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + Value *V1 = 0, *V2 = 0, *V3 = 0; + C1 = dyn_cast(C); + C2 = dyn_cast(D); + if (C1 && C2) { // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + if (C1->getValue() == ~C2->getValue()) { + if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return ReplaceInstUsesWith(I, A); + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return ReplaceInstUsesWith(I, A); + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue()+1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return ReplaceInstUsesWith(I, B); + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return ReplaceInstUsesWith(I, B); + } + } + + if ((C1->getValue() & C2->getValue()) == 0) { + // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) + // iff (C1&C2) == 0 and (N&~C1) == 0 + if (match(A, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) + (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(A, + ConstantInt::get(A->getContext(), + C1->getValue()|C2->getValue())); + // Or commutes, try both ways. + if (match(B, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) + (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(B, + ConstantInt::get(B->getContext(), + C1->getValue()|C2->getValue())); + + // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) + // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. + ConstantInt *C3 = 0, *C4 = 0; + if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && + (C3->getValue() & ~C1->getValue()) == 0 && + match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && + (C4->getValue() & ~C2->getValue()) == 0) { + V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + return BinaryOperator::CreateAnd(V2, + ConstantInt::get(B->getContext(), + C1->getValue()|C2->getValue())); + } + } + } + + // Check to see if we have any common things being and'ed. If so, find the + // terms for V1 & (V2|V3). 
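+    // e.g. (mask values invented): (X & 15) | (X & 240) --> X & 255, taking
+    // V1 = X, V2 = 15, V3 = 240 through the A == B arm below.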
+ if (Op0->hasOneUse() || Op1->hasOneUse()) { + V1 = 0; + if (A == B) // (A & C)|(A & D) == A & (C|D) + V1 = A, V2 = C, V3 = D; + else if (A == D) // (A & C)|(B & A) == A & (B|C) + V1 = A, V2 = B, V3 = C; + else if (C == B) // (A & C)|(C & D) == C & (A|D) + V1 = C, V2 = A, V3 = D; + else if (C == D) // (A & C)|(B & C) == C & (A|B) + V1 = C, V2 = A, V3 = B; + + if (V1) { + Value *Or = Builder->CreateOr(V2, V3, "tmp"); + return BinaryOperator::CreateAnd(V1, Or); + } + } + + // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + return Match; + + // ((A&~B)|(~A&B)) -> A^B + if ((match(C, m_Not(m_Specific(D))) && + match(B, m_Not(m_Specific(A))))) + return BinaryOperator::CreateXor(A, D); + // ((~B&A)|(~A&B)) -> A^B + if ((match(A, m_Not(m_Specific(D))) && + match(B, m_Not(m_Specific(C))))) + return BinaryOperator::CreateXor(C, D); + // ((A&~B)|(B&~A)) -> A^B + if ((match(C, m_Not(m_Specific(B))) && + match(D, m_Not(m_Specific(A))))) + return BinaryOperator::CreateXor(A, B); + // ((~B&A)|(B&~A)) -> A^B + if ((match(A, m_Not(m_Specific(B))) && + match(D, m_Not(m_Specific(C))))) + return BinaryOperator::CreateXor(C, B); + } + + // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast(Op1)) { + if (BinaryOperator *SI0 = dyn_cast(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } + } + + // ((A|B)&1)|(B&-2) -> (A&1) | B + if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || + match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { + Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); + if (Ret) return Ret; + } + // (B&-2)|((A|B)&1) -> (A&1) | B + if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || + match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { + Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); + if (Ret) return Ret; + } + + // (~A | ~B) == (~(A & B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(And); + } + + if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) + if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) + if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) + return Res; + + // fold (or (cast A), (cast B)) -> (cast (or A, B)) + if (CastInst *Op0C = dyn_cast(Op0)) { + if (CastInst *Op1C = dyn_cast(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? + if (!isa(Op0C->getOperand(0)) || + !isa(Op1C->getOperand(0))) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && + // Only do this if the casts both really cause code to be + // generated. 
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + } + } + + + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitXor(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa(Op1)) { + if (isa(Op0)) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef + } + + // xor X, X = 0 + if (Op0 == Op1) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa(I.getType())) + if (isa(Op1)) + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X + + // Is this a ~ operation? + if (Value *NotOp = dyn_castNotVal(&I)) { + if (BinaryOperator *Op0I = dyn_cast(NotOp)) { + if (Op0I->getOpcode() == Instruction::And || + Op0I->getOpcode() == Instruction::Or) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (dyn_castNotVal(Op0I->getOperand(1))) + Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); + } + + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law + // ~(X | Y) === (~X & ~Y) - De Morgan's Law + if (isFreeToInvert(Op0I->getOperand(0)) && + isFreeToInvert(Op0I->getOperand(1))) { + Value *NotX = + Builder->CreateNot(Op0I->getOperand(0), "notlhs"); + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), "notrhs"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); + } + } + } + } + + + if (ConstantInt *RHS = dyn_cast(Op1)) { + if (RHS->isOne() && Op0->hasOneUse()) { + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + if (ICmpInst *ICI = dyn_cast(Op0)) + return new ICmpInst(ICI->getInversePredicate(), + ICI->getOperand(0), ICI->getOperand(1)); + + if (FCmpInst *FCI = dyn_cast(Op0)) + return new FCmpInst(FCI->getInversePredicate(), + FCI->getOperand(0), FCI->getOperand(1)); + } + + // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 
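+    // e.g. (types illustrative):
+    //   xor (zext i1 (icmp eq %a, %b) to i32), 1
+    //     --> zext i1 (icmp ne %a, %b) to i32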
+ if (CastInst *Op0C = dyn_cast(Op0)) { + if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { + if (CI->hasOneUse() && Op0C->hasOneUse()) { + Instruction::CastOps Opcode = Op0C->getOpcode(); + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && + (RHS == ConstantExpr::getCast(Opcode, + ConstantInt::getTrue(I.getContext()), + Op0C->getDestTy()))) { + CI->setPredicate(CI->getInversePredicate()); + return CastInst::Create(Opcode, CI, Op0C->getType()); + } + } + } + } + + if (BinaryOperator *Op0I = dyn_cast(Op0)) { + // ~(c-X) == X-c-1 == X+(-c-1) + if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) + if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { + Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); + Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, + ConstantInt::get(I.getType(), 1)); + return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); + } + + if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { + if (Op0I->getOpcode() == Instruction::Add) { + // ~(X-c) --> (-c-1)-X + if (RHS->isAllOnesValue()) { + Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); + return BinaryOperator::CreateSub( + ConstantExpr::getSub(NegOp0CI, + ConstantInt::get(I.getType(), 1)), + Op0I->getOperand(0)); + } else if (RHS->getValue().isSignBit()) { + // (X + C) ^ signbit -> (X + C + signbit) + Constant *C = ConstantInt::get(I.getContext(), + RHS->getValue() + Op0CI->getValue()); + return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); + + } + } else if (Op0I->getOpcode() == Instruction::Or) { + // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 + if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { + Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); + // Anything in both C1 and C2 is known to be zero, remove it from + // NewRHS. + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); + NewRHS = ConstantExpr::getAnd(NewRHS, + ConstantExpr::getNot(CommonBits)); + Worklist.Add(Op0I); + I.setOperand(0, Op0I->getOperand(0)); + I.setOperand(1, NewRHS); + return &I; + } + } + } + } + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 + if (X == Op1) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); + + if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 + if (X == Op0) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); + + + BinaryOperator *Op1I = dyn_cast(Op1); + if (Op1I) { + Value *A, *B; + if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { + if (A == Op0) { // B^(B|A) == (A|B)^B + Op1I->swapOperands(); + I.swapOperands(); + std::swap(Op0, Op1); + } else if (B == Op0) { // B^(A|B) == (A|B)^B + I.swapOperands(); // Simplified below. + std::swap(Op0, Op1); + } + } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { + return ReplaceInstUsesWith(I, B); // A^(A^B) == B + } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { + return ReplaceInstUsesWith(I, A); // A^(B^A) == B + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && + Op1I->hasOneUse()){ + if (A == Op0) { // A^(A&B) -> A^(B&A) + Op1I->swapOperands(); + std::swap(A, B); + } + if (B == Op0) { // A^(B&A) -> (B&A)^A + I.swapOperands(); // Simplified below. 
+        std::swap(Op0, Op1);
+      }
+    }
+  }
+
+  BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+  if (Op0I) {
+    Value *A, *B;
+    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+        Op0I->hasOneUse()) {
+      if (A == Op1)                                // (B|A)^B == (A|B)^B
+        std::swap(A, B);
+      if (B == Op1)                                // (A|B)^B == A & ~B
+        return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+    } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
+      return ReplaceInstUsesWith(I, B);            // (A^B)^A == B
+    } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
+      return ReplaceInstUsesWith(I, A);            // (B^A)^A == B
+    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+               Op0I->hasOneUse()){
+      if (A == Op1)                                // (A&B)^A -> (B&A)^A
+        std::swap(A, B);
+      if (B == Op1 &&                              // (B&A)^A == ~B & A
+          !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
+        return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+      }
+    }
+  }
+
+  // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
+  if (Op0I && Op1I && Op0I->isShift() &&
+      Op0I->getOpcode() == Op1I->getOpcode() &&
+      Op0I->getOperand(1) == Op1I->getOperand(1) &&
+      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+    Value *NewOp =
+      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+                         Op0I->getName());
+    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+                                  Op1I->getOperand(1));
+  }
+
+  if (Op0I && Op1I) {
+    Value *A, *B, *C, *D;
+    // (A & B)^(A | B) -> A ^ B
+    if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+        match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+      if ((A == C && B == D) || (A == D && B == C))
+        return BinaryOperator::CreateXor(A, B);
+    }
+    // (A | B)^(A & B) -> A ^ B
+    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+      if ((A == C && B == D) || (A == D && B == C))
+        return BinaryOperator::CreateXor(A, B);
+    }
+
+    // (A & B)^(C & D)
+    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
+        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+      // (X & Y)^(X & Z) -> (Y^Z) & X
+      Value *X = 0, *Y = 0, *Z = 0;
+      if (A == C)
+        X = A, Y = B, Z = D;
+      else if (A == D)
+        X = A, Y = B, Z = C;
+      else if (B == C)
+        X = B, Y = A, Z = D;
+      else if (B == D)
+        X = B, Y = A, Z = C;
+
+      if (X) {
+        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
+        return BinaryOperator::CreateAnd(NewOp, X);
+      }
+    }
+  }
+
+  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+      if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+        if (LHS->getOperand(0) == RHS->getOperand(1) &&
+            LHS->getOperand(1) == RHS->getOperand(0))
+          LHS->swapOperands();
+        if (LHS->getOperand(0) == RHS->getOperand(0) &&
+            LHS->getOperand(1) == RHS->getOperand(1)) {
+          Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+          unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+          bool isSigned = LHS->isSigned() || RHS->isSigned();
+          Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
+          if (Instruction *I = dyn_cast<Instruction>(RV))
+            return I;
+          // Otherwise, it's a constant boolean value.
+          return ReplaceInstUsesWith(I, RV);
+        }
+      }
+
+  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+        const Type *SrcTy = Op0C->getOperand(0)->getType();
+        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+            // Only do this if the casts both really cause code to be generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + } + + return Changed ? &I : 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp new file mode 100644 index 000000000000..47c37c46587d --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -0,0 +1,1142 @@ +//===- InstCombineCalls.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitCall and visitInvoke functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Analysis/MemoryBuiltins.h" +using namespace llvm; + +/// getPromotedType - Return the specified type promoted as it would be to pass +/// though a va_arg area. +static const Type *getPromotedType(const Type *Ty) { + if (const IntegerType* ITy = dyn_cast(Ty)) { + if (ITy->getBitWidth() < 32) + return Type::getInt32Ty(Ty->getContext()); + } + return Ty; +} + +/// EnforceKnownAlignment - If the specified pointer points to an object that +/// we control, modify the object's alignment to PrefAlign. This isn't +/// often possible though. If alignment is important, a more reliable approach +/// is to simply align all global variables and allocation instructions to +/// their preferred alignment from the beginning. +/// +static unsigned EnforceKnownAlignment(Value *V, + unsigned Align, unsigned PrefAlign) { + + User *U = dyn_cast(V); + if (!U) return Align; + + switch (Operator::getOpcode(U)) { + default: break; + case Instruction::BitCast: + return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); + case Instruction::GetElementPtr: { + // If all indexes are zero, it is just the alignment of the base pointer. + bool AllZeroOperands = true; + for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) + if (!isa(*i) || + !cast(*i)->isNullValue()) { + AllZeroOperands = false; + break; + } + + if (AllZeroOperands) { + // Treat this like a bitcast. + return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); + } + break; + } + } + + if (GlobalValue *GV = dyn_cast(V)) { + // If there is a large requested alignment and we can, bump up the alignment + // of the global. + if (!GV->isDeclaration()) { + if (GV->getAlignment() >= PrefAlign) + Align = GV->getAlignment(); + else { + GV->setAlignment(PrefAlign); + Align = PrefAlign; + } + } + } else if (AllocaInst *AI = dyn_cast(V)) { + // If there is a requested alignment and if this is an alloca, round up. + if (AI->getAlignment() >= PrefAlign) + Align = AI->getAlignment(); + else { + AI->setAlignment(PrefAlign); + Align = PrefAlign; + } + } + + return Align; +} + +/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. 
If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, + unsigned PrefAlign) { + unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : + sizeof(PrefAlign) * CHAR_BIT; + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne); + unsigned TrailZ = KnownZero.countTrailingOnes(); + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + + if (PrefAlign > Align) + Align = EnforceKnownAlignment(V, Align, PrefAlign); + + // We don't need to make any adjustment. + return Align; +} + +Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { + unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned MinAlign = std::min(DstAlign, SrcAlign); + unsigned CopyAlign = MI->getAlignment(); + + if (CopyAlign < MinAlign) { + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MinAlign, false)); + return MI; + } + + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with + // load/store. + ConstantInt *MemOpLength = dyn_cast(MI->getOperand(3)); + if (MemOpLength == 0) return 0; + + // Source and destination pointer types are always "i8*" for intrinsic. See + // if the size is something we can handle with a single primitive load/store. + // A single load+store correctly handles overlapping memory in the memmove + // case. + unsigned Size = MemOpLength->getZExtValue(); + if (Size == 0) return MI; // Delete this mem transfer. + + if (Size > 8 || (Size&(Size-1))) + return 0; // If not 1/2/4/8 bytes, exit. + + // Use an integer load+store unless we can find something better. + Type *NewPtrTy = + PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3)); + + // Memcpy forces the use of i8* for the source and destination. That means + // that if you're using memcpy to move one double around, you'll get a cast + // from double* to i8*. We'd much rather use a double load+store rather than + // an i64 load+store, here because this improves the odds that the source or + // dest address will be promotable. See if we can find a better type than the + // integer datatype. + Value *StrippedDest = MI->getOperand(1)->stripPointerCasts(); + if (StrippedDest != MI->getOperand(1)) { + const Type *SrcETy = cast(StrippedDest->getType()) + ->getElementType(); + if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { + // The SrcETy might be something like {{{double}}} or [1 x double]. Rip + // down through these levels if so. + while (!SrcETy->isSingleValueType()) { + if (const StructType *STy = dyn_cast(SrcETy)) { + if (STy->getNumElements() == 1) + SrcETy = STy->getElementType(0); + else + break; + } else if (const ArrayType *ATy = dyn_cast(SrcETy)) { + if (ATy->getNumElements() == 1) + SrcETy = ATy->getElementType(); + else + break; + } else + break; + } + + if (SrcETy->isSingleValueType()) + NewPtrTy = PointerType::getUnqual(SrcETy); + } + } + + + // If the memcpy/memmove provides better alignment info than we can + // infer, use it. 
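+  // e.g. (alignments invented for illustration): a memcpy declared with
+  // align 16 whose operands we could only prove align 1 for lets the
+  // load/store built below carry align 16 instead.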
+ SrcAlign = std::max(SrcAlign, CopyAlign); + DstAlign = std::max(DstAlign, CopyAlign); + + Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); + Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); + InsertNewInstBefore(L, *MI); + InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + return MI; +} + +Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { + unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + if (MI->getAlignment() < Alignment) { + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + Alignment, false)); + return MI; + } + + // Extract the length and alignment and fill if they are constant. + ConstantInt *LenC = dyn_cast(MI->getLength()); + ConstantInt *FillC = dyn_cast(MI->getValue()); + if (!LenC || !FillC || !FillC->getType()->isInteger(8)) + return 0; + uint64_t Len = LenC->getZExtValue(); + Alignment = MI->getAlignment(); + + // If the length is zero, this is a no-op + if (Len == 0) return MI; // memset(d,c,0,a) -> noop + + // memset(s,c,n) -> store s, c (for n=1,2,4,8) + if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { + const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + + Value *Dest = MI->getDest(); + Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); + + // Alignment 0 is identity for alignment 1 for memset, but not store. + if (Alignment == 0) Alignment = 1; + + // Extract the fill value and store. + uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; + InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), + Dest, false, Alignment), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(LenC->getType())); + return MI; + } + + return 0; +} + + +/// visitCallInst - CallInst simplification. This mostly only handles folding +/// of intrinsic instructions. For normal calls, it allows visitCallSite to do +/// the heavy lifting. +/// +Instruction *InstCombiner::visitCallInst(CallInst &CI) { + if (isFreeCall(&CI)) + return visitFree(CI); + + // If the caller function is nounwind, mark the call as nounwind, even if the + // callee isn't. + if (CI.getParent()->getParent()->doesNotThrow() && + !CI.doesNotThrow()) { + CI.setDoesNotThrow(); + return &CI; + } + + IntrinsicInst *II = dyn_cast(&CI); + if (!II) return visitCallSite(&CI); + + // Intrinsics cannot occur in an invoke, so handle them here instead of in + // visitCallSite. + if (MemIntrinsic *MI = dyn_cast(II)) { + bool Changed = false; + + // memmove/cpy/set of zero bytes is a noop. + if (Constant *NumBytes = dyn_cast(MI->getLength())) { + if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); + + if (ConstantInt *CI = dyn_cast(NumBytes)) + if (CI->getZExtValue() == 1) { + // Replace the instruction with just byte operations. We would + // transform other cases to loads/stores, but we don't know if + // alignment is sufficient. + } + } + + // If we have a memmove and the source operation is a constant global, + // then the source and dest pointers can't alias, so we can change this + // into a call to memcpy. 
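+    // e.g. (IR invented for illustration): memmove(%dst, @.str, 8) cannot
+    // overlap, because @.str is a constant global, so it is safely rewritten
+    // as memcpy(%dst, @.str, 8).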
+ if (MemMoveInst *MMI = dyn_cast(MI)) { + if (GlobalVariable *GVSrc = dyn_cast(MMI->getSource())) + if (GVSrc->isConstant()) { + Module *M = CI.getParent()->getParent()->getParent(); + Intrinsic::ID MemCpyID = Intrinsic::memcpy; + const Type *Tys[1]; + Tys[0] = CI.getOperand(3)->getType(); + CI.setOperand(0, + Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); + Changed = true; + } + } + + if (MemTransferInst *MTI = dyn_cast(MI)) { + // memmove(x,x,size) -> noop. + if (MTI->getSource() == MTI->getDest()) + return EraseInstFromFunction(CI); + } + + // If we can determine a pointer alignment that is bigger than currently + // set, update the alignment. + if (isa(MI)) { + if (Instruction *I = SimplifyMemTransfer(MI)) + return I; + } else if (MemSetInst *MSI = dyn_cast(MI)) { + if (Instruction *I = SimplifyMemSet(MSI)) + return I; + } + + if (Changed) return II; + } + + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::bswap: + // bswap(bswap(x)) -> x + if (IntrinsicInst *Operand = dyn_cast(II->getOperand(1))) + if (Operand->getIntrinsicID() == Intrinsic::bswap) + return ReplaceInstUsesWith(CI, Operand->getOperand(1)); + + // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) + if (TruncInst *TI = dyn_cast(II->getOperand(1))) { + if (IntrinsicInst *Operand = dyn_cast(TI->getOperand(0))) + if (Operand->getIntrinsicID() == Intrinsic::bswap) { + unsigned C = Operand->getType()->getPrimitiveSizeInBits() - + TI->getType()->getPrimitiveSizeInBits(); + Value *CV = ConstantInt::get(Operand->getType(), C); + Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); + return new TruncInst(V, TI->getType()); + } + } + + break; + case Intrinsic::powi: + if (ConstantInt *Power = dyn_cast(II->getOperand(2))) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); + // powi(x, 1) -> x + if (Power->isOne()) + return ReplaceInstUsesWith(CI, II->getOperand(1)); + // powi(x, -1) -> 1/x + if (Power->isAllOnesValue()) + return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), + II->getOperand(1)); + } + break; + case Intrinsic::cttz: { + // If all bits below the first known one are known zero, + // this value is constant. + const IntegerType *IT = cast(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne); + unsigned TrailingZeros = KnownOne.countTrailingZeros(); + APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); + if ((Mask & KnownZero) == Mask) + return ReplaceInstUsesWith(CI, ConstantInt::get(IT, + APInt(BitWidth, TrailingZeros))); + + } + break; + case Intrinsic::ctlz: { + // If all bits above the first known one are known zero, + // this value is constant. 
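// Standalone model (GCC/Clang builtins; the mask values are a made-up
// example) of the cttz reasoning above: once bit 4 is known one and bits
// 0-3 are known zero, every value consistent with that knowledge has
// exactly four trailing zeros, so the intrinsic folds to a constant.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t KnownOne = 1u << 4, KnownZero = 0xFu;
  unsigned TrailingZeros = 4;                  // countTrailingZeros(KnownOne)
  uint32_t Mask = (1u << TrailingZeros) - 1;   // the bits below the known one...
  assert((Mask & KnownZero) == Mask);          // ...are all known zero
  assert(__builtin_ctz(0x30) == 4 && __builtin_ctz(0xFFFFFFF0u) == 4);
}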
+ const IntegerType *IT = cast(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne); + unsigned LeadingZeros = KnownOne.countLeadingZeros(); + APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); + if ((Mask & KnownZero) == Mask) + return ReplaceInstUsesWith(CI, ConstantInt::get(IT, + APInt(BitWidth, LeadingZeros))); + + } + break; + case Intrinsic::uadd_with_overflow: { + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + const IntegerType *IT = cast(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt Mask = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; + bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; + + if (LHSKnownNegative || LHSKnownPositive) { + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; + bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; + if (LHSKnownNegative && RHSKnownNegative) { + // The sign bit is set in both cases: this MUST overflow. + // Create a simple add instruction, and insert it into the struct. + Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); + Worklist.Add(Add); + Constant *V[] = { + UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Add, 0); + } + + if (LHSKnownPositive && RHSKnownPositive) { + // The sign bit is clear in both cases: this CANNOT overflow. + // Create a simple add instruction, and insert it into the struct. + Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); + Worklist.Add(Add); + Constant *V[] = { + UndefValue::get(LHS->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Add, 0); + } + } + } + // FALL THROUGH uadd into sadd + case Intrinsic::sadd_with_overflow: + // Canonicalize constants into the RHS. 
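// Why the sign-bit test above is sound, as a standalone demo: if the top
// bit is set in both unsigned operands, each is >= 2^(w-1), so the sum is
// >= 2^w and must wrap; if the top bit is clear in both, each is < 2^(w-1)
// and the sum stays below 2^w, so it cannot wrap. Not LLVM code.
#include <cassert>
#include <cstdint>

int main() {
  uint8_t a = 0x80, b = 0x90;          // both sign bits set
  assert((uint8_t)(a + b) < a);        // wrapped: overflow is guaranteed
  uint8_t c = 0x30, d = 0x40;          // both sign bits clear
  assert((uint8_t)(c + d) == 0x70);    // no wrap is possible
}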
+ if (isa(II->getOperand(1)) && + !isa(II->getOperand(2))) { + Value *LHS = II->getOperand(1); + II->setOperand(1, II->getOperand(2)); + II->setOperand(2, LHS); + return II; + } + + // X + undef -> undef + if (isa(II->getOperand(2))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHS = dyn_cast(II->getOperand(2))) { + // X + 0 -> {X, false} + if (RHS->isZero()) { + Constant *V[] = { + UndefValue::get(II->getOperand(0)->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, II->getOperand(1), 0); + } + } + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + // undef - X -> undef + // X - undef -> undef + if (isa(II->getOperand(1)) || + isa(II->getOperand(2))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHS = dyn_cast(II->getOperand(2))) { + // X - 0 -> {X, false} + if (RHS->isZero()) { + Constant *V[] = { + UndefValue::get(II->getOperand(1)->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, II->getOperand(1), 0); + } + } + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + // Canonicalize constants into the RHS. + if (isa(II->getOperand(1)) && + !isa(II->getOperand(2))) { + Value *LHS = II->getOperand(1); + II->setOperand(1, II->getOperand(2)); + II->setOperand(2, LHS); + return II; + } + + // X * undef -> undef + if (isa(II->getOperand(2))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHSI = dyn_cast(II->getOperand(2))) { + // X*0 -> {0, false} + if (RHSI->isZero()) + return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + + // X * 1 -> {X, false} + if (RHSI->equalsInt(1)) { + Constant *V[] = { + UndefValue::get(II->getOperand(1)->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, II->getOperand(1), 0); + } + } + break; + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + case Intrinsic::x86_sse_loadu_ps: + case Intrinsic::x86_sse2_loadu_pd: + case Intrinsic::x86_sse2_loadu_dq: + // Turn PPC lvx -> load if the pointer is known aligned. + // Turn X86 loadups -> load if the pointer is known aligned. + if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { + Value *Ptr = Builder->CreateBitCast(II->getOperand(1), + PointerType::getUnqual(II->getType())); + return new LoadInst(Ptr); + } + break; + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + // Turn stvx -> store if the pointer is known aligned. + if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { + const Type *OpPtrTy = + PointerType::getUnqual(II->getOperand(1)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); + return new StoreInst(II->getOperand(1), Ptr); + } + break; + case Intrinsic::x86_sse_storeu_ps: + case Intrinsic::x86_sse2_storeu_pd: + case Intrinsic::x86_sse2_storeu_dq: + // Turn X86 storeu -> store if the pointer is known aligned. 
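// Standalone demo (x86 with SSE only) of the fact the loadu folds above
// rely on: once the pointer is known 16-byte aligned, the unaligned load
// form reads exactly what a plain vector load would.
#include <cassert>
#include <xmmintrin.h>

int main() {
  alignas(16) float buf[4] = {1, 2, 3, 4};
  __m128 u = _mm_loadu_ps(buf);   // unaligned form
  __m128 a = _mm_load_ps(buf);    // aligned load of the same memory
  assert(_mm_movemask_ps(_mm_cmpeq_ps(u, a)) == 0xF);  // all four lanes equal
}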
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+      const Type *OpPtrTy =
+        PointerType::getUnqual(II->getOperand(2)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
+      return new StoreInst(II->getOperand(2), Ptr);
+    }
+    break;
+
+  case Intrinsic::x86_sse_cvttss2si: {
+    // This intrinsic only demands the 0th element of its input vector. If
+    // we can simplify the input based on that, do so now.
+    unsigned VWidth =
+      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+    APInt DemandedElts(VWidth, 1);
+    APInt UndefElts(VWidth, 0);
+    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+                                              UndefElts)) {
+      II->setOperand(1, V);
+      return II;
+    }
+    break;
+  }
+
+  case Intrinsic::ppc_altivec_vperm:
+    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+      // Check that all of the elements are integer constants or undefs.
+      bool AllEltsOk = true;
+      for (unsigned i = 0; i != 16; ++i) {
+        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+            !isa<UndefValue>(Mask->getOperand(i))) {
+          AllEltsOk = false;
+          break;
+        }
+      }
+
+      if (AllEltsOk) {
+        // Cast the input vectors to byte vectors.
+        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
+        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+        Value *Result = UndefValue::get(Op0->getType());
+
+        // Only extract each element once.
+        Value *ExtractedElts[32];
+        memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+        for (unsigned i = 0; i != 16; ++i) {
+          if (isa<UndefValue>(Mask->getOperand(i)))
+            continue;
+          unsigned Idx = cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+          Idx &= 31;  // Match the hardware behavior.
+
+          if (ExtractedElts[Idx] == 0) {
+            ExtractedElts[Idx] =
+              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                   Idx&15, false), "tmp");
+          }
+
+          // Insert this value into the result vector.
+          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+                         ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                          i, false), "tmp");
+        }
+        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+      }
+    }
+    break;
+
+  case Intrinsic::stackrestore: {
+    // If the save is right next to the restore, remove the restore.  This can
+    // happen when variable allocas are DCE'd.
+    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+        BasicBlock::iterator BI = SS;
+        if (&*++BI == II)
+          return EraseInstFromFunction(CI);
+      }
+    }
+
+    // Scan down this block to see if there is another stack restore in the
+    // same block without an intervening call/alloca.
+    BasicBlock::iterator BI = II;
+    TerminatorInst *TI = II->getParent()->getTerminator();
+    bool CannotRemove = false;
+    for (++BI; &*BI != TI; ++BI) {
+      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+        CannotRemove = true;
+        break;
+      }
+      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+          // If there is a stackrestore below this one, remove this one.
+          if (II->getIntrinsicID() == Intrinsic::stackrestore)
+            return EraseInstFromFunction(CI);
+          // Otherwise, ignore the intrinsic.
+        } else {
+          // If we found a non-intrinsic call, we can't remove the stack
+          // restore.
+          CannotRemove = true;
+          break;
+        }
+      }
+    }
+
+    // If the stack restore is in a return/unwind block and if there are no
+    // allocas or calls between the restore and the return, nuke the restore.
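// Standalone scalar model (not LLVM or AltiVec code) of the vperm
// semantics the fold above depends on: with a constant mask, result byte i
// is byte (mask[i] & 31) of the 32 source bytes, i.e. a shufflevector.
#include <cassert>
#include <cstdint>

static void vperm(const uint8_t V1[16], const uint8_t V2[16],
                  const uint8_t Mask[16], uint8_t Out[16]) {
  for (int i = 0; i != 16; ++i) {
    unsigned Idx = Mask[i] & 31;                  // match the hardware behavior
    Out[i] = Idx < 16 ? V1[Idx] : V2[Idx - 16];
  }
}

int main() {
  uint8_t a[16], b[16], m[16], out[16];
  for (int i = 0; i != 16; ++i) { a[i] = i; b[i] = 100 + i; m[i] = 31 - i; }
  vperm(a, b, m, out);
  assert(out[0] == 115 && out[15] == 100);  // byte 31 is V2[15]; byte 16 is V2[0]
}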
+    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+      return EraseInstFromFunction(CI);
+    break;
+  }
+  case Intrinsic::objectsize: {
+    ConstantInt *Const = cast<ConstantInt>(II->getOperand(2));
+    const Type *Ty = CI.getType();
+
+    // 0 is maximum number of bytes left, 1 is minimum number of bytes left.
+    // TODO: actually add these values, the current return values are "don't
+    // know".
+    if (Const->getZExtValue() == 0)
+      return ReplaceInstUsesWith(CI, Constant::getAllOnesValue(Ty));
+    else
+      return ReplaceInstUsesWith(CI, ConstantInt::get(Ty, 0));
+  }
+  }
+
+  return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+  return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+                                         const CastInst * const CI,
+                                         const TargetData * const TD,
+                                         const int ix) {
+  if (!CI->isLosslessCast())
+    return false;
+
+  // The size of ByVal arguments is derived from the type, so we
+  // can't change to a type with a different size.  If the size were
+  // passed explicitly we could avoid this check.
+  if (!CS.paramHasAttr(ix, Attribute::ByVal))
+    return true;
+
+  const Type* SrcTy =
+    cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+  if (!SrcTy->isSized() || !DstTy->isSized())
+    return false;
+  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+    return false;
+  return true;
+}
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+  bool Changed = false;
+
+  // If the callee is a constexpr cast of a function, attempt to move the cast
+  // to the arguments of the call/invoke.
+  if (transformConstExprCastCall(CS)) return 0;
+
+  Value *Callee = CS.getCalledValue();
+
+  if (Function *CalleeF = dyn_cast<Function>(Callee))
+    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
+      Instruction *OldCall = CS.getInstruction();
+      // If the call and callee calling conventions don't match, this call must
+      // be unreachable, as the call is undefined.
+      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+                    OldCall);
+      // If OldCall does not return void then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust themselves.
+      if (!OldCall->getType()->isVoidTy())
+        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
+        return EraseInstFromFunction(*OldCall);
+      return 0;
+    }
+
+  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+    // This instruction is not reachable, just remove it.  We insert a store to
+    // undef so that we know that this code is not reachable, despite the fact
+    // that we can't modify the CFG here.
+    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+                  CS.getInstruction());
+
+    // If CS does not return void then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust themselves.
+    if (!CS.getInstruction()->getType()->isVoidTy())
+      CS.getInstruction()->
+        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
+
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+      // Don't break the CFG, insert a dummy cond branch.
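// The objectsize fold above matches the contract Clang/GCC expose through
// __builtin_object_size: when the pointed-to object is unknown, the
// "maximum bytes left" query answers (size_t)-1 and the "minimum" query
// answers 0. Standalone demo (GCC/Clang builtin):
#include <cassert>
#include <cstddef>

static size_t maxBytes(char *p) { return __builtin_object_size(p, 0); }
static size_t minBytes(char *p) { return __builtin_object_size(p, 2); }

int main(int, char **argv) {
  assert(maxBytes(argv[0]) == (size_t)-1);  // unknown -> all ones
  assert(minBytes(argv[0]) == 0);           // unknown -> zero
}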
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), + ConstantInt::getTrue(Callee->getContext()), II); + } + return EraseInstFromFunction(*CS.getInstruction()); + } + + if (BitCastInst *BC = dyn_cast(Callee)) + if (IntrinsicInst *In = dyn_cast(BC->getOperand(0))) + if (In->getIntrinsicID() == Intrinsic::init_trampoline) + return transformCallThroughTrampoline(CS); + + const PointerType *PTy = cast(Callee->getType()); + const FunctionType *FTy = cast(PTy->getElementType()); + if (FTy->isVarArg()) { + int ix = FTy->getNumParams() + (isa(Callee) ? 3 : 1); + // See if we can optimize any arguments passed through the varargs area of + // the call. + for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), + E = CS.arg_end(); I != E; ++I, ++ix) { + CastInst *CI = dyn_cast(*I); + if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) { + *I = CI->getOperand(0); + Changed = true; + } + } + } + + if (isa(Callee) && !CS.doesNotThrow()) { + // Inline asm calls cannot throw - mark them 'nounwind'. + CS.setDoesNotThrow(); + Changed = true; + } + + return Changed ? CS.getInstruction() : 0; +} + +// transformConstExprCastCall - If the callee is a constexpr cast of a function, +// attempt to move the cast to the arguments of the call/invoke. +// +bool InstCombiner::transformConstExprCastCall(CallSite CS) { + if (!isa(CS.getCalledValue())) return false; + ConstantExpr *CE = cast(CS.getCalledValue()); + if (CE->getOpcode() != Instruction::BitCast || + !isa(CE->getOperand(0))) + return false; + Function *Callee = cast(CE->getOperand(0)); + Instruction *Caller = CS.getInstruction(); + const AttrListPtr &CallerPAL = CS.getAttributes(); + + // Okay, this is a cast from a function to a different type. Unless doing so + // would cause a type conversion of one of our arguments, change this call to + // be a direct call with arguments casted to the appropriate types. + // + const FunctionType *FT = Callee->getFunctionType(); + const Type *OldRetTy = Caller->getType(); + const Type *NewRetTy = FT->getReturnType(); + + if (isa(NewRetTy)) + return false; // TODO: Handle multiple return values. + + // Check to see if we are changing the return type... + if (OldRetTy != NewRetTy) { + if (Callee->isDeclaration() && + // Conversion is ok if changing from one pointer type to another or from + // a pointer to an integer of the same size. + !((isa(OldRetTy) || !TD || + OldRetTy == TD->getIntPtrType(Caller->getContext())) && + (isa(NewRetTy) || !TD || + NewRetTy == TD->getIntPtrType(Caller->getContext())))) + return false; // Cannot transform this return value. + + if (!Caller->use_empty() && + // void -> non-void is handled specially + !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) + return false; // Cannot transform this return value. + + if (!CallerPAL.isEmpty() && !Caller->use_empty()) { + Attributes RAttrs = CallerPAL.getRetAttributes(); + if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + return false; // Attribute not compatible with transformed value. + } + + // If the callsite is an invoke instruction, and the return value is used by + // a PHI node in a successor, we cannot change the return type of the call + // because there is no place to put the cast instruction (without breaking + // the critical edge). Bail out in this case. 
+ if (!Caller->use_empty()) + if (InvokeInst *II = dyn_cast(Caller)) + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (PHINode *PN = dyn_cast(*UI)) + if (PN->getParent() == II->getNormalDest() || + PN->getParent() == II->getUnwindDest()) + return false; + } + + unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); + + CallSite::arg_iterator AI = CS.arg_begin(); + for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { + const Type *ParamTy = FT->getParamType(i); + const Type *ActTy = (*AI)->getType(); + + if (!CastInst::isCastable(ActTy, ParamTy)) + return false; // Cannot transform this parameter value. + + if (CallerPAL.getParamAttributes(i + 1) + & Attribute::typeIncompatible(ParamTy)) + return false; // Attribute not compatible with transformed value. + + // Converting from one pointer type to another or between a pointer and an + // integer of the same size is safe even if we do not have a body. + bool isConvertible = ActTy == ParamTy || + (TD && ((isa(ParamTy) || + ParamTy == TD->getIntPtrType(Caller->getContext())) && + (isa(ActTy) || + ActTy == TD->getIntPtrType(Caller->getContext())))); + if (Callee->isDeclaration() && !isConvertible) return false; + } + + if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && + Callee->isDeclaration()) + return false; // Do not delete arguments unless we have a function body. + + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && + !CallerPAL.isEmpty()) + // In this case we have more arguments than the new function type, but we + // won't be dropping them. Check that these extra arguments have attributes + // that are compatible with being a vararg call argument. + for (unsigned i = CallerPAL.getNumSlots(); i; --i) { + if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) + break; + Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; + if (PAttrs & Attribute::VarArgsIncompatible) + return false; + } + + // Okay, we decided that this is a safe thing to do: go ahead and start + // inserting cast instructions as necessary... + std::vector Args; + Args.reserve(NumActualArgs); + SmallVector attrVec; + attrVec.reserve(NumCommonArgs); + + // Get any return attributes. + Attributes RAttrs = CallerPAL.getRetAttributes(); + + // If the return value is not being used, the type may not be compatible + // with the existing attributes. Wipe out any problematic attributes. + RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + + // Add the new return attributes. + if (RAttrs) + attrVec.push_back(AttributeWithIndex::get(0, RAttrs)); + + AI = CS.arg_begin(); + for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { + const Type *ParamTy = FT->getParamType(i); + if ((*AI)->getType() == ParamTy) { + Args.push_back(*AI); + } else { + Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, + false, ParamTy, false); + Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); + } + + // Add any parameter attributes. + if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) + attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); + } + + // If the function takes more arguments than the call was taking, add them + // now. + for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) + Args.push_back(Constant::getNullValue(FT->getParamType(i))); + + // If we are removing arguments to the function, emit an obnoxious warning. 
+ if (FT->getNumParams() < NumActualArgs) { + if (!FT->isVarArg()) { + errs() << "WARNING: While resolving call to function '" + << Callee->getName() << "' arguments were dropped!\n"; + } else { + // Add all of the arguments in their promoted form to the arg list. + for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { + const Type *PTy = getPromotedType((*AI)->getType()); + if (PTy != (*AI)->getType()) { + // Must promote to pass through va_arg area! + Instruction::CastOps opcode = + CastInst::getCastOpcode(*AI, false, PTy, false); + Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); + } else { + Args.push_back(*AI); + } + + // Add any parameter attributes. + if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) + attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); + } + } + } + + if (Attributes FnAttrs = CallerPAL.getFnAttributes()) + attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + + if (NewRetTy->isVoidTy()) + Caller->setName(""); // Void type should not have a name. + + const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), + attrVec.end()); + + Instruction *NC; + if (InvokeInst *II = dyn_cast(Caller)) { + NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(), + Args.begin(), Args.end(), + Caller->getName(), Caller); + cast(NC)->setCallingConv(II->getCallingConv()); + cast(NC)->setAttributes(NewCallerPAL); + } else { + NC = CallInst::Create(Callee, Args.begin(), Args.end(), + Caller->getName(), Caller); + CallInst *CI = cast(Caller); + if (CI->isTailCall()) + cast(NC)->setTailCall(); + cast(NC)->setCallingConv(CI->getCallingConv()); + cast(NC)->setAttributes(NewCallerPAL); + } + + // Insert a cast of the return type as necessary. + Value *NV = NC; + if (OldRetTy != NV->getType() && !Caller->use_empty()) { + if (!NV->getType()->isVoidTy()) { + Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, + OldRetTy, false); + NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); + + // If this is an invoke instruction, we should insert it after the first + // non-phi, instruction in the normal successor block. + if (InvokeInst *II = dyn_cast(Caller)) { + BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); + InsertNewInstBefore(NC, *I); + } else { + // Otherwise, it's a call, just insert cast right after the call instr + InsertNewInstBefore(NC, *Caller); + } + Worklist.AddUsersToWorkList(*Caller); + } else { + NV = UndefValue::get(Caller->getType()); + } + } + + + if (!Caller->use_empty()) + Caller->replaceAllUsesWith(NV); + + EraseInstFromFunction(*Caller); + return true; +} + +// transformCallThroughTrampoline - Turn a call to a function created by the +// init_trampoline intrinsic into a direct call to the underlying function. +// +Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { + Value *Callee = CS.getCalledValue(); + const PointerType *PTy = cast(Callee->getType()); + const FunctionType *FTy = cast(PTy->getElementType()); + const AttrListPtr &Attrs = CS.getAttributes(); + + // If the call already has the 'nest' attribute somewhere then give up - + // otherwise 'nest' would occur twice after splicing in the chain. 
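// Standalone demo of why the promotion cast above is needed: values passed
// through the varargs area travel in promoted form (small integers promote
// to int at the C level), so the callee reads the promoted type. We assume
// getPromotedType, defined elsewhere in this file, performs the matching
// integer promotion.
#include <cassert>
#include <cstdarg>

static int takeOne(int n, ...) {
  va_list ap;
  va_start(ap, n);
  int v = va_arg(ap, int);   // a short argument arrives as int
  va_end(ap);
  return v;
}

int main() {
  short s = -3;
  assert(takeOne(1, s) == -3);   // s was promoted to int at the call site
}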
+ if (Attrs.hasAttrSomewhere(Attribute::Nest)) + return 0; + + IntrinsicInst *Tramp = + cast(cast(Callee)->getOperand(0)); + + Function *NestF = cast(Tramp->getOperand(2)->stripPointerCasts()); + const PointerType *NestFPTy = cast(NestF->getType()); + const FunctionType *NestFTy = cast(NestFPTy->getElementType()); + + const AttrListPtr &NestAttrs = NestF->getAttributes(); + if (!NestAttrs.isEmpty()) { + unsigned NestIdx = 1; + const Type *NestTy = 0; + Attributes NestAttr = Attribute::None; + + // Look for a parameter marked with the 'nest' attribute. + for (FunctionType::param_iterator I = NestFTy->param_begin(), + E = NestFTy->param_end(); I != E; ++NestIdx, ++I) + if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) { + // Record the parameter type and any other attributes. + NestTy = *I; + NestAttr = NestAttrs.getParamAttributes(NestIdx); + break; + } + + if (NestTy) { + Instruction *Caller = CS.getInstruction(); + std::vector NewArgs; + NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); + + SmallVector NewAttrs; + NewAttrs.reserve(Attrs.getNumSlots() + 1); + + // Insert the nest argument into the call argument list, which may + // mean appending it. Likewise for attributes. + + // Add any result attributes. + if (Attributes Attr = Attrs.getRetAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); + + { + unsigned Idx = 1; + CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + do { + if (Idx == NestIdx) { + // Add the chain argument and attributes. + Value *NestVal = Tramp->getOperand(3); + if (NestVal->getType() != NestTy) + NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); + NewArgs.push_back(NestVal); + NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); + } + + if (I == E) + break; + + // Add the original argument and attributes. + NewArgs.push_back(*I); + if (Attributes Attr = Attrs.getParamAttributes(Idx)) + NewAttrs.push_back + (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); + + ++Idx, ++I; + } while (1); + } + + // Add any function attributes. + if (Attributes Attr = Attrs.getFnAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); + + // The trampoline may have been bitcast to a bogus type (FTy). + // Handle this by synthesizing a new function type, equal to FTy + // with the chain parameter inserted. + + std::vector NewTypes; + NewTypes.reserve(FTy->getNumParams()+1); + + // Insert the chain's type into the list of parameter types, which may + // mean appending it. + { + unsigned Idx = 1; + FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); + + do { + if (Idx == NestIdx) + // Add the chain's type. + NewTypes.push_back(NestTy); + + if (I == E) + break; + + // Add the original type. + NewTypes.push_back(*I); + + ++Idx, ++I; + } while (1); + } + + // Replace the trampoline call with a direct call. Let the generic + // code sort out any function type mismatches. + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, + FTy->isVarArg()); + Constant *NewCallee = + NestF->getType() == PointerType::getUnqual(NewFTy) ? 
+ NestF : ConstantExpr::getBitCast(NestF, + PointerType::getUnqual(NewFTy)); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), + NewAttrs.end()); + + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast(Caller)) { + NewCaller = InvokeInst::Create(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + cast(NewCaller)->setCallingConv(II->getCallingConv()); + cast(NewCaller)->setAttributes(NewPAL); + } else { + NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + if (cast(Caller)->isTailCall()) + cast(NewCaller)->setTailCall(); + cast(NewCaller)-> + setCallingConv(cast(Caller)->getCallingConv()); + cast(NewCaller)->setAttributes(NewPAL); + } + if (!Caller->getType()->isVoidTy()) + Caller->replaceAllUsesWith(NewCaller); + Caller->eraseFromParent(); + Worklist.Remove(Caller); + return 0; + } + } + + // Replace the trampoline call with a direct call. Since there is no 'nest' + // parameter, there is no need to adjust the argument list. Let the generic + // code sort out any function type mismatches. + Constant *NewCallee = + NestF->getType() == PTy ? NestF : + ConstantExpr::getBitCast(NestF, PTy); + CS.setCalledFunction(NewCallee); + return CS.getInstruction(); +} + diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp new file mode 100644 index 000000000000..e018b351082a --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -0,0 +1,1301 @@ +//===- InstCombineCasts.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for cast operations. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear +/// expression. If so, decompose it, returning some value X, such that Val is +/// X*Scale+Offset. +/// +static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, + int &Offset) { + assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!"); + if (ConstantInt *CI = dyn_cast(Val)) { + Offset = CI->getZExtValue(); + Scale = 0; + return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0); + } + + if (BinaryOperator *I = dyn_cast(Val)) { + if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { + if (I->getOpcode() == Instruction::Shl) { + // This is a value scaled by '1 << the shift amt'. + Scale = 1U << RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } + + if (I->getOpcode() == Instruction::Mul) { + // This value is scaled by 'RHS'. + Scale = RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } + + if (I->getOpcode() == Instruction::Add) { + // We have X+C. Check to see if we really have (X*C2)+C1, + // where C1 is divisible by C2. + unsigned SubScale; + Value *SubVal = + DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + Offset += RHS->getZExtValue(); + Scale = SubScale; + return SubVal; + } + } + } + + // Otherwise, we can't look past this. 
+ Scale = 1; + Offset = 0; + return Val; +} + +/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, +/// try to eliminate the cast by moving the type information into the alloc. +Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, + AllocaInst &AI) { + // This requires TargetData to get the alloca alignment and size information. + if (!TD) return 0; + + const PointerType *PTy = cast(CI.getType()); + + BuilderTy AllocaBuilder(*Builder); + AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + + // Get the type really allocated and the type casted to. + const Type *AllocElTy = AI.getAllocatedType(); + const Type *CastElTy = PTy->getElementType(); + if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + + unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); + unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); + if (CastElTyAlign < AllocElTyAlign) return 0; + + // If the allocation has multiple uses, only promote it if we are strictly + // increasing the alignment of the resultant allocation. If we keep it the + // same, we open the door to infinite loops of various kinds. (A reference + // from a dbg.declare doesn't count as a use for this purpose.) + if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && + CastElTyAlign == AllocElTyAlign) return 0; + + uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); + uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); + if (CastElTySize == 0 || AllocElTySize == 0) return 0; + + // See if we can satisfy the modulus by pulling a scale out of the array + // size argument. + unsigned ArraySizeScale; + int ArrayOffset; + Value *NumElements = // See if the array size is a decomposable linear expr. + DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); + + // If we can now satisfy the modulus, by using a non-1 scale, we really can + // do the xform. + if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + + unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; + Value *Amt = 0; + if (Scale == 1) { + Amt = NumElements; + } else { + Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale); + // Insert before the alloca, not before the cast. + Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); + } + + if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { + Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()), + Offset, true); + Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); + } + + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); + New->setAlignment(AI.getAlignment()); + New->takeName(&AI); + + // If the allocation has one real use plus a dbg.declare, just remove the + // declare. + if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { + EraseInstFromFunction(*(Instruction*)DI); + } + // If the allocation has multiple real uses, insert a cast and change all + // things that used it to use the new cast. This will also hack on CI, but it + // will die soon. + else if (!AI.hasOneUse()) { + // New is the allocation instruction, pointer typed. AI is the original + // allocation instruction, also pointer typed. Thus, cast to use is BitCast. 
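// Standalone model (names are ours, not LLVM's) of the size arithmetic
// above: an "alloca i8, 4*n+8" bitcast to i32* can be rewritten as
// "alloca i32, n+2" because the scale (4) and the offset (8) are both
// divisible by the new element size (4).
#include <cassert>

struct Rescaled { unsigned Scale, Offset; bool OK; };

static Rescaled rescale(unsigned AllocElTySize, unsigned CastElTySize,
                        unsigned ArraySizeScale, unsigned ArrayOffset) {
  Rescaled R;
  R.OK = (AllocElTySize * ArraySizeScale) % CastElTySize == 0 &&
         (AllocElTySize * ArrayOffset) % CastElTySize == 0;
  R.Scale = (AllocElTySize * ArraySizeScale) / CastElTySize;  // per element
  R.Offset = (AllocElTySize * ArrayOffset) / CastElTySize;    // constant part
  return R;
}

int main() {
  Rescaled R = rescale(/*i8*/ 1, /*i32*/ 4, /*4*n*/ 4, /*+8*/ 8);
  assert(R.OK && R.Scale == 1 && R.Offset == 2);  // new count is 1*n + 2
}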
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); + AI.replaceAllUsesWith(NewCast); + } + return ReplaceInstUsesWith(CI, New); +} + + + +/// EvaluateInDifferentType - Given an expression that +/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually +/// insert the code to evaluate the expression. +Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, + bool isSigned) { + if (Constant *C = dyn_cast(V)) { + C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); + // If we got a constantexpr back, try to simplify it with TD info. + if (ConstantExpr *CE = dyn_cast(C)) + C = ConstantFoldConstantExpression(CE, TD); + return C; + } + + // Otherwise, it must be an instruction. + Instruction *I = cast(V); + Instruction *Res = 0; + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::Shl: + case Instruction::UDiv: + case Instruction::URem: { + Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); + Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // If the source type of the cast is the type we're trying for then we can + // just return the source. There's no need to insert it because it is not + // new. + if (I->getOperand(0)->getType() == Ty) + return I->getOperand(0); + + // Otherwise, must be the same type of cast, so just reinsert a new one. + // This also handles the case of zext(trunc(x)) -> zext(x). + Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, + Opc == Instruction::SExt); + break; + case Instruction::Select: { + Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); + Res = SelectInst::Create(I->getOperand(0), True, False); + break; + } + case Instruction::PHI: { + PHINode *OPN = cast(I); + PHINode *NPN = PHINode::Create(Ty); + for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { + Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + NPN->addIncoming(V, OPN->getIncomingBlock(i)); + } + Res = NPN; + break; + } + default: + // TODO: Can handle more cases here. + llvm_unreachable("Unreachable!"); + break; + } + + Res->takeName(I); + return InsertNewInstBefore(Res, *I); +} + + +/// This function is a wrapper around CastInst::isEliminableCastPair. It +/// simply extracts arguments and returns what that function returns. +static Instruction::CastOps +isEliminableCastPair( + const CastInst *CI, ///< The first cast instruction + unsigned opcode, ///< The opcode of the second cast instruction + const Type *DstTy, ///< The target type for the second cast instruction + TargetData *TD ///< The target data for pointer size +) { + + const Type *SrcTy = CI->getOperand(0)->getType(); // A from above + const Type *MidTy = CI->getType(); // B from above + + // Get the opcodes of the two Cast instructions + Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); + Instruction::CastOps secondOp = Instruction::CastOps(opcode); + + unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, + DstTy, + TD ? 
TD->getIntPtrType(CI->getContext()) : 0);
+
+  // We don't want to form an inttoptr or ptrtoint that converts to an integer
+  // type that differs from the pointer size.
+  if ((Res == Instruction::IntToPtr &&
+       (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+      (Res == Instruction::PtrToInt &&
+       (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+    Res = 0;
+
+  return Instruction::CastOps(Res);
+}
+
+/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
+/// in any code being generated.  It does not require codegen if V is simple
+/// enough or if the cast can be folded into other casts.
+bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode, const Value *V,
+                                     const Type *Ty) {
+  if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+  // If this is another cast that can be eliminated, it isn't codegen either.
+  if (const CastInst *CI = dyn_cast<CastInst>(V))
+    if (isEliminableCastPair(CI, opcode, Ty, TD))
+      return false;
+  return true;
+}
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+
+  // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+  // eliminate it now.
+  if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
+    if (Instruction::CastOps opc =
+        isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+      // The first cast (CSrc) is eliminable so we need to fix up or replace
+      // the second cast (CI). CSrc will then have a good chance of being dead.
+      return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+    }
+  }
+
+  // If we are casting a select then fold the cast into the select
+  if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+    if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+      return NV;
+
+  // If we are casting a PHI then fold the cast into the PHI
+  if (isa<PHINode>(Src)) {
+    // We don't do this if this would create a PHI node with an illegal type if
+    // it is currently legal.
+    if (!isa<VectorType>(Src->getType()) ||
+        !isa<VectorType>(CI.getType()) ||
+        ShouldChangeType(CI.getType(), Src->getType()))
+      if (Instruction *NV = FoldOpIntoPhi(CI))
+        return NV;
+  }
+
+  return 0;
+}
+
+/// CanEvaluateTruncated - Return true if we can evaluate the specified
+/// expression tree as type Ty instead of its larger type, and arrive with the
+/// same value.  This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V.  We should return true if trunc(V)
+/// can be computed by computing V in the smaller type.  If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+  // We can always evaluate constants in another type.
+  if (isa<Constant>(V))
+    return true;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  const Type *OrigTy = V->getType();
+
+  // If this is an extension from the dest type, we can eliminate it, even if it
+  // has multiple uses.
+  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+      I->getOperand(0)->getType() == Ty)
+    return true;
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
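// Standalone demo of the A->B->C folding above: widening twice equals
// widening once, and widening then truncating back is the identity; these
// are the kinds of pairs isEliminableCastPair reports as removable.
#include <cassert>
#include <cstdint>

int main() {
  uint8_t x = 0xAB;
  assert((uint32_t)(uint16_t)x == (uint32_t)x);  // zext+zext == one zext
  assert((uint8_t)(uint32_t)x == x);             // zext+trunc == no cast
}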
+  if (!I->hasOneUse()) return false;
+
+  unsigned Opc = I->getOpcode();
+  switch (Opc) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // These operators can all arbitrarily be extended or truncated.
+    return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+           CanEvaluateTruncated(I->getOperand(1), Ty);
+
+  case Instruction::UDiv:
+  case Instruction::URem: {
+    // UDiv and URem can be truncated if all the truncated bits are zero.
+    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+    uint32_t BitWidth = Ty->getScalarSizeInBits();
+    if (BitWidth < OrigBitWidth) {
+      APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+      if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+          MaskedValueIsZero(I->getOperand(1), Mask)) {
+        return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+               CanEvaluateTruncated(I->getOperand(1), Ty);
+      }
+    }
+    break;
+  }
+  case Instruction::Shl:
+    // If we are truncating the result of this SHL, and if it's a shift of a
+    // constant amount, we can always perform a SHL in a smaller type.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (CI->getLimitedValue(BitWidth) < BitWidth)
+        return CanEvaluateTruncated(I->getOperand(0), Ty);
+    }
+    break;
+  case Instruction::LShr:
+    // If this is a truncate of a logical shr, we can truncate it to a smaller
+    // lshr iff we know that the bits we would otherwise be shifting in are
+    // already zeros.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (MaskedValueIsZero(I->getOperand(0),
+            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+          CI->getLimitedValue(BitWidth) < BitWidth) {
+        return CanEvaluateTruncated(I->getOperand(0), Ty);
+      }
+    }
+    break;
+  case Instruction::Trunc:
+    // trunc(trunc(x)) -> trunc(x)
+    return true;
+  case Instruction::Select: {
+    SelectInst *SI = cast<SelectInst>(I);
+    return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
+           CanEvaluateTruncated(SI->getFalseValue(), Ty);
+  }
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+        return false;
+    return true;
+  }
+  default:
+    // TODO: Can handle more cases here.
+    break;
+  }
+
+  return false;
+}
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+  if (Instruction *Result = commonCastTransforms(CI))
+    return Result;
+
+  // See if we can simplify any instructions used by the input whose sole
+  // purpose is to compute bits we don't care about.
+  if (SimplifyDemandedInstructionBits(CI))
+    return &CI;
+
+  Value *Src = CI.getOperand(0);
+  const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+  // Attempt to truncate the entire input expression tree to the destination
+  // type.  Only do this if the dest type is a simple type, don't convert the
+  // expression tree to something weird like i93 unless the source is also
+  // strange.
+  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateTruncated(Src, DestTy)) {
+
+    // If this cast is a truncate, evaluating in a different type always
+    // eliminates the cast, so it is always a win.
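// Standalone demo of the property CanEvaluateTruncated exploits: for
// add/sub/mul/and/or/xor, truncation commutes with the operation, so the
// whole expression tree can be computed in the narrow type.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t a = 0x1234567890ABCDEFULL, b = 0x0FEDCBA098765432ULL;
  uint32_t ta = (uint32_t)a, tb = (uint32_t)b;    // truncated operands
  assert((uint32_t)(a * b + a) == ta * tb + ta);  // same low 32 bits
}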
+    DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+          " to avoid cast: " << CI);
+    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+    assert(Res->getType() == DestTy);
+    return ReplaceInstUsesWith(CI, Res);
+  }
+
+  // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+  if (DestTy->getScalarSizeInBits() == 1) {
+    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Src = Builder->CreateAnd(Src, One, "tmp");
+    Value *Zero = Constant::getNullValue(Src->getType());
+    return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+  }
+
+  return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+                                             bool DoXform) {
+  // If we are just checking for a icmp eq of a single bit and zext'ing it
+  // to an integer, then shift the bit to the appropriate place and then
+  // cast to integer to avoid the comparison.
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+    const APInt &Op1CV = Op1C->getValue();
+
+    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+      if (!DoXform) return ICI;
+
+      Value *In = ICI->getOperand(0);
+      Value *Sh = ConstantInt::get(In->getType(),
+                                   In->getType()->getScalarSizeInBits()-1);
+      In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+      if (In->getType() != CI.getType())
+        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+
+      if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+        Constant *One = ConstantInt::get(In->getType(), 1);
+        In = Builder->CreateXor(In, One, In->getName()+".not");
+      }
+
+      return ReplaceInstUsesWith(CI, In);
+    }
+
+    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
+    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
+    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
+    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
+    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
+    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
+    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+    if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+        // This only works for EQ and NE
+        ICI->isEquality()) {
+      // If Op1C is some other power of two, convert:
+      uint32_t BitWidth = Op1C->getType()->getBitWidth();
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+      ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+      APInt KnownZeroMask(~KnownZero);
+      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+        if (!DoXform) return ICI;
+
+        bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+        if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+          // (X&4) == 2 --> false
+          // (X&4) != 2 --> true
+          Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+                                           isNE);
+          Res = ConstantExpr::getZExt(Res, CI.getType());
+          return ReplaceInstUsesWith(CI, Res);
+        }
+
+        uint32_t ShiftAmt = KnownZeroMask.logBase2();
+        Value *In = ICI->getOperand(0);
+        if (ShiftAmt) {
+          // Perform a logical shr by shiftamt.
+          // Insert the shift to put the result in the low bit.
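// Standalone demo of the signbit transforms above: (x < 0) zero-extended
// is exactly the sign bit shifted down to bit 0, and (x > -1) is the same
// with the low bit flipped.
#include <cassert>
#include <cstdint>

int main() {
  int32_t x = -7;
  assert((uint32_t)(x < 0) == ((uint32_t)x >> 31));          // 1 == 1
  assert((uint32_t)(x > -1) == (((uint32_t)x >> 31) ^ 1u));  // 0 == 0
  x = 42;
  assert((uint32_t)(x < 0) == ((uint32_t)x >> 31));          // 0 == 0
}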
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); + } + + if ((Op1CV != 0) == isNE) { // Toggle the low bit. + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, "tmp"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + else + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + } + } + } + + // icmp ne A, B is equal to xor A, B when A and B only really have one bit. + // It is also profitable to transform icmp eq into not(xor(A, B)) because that + // may lead to additional simplifications. + if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { + if (const IntegerType *ITy = dyn_cast(CI.getType())) { + uint32_t BitWidth = ITy->getBitWidth(); + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { + APInt KnownBits = KnownZeroLHS | KnownOneLHS; + APInt UnknownBit = ~KnownBits; + if (UnknownBit.countPopulation() == 1) { + if (!DoXform) return ICI; + + Value *Result = Builder->CreateXor(LHS, RHS); + + // Mask off any bits that are set and won't be shifted away. + if (KnownOneLHS.uge(UnknownBit)) + Result = Builder->CreateAnd(Result, + ConstantInt::get(ITy, UnknownBit)); + + // Shift the bit we're testing down to the lsb. + Result = Builder->CreateLShr( + Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result->takeName(ICI); + return ReplaceInstUsesWith(CI, Result); + } + } + } + } + + return 0; +} + +/// CanEvaluateZExtd - Determine if the specified value can be computed in the +/// specified wider type and produce the same low bits. If not, return false. +/// +/// If this function returns true, it can also return a non-zero number of bits +/// (in BitsToClear) which indicates that the value it computes is correct for +/// the zero extend, but that the additional BitsToClear bits need to be zero'd +/// out. For example, to promote something like: +/// +/// %B = trunc i64 %A to i32 +/// %C = lshr i32 %B, 8 +/// %E = zext i32 %C to i64 +/// +/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be +/// set to 8 to indicate that the promoted value needs to have bits 24-31 +/// cleared in addition to bits 32-63. Since an 'and' will be generated to +/// clear the top bits anyway, doing this has no extra cost. +/// +/// This function works on both vectors and scalars. +static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) { + BitsToClear = 0; + if (isa(V)) + return true; + + Instruction *I = dyn_cast(V); + if (!I) return false; + + // If the input is a truncate from the destination type, we can trivially + // eliminate it, even if it has multiple uses. + // FIXME: This is currently disabled until codegen can handle this without + // pessimizing code, PR5997. + if (0 && isa(I) && I->getOperand(0)->getType() == Ty) + return true; + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. 
+  if (!I->hasOneUse()) return false;
+
+  unsigned Opc = I->getOpcode(), Tmp;
+  switch (Opc) {
+  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
+  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
+  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+    return true;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::Shl:
+    if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+        !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+      return false;
+    // These can all be promoted if neither operand has 'bits to clear'.
+    if (BitsToClear == 0 && Tmp == 0)
+      return true;
+
+    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+    // other side, BitsToClear is ok.
+    if (Tmp == 0 &&
+        (Opc == Instruction::And || Opc == Instruction::Or ||
+         Opc == Instruction::Xor)) {
+      // We use MaskedValueIsZero here for generality, but the case we care
+      // about the most is constant RHS.
+      unsigned VSize = V->getType()->getScalarSizeInBits();
+      if (MaskedValueIsZero(I->getOperand(1),
+                            APInt::getHighBitsSet(VSize, BitsToClear)))
+        return true;
+    }
+
+    // Otherwise, we don't know how to analyze this BitsToClear case yet.
+    return false;
+
+  case Instruction::LShr:
+    // We can promote lshr(x, cst) if we can promote x.  This requires the
+    // ultimate 'and' to clear out the high zero bits we're clearing out though.
+    if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+        return false;
+      BitsToClear += Amt->getZExtValue();
+      if (BitsToClear > V->getType()->getScalarSizeInBits())
+        BitsToClear = V->getType()->getScalarSizeInBits();
+      return true;
+    }
+    // Cannot promote variable LSHR.
+    return false;
+  case Instruction::Select:
+    if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+        !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+        // TODO: If important, we could handle the case when the BitsToClear are
+        // known zero in the disagreeing side.
+        Tmp != BitsToClear)
+      return false;
+    return true;
+
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+      return false;
+    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+          // TODO: If important, we could handle the case when the BitsToClear
+          // are known zero in the disagreeing input.
+          Tmp != BitsToClear)
+        return false;
+    return true;
+  }
+  default:
+    // TODO: Can handle more cases here.
+    return false;
+  }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+  // If this zero extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this zext.
+  if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+    return 0;
+
+  // If one of the common conversions will work, do it.
+  if (Instruction *Result = commonCastTransforms(CI))
+    return Result;
+
+  // See if we can simplify any instructions used by the input whose sole
+  // purpose is to compute bits we don't care about.
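// Standalone demo of the BitsToClear bookkeeping above for the doc-comment
// example (%B = trunc i64 %A to i32; %C = lshr i32 %B, 8; %E = zext %C):
// the shift may be done directly in 64 bits, provided the promoted result
// is masked down to the 24 low bits the 32-bit shift would have produced.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0xDEADBEEFCAFEF00DULL;
  uint64_t E = (uint64_t)(((uint32_t)A) >> 8);  // original trunc/lshr/zext
  uint64_t P = (A >> 8) & 0xFFFFFFULL;          // promoted shift + mask
  assert(E == P);
}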
+ if (SimplifyDemandedInstructionBits(CI)) + return &CI; + + Value *Src = CI.getOperand(0); + const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + + // Attempt to extend the entire input expression tree to the destination + // type. Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + unsigned BitsToClear; + if ((isa(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + assert(BitsToClear < SrcTy->getScalarSizeInBits() && + "Unreasonable BitsToClear"); + + // Okay, we can transform this! Insert the new expression now. + DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid zero extend: " << CI); + Value *Res = EvaluateInDifferentType(Src, DestTy, false); + assert(Res->getType() == DestTy); + + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; + uint32_t DestBitSize = DestTy->getScalarSizeInBits(); + + // If the high bits are already filled with zeros, just replace this + // cast with the result. + if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, + DestBitSize-SrcBitsKept))) + return ReplaceInstUsesWith(CI, Res); + + // We need to emit an AND to clear the high bits. + Constant *C = ConstantInt::get(Res->getType(), + APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); + return BinaryOperator::CreateAnd(Res, C); + } + + // If this is a TRUNC followed by a ZEXT then we are dealing with integral + // types and if the sizes are just right we can convert this into a logical + // 'and' which will be much cheaper than the pair of casts. + if (TruncInst *CSrc = dyn_cast(Src)) { // A->B->C cast + // TODO: Subsume this into EvaluateInDifferentType. + + // Get the sizes of the types involved. We know that the intermediate type + // will be smaller than A or C, but don't know the relation between A and C. + Value *A = CSrc->getOperand(0); + unsigned SrcSize = A->getType()->getScalarSizeInBits(); + unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); + // If we're actually extending zero bits, then if + // SrcSize < DstSize: zext(a & mask) + // SrcSize == DstSize: a & mask + // SrcSize > DstSize: trunc(a) & mask + if (SrcSize < DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + return new ZExtInst(And, CI.getType()); + } + + if (SrcSize == DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), + AndValue)); + } + if (SrcSize > DstSize) { + Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); + APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); + return BinaryOperator::CreateAnd(Trunc, + ConstantInt::get(Trunc->getType(), + AndValue)); + } + } + + if (ICmpInst *ICI = dyn_cast(Src)) + return transformZExtICmp(ICI, CI); + + BinaryOperator *SrcI = dyn_cast(Src); + if (SrcI && SrcI->getOpcode() == Instruction::Or) { + // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one + // of the (zext icmp) will be transformed. 
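// Standalone demo of the trunc+zext cases above: when only zero bits are
// really being extended, the cast pair collapses to a mask (plus one cheap
// cast when the outer sizes differ).
#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0x12345678;   // zext(trunc i32->i8) to i16: SrcSize > DstSize
  assert((uint16_t)(uint8_t)a == ((uint16_t)a & 0xFFu));
  uint16_t b = 0xBEEF;       // zext(trunc i16->i8) to i32: SrcSize < DstSize
  assert((uint32_t)(uint8_t)b == (uint32_t)(b & 0xFFu));
}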
+    if (SrcSize < DstSize) {
+      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+      Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+      return new ZExtInst(And, CI.getType());
+    }
+
+    if (SrcSize == DstSize) {
+      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+                                                           AndValue));
+    }
+    if (SrcSize > DstSize) {
+      Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+      return BinaryOperator::CreateAnd(Trunc,
+                                       ConstantInt::get(Trunc->getType(),
+                                                        AndValue));
+    }
+  }
+
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+    return transformZExtICmp(ICI, CI);
+
+  BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+  if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+    // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+    // of the (zext icmp) will be transformed.
+    ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+    ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+    if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+        (transformZExtICmp(LHS, CI, false) ||
+         transformZExtICmp(RHS, CI, false))) {
+      Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+      Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+      return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+    }
+  }
+
+  // zext(trunc(t) & C) -> (t & zext(C)).
+  if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+      if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+        Value *TI0 = TI->getOperand(0);
+        if (TI0->getType() == CI.getType())
+          return
+            BinaryOperator::CreateAnd(TI0,
+                                      ConstantExpr::getZExt(C, CI.getType()));
+      }
+
+  // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+  if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+      if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+        if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+            And->getOperand(1) == C)
+          if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+            Value *TI0 = TI->getOperand(0);
+            if (TI0->getType() == CI.getType()) {
+              Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+              Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+              return BinaryOperator::CreateXor(NewAnd, ZC);
+            }
+          }
+
+  // zext (xor i1 X, true) to i32  --> xor (zext i1 X to i32), 1
+  Value *X;
+  if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) &&
+      match(SrcI, m_Not(m_Value(X))) &&
+      (!X->hasOneUse() || !isa<CmpInst>(X))) {
+    Value *New = Builder->CreateZExt(X, CI.getType());
+    return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+  }
+
+  return 0;
+}
+
+/// CanEvaluateSExtd - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the value of the common low bits.  This is used by code that tries
+/// to promote integer operations to a wider type, which will allow us to
+/// eliminate the extension.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+         "Can't sign extend type to a smaller type");
+  // If this is a constant, it can be trivially promoted.
+  if (isa<Constant>(V))
+    return true;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  // If this is a truncate from the dest type, we can trivially eliminate it,
+  // even if it has multiple uses.
+  // FIXME: This is currently disabled until codegen can handle this without
+  // pessimizing code, PR5997.
+  if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+    return true;
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
+  switch (I->getOpcode()) {
+  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
+  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
+  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+    return true;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    // These operators can all arbitrarily be extended if their inputs can.
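+    // For example, the low 8 bits of (add i32 (sext i8 %a to i32),
+    // (sext i8 %b to i32)) always equal (add i8 %a, %b).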
+    return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+           CanEvaluateSExtd(I->getOperand(1), Ty);
+
+  //case Instruction::Shl:   TODO
+  //case Instruction::LShr:  TODO
+
+  case Instruction::Select:
+    return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+           CanEvaluateSExtd(I->getOperand(2), Ty);
+
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+    return true;
+  }
+  default:
+    // TODO: Can handle more cases here.
+    break;
+  }
+
+  return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+  // If this sign extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this sext.
+  if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+    return 0;
+
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+
+  // See if we can simplify any instructions used by the input whose sole
+  // purpose is to compute bits we don't care about.
+  if (SimplifyDemandedInstructionBits(CI))
+    return &CI;
+
+  Value *Src = CI.getOperand(0);
+  const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+  // Canonicalize sign-extend from i1 to a select.
+  if (Src->getType()->isInteger(1))
+    return SelectInst::Create(Src,
+                              Constant::getAllOnesValue(CI.getType()),
+                              Constant::getNullValue(CI.getType()));
+
+  // Attempt to extend the entire input expression tree to the destination
+  // type.  Only do this if the dest type is a simple type; don't convert the
+  // expression tree to something weird like i93 unless the source is also
+  // strange.
+  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateSExtd(Src, DestTy)) {
+    // Okay, we can transform this!  Insert the new expression now.
+    DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+          " to avoid sign extend: " << CI);
+    Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+    assert(Res->getType() == DestTy);
+
+    uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+    uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+    // If the high bits are already filled with sign bit, just replace this
+    // cast with the result.
+    if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+      return ReplaceInstUsesWith(CI, Res);
+
+    // We need to emit a shl + ashr to do the sign extend.
+    Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+    return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+                                      ShAmt);
+  }
+
+  // If the input is a shl/ashr pair of the same constant, then this is a sign
+  // extension from a smaller value.  If we could trust arbitrary bitwidth
+  // integers, we could turn this into a truncate to the smaller bit and then
+  // use a sext for the whole extension.  Since we don't, look deeper and check
+  // for a truncate.  If the source and dest are the same type, eliminate the
+  // trunc and extend and just do shifts.  For example, turn:
+  //   %a = trunc i32 %i to i8
+  //   %b = shl i8 %a, 6
+  //   %c = ashr i8 %b, 6
+  //   %d = sext i8 %c to i32
+  // into:
+  //   %a = shl i32 %i, 30
+  //   %d = ashr i32 %a, 30
+  Value *A = 0;
+  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
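+  // In the example above, the rewritten shift amount computed below works out
+  // to CA + SrcDstSize - MidSize = 6 + 32 - 8 = 30.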
+  ConstantInt *BA = 0, *CA = 0;
+  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+                        m_ConstantInt(CA))) &&
+      BA == CA && A->getType() == CI.getType()) {
+    unsigned MidSize = Src->getType()->getScalarSizeInBits();
+    unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+    unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+    Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+    A = Builder->CreateShl(A, ShAmtV, CI.getName());
+    return BinaryOperator::CreateAShr(A, ShAmtV);
+  }
+
+  return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+  bool losesInfo;
+  APFloat F = CFP->getValueAPF();
+  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+  if (!losesInfo)
+    return ConstantFP::get(CFP->getContext(), F);
+  return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    if (I->getOpcode() == Instruction::FPExt)
+      return LookThroughFPExtensions(I->getOperand(0));
+
+  // If this value is a constant, return the constant in the smallest FP type
+  // that can accurately represent it.  This allows us to turn
+  // (float)((double)X+2.0) into x+2.0f.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+      return V;  // No constant folding of this.
+    // See if the value can be truncated to float and then reextended.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+      return V;
+    if (CFP->getType()->isDoubleTy())
+      return V;  // Won't shrink.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+      return V;
+    // Don't try to shrink to various long double types.
+  }
+
+  return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+
+  // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+  // smaller than the destination type, we can eliminate the truncate by doing
+  // the add as the smaller type.  This applies to fadd/fsub/fmul/fdiv as well
+  // as many builtins (sqrt, etc).
+  BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+  if (OpI && OpI->hasOneUse()) {
+    switch (OpI->getOpcode()) {
+    default: break;
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FDiv:
+    case Instruction::FRem:
+      const Type *SrcTy = OpI->getType();
+      Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+      Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+      if (LHSTrunc->getType() != SrcTy &&
+          RHSTrunc->getType() != SrcTy) {
+        unsigned DstSize = CI.getType()->getScalarSizeInBits();
+        // If the source types were both smaller than the destination type of
+        // the cast, do this xform.
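+        // For example (illustrative):
+        //   %x = fpext float %a to double
+        //   %y = fpext float %b to double
+        //   %s = fadd double %x, %y
+        //   %r = fptrunc double %s to float
+        // becomes:
+        //   %r = fadd float %a, %b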
+        if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+            RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+          return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+        }
+      }
+      break;
+    }
+  }
+  return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+  return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+  Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+  if (OpI == 0)
+    return commonCastTransforms(FI);
+
+  // fptoui(uitofp(X)) --> X
+  // fptoui(sitofp(X)) --> X
+  // This is safe if the intermediate type has enough bits in its mantissa to
+  // accurately represent all values of X.  For example, do not do this with
+  // i64->float->i64.  This is also safe for the sitofp case, because any
+  // negative 'X' value would cause an undefined result for the fptoui.
+  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+      OpI->getOperand(0)->getType() == FI.getType() &&
+      (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+                    OpI->getType()->getFPMantissaWidth())
+    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+  return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+  Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+  if (OpI == 0)
+    return commonCastTransforms(FI);
+
+  // fptosi(sitofp(X)) --> X
+  // fptosi(uitofp(X)) --> X
+  // This is safe if the intermediate type has enough bits in its mantissa to
+  // accurately represent all values of X.  For example, do not do this with
+  // i64->float->i64.  This is also safe for the sitofp case, because any
+  // negative 'X' value would cause an undefined result for the fptoui.
+  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+      OpI->getOperand(0)->getType() == FI.getType() &&
+      (int)FI.getType()->getScalarSizeInBits() <=
+                    OpI->getType()->getFPMantissaWidth())
+    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+  return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+  return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+  return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+  // If the source integer type is larger than the intptr_t type for
+  // this target, do a trunc to the intptr_t type, then inttoptr of it.  This
+  // allows the trunc to be exposed to other transforms.  Don't do this for
+  // extending inttoptr's, because we don't know if the target sign or zero
+  // extends to pointers.
+  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
+      TD->getPointerSizeInBits()) {
+    Value *P = Builder->CreateTrunc(CI.getOperand(0),
+                                    TD->getIntPtrType(CI.getContext()), "tmp");
+    return new IntToPtrInst(P, CI.getType());
+  }
+
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+
+  return 0;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+    // If casting the result of a getelementptr instruction with no offset,
+    // turn this into a cast of the original pointer!
+    if (GEP->hasAllZeroIndices()) {
+      // Changing the cast operand is usually not a good idea but it is safe
+      // here because the pointer operand is being replaced with another
+      // pointer operand so the opcode doesn't need to change.
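+      // For example, (bitcast (getelementptr %T* %p, i32 0) to i8*) becomes
+      // (bitcast %T* %p to i8*).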
+      Worklist.Add(GEP);
+      CI.setOperand(0, GEP->getOperand(0));
+      return &CI;
+    }
+
+    // If the GEP has a single use, and the base pointer is a bitcast, and the
+    // GEP computes a constant offset, see if we can convert these three
+    // instructions into fewer.  This typically happens with unions and other
+    // non-type-safe code.
+    if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+        GEP->hasAllConstantIndices()) {
+      // We are guaranteed to get a constant from EmitGEPOffset.
+      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
+      int64_t Offset = OffsetV->getSExtValue();
+
+      // Get the base pointer input of the bitcast, and the type it points to.
+      Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+      const Type *GEPIdxTy =
+        cast<PointerType>(OrigBase->getType())->getElementType();
+      SmallVector<Value*, 8> NewIndices;
+      if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+        // If we were able to index down into an element, create the GEP
+        // and bitcast the result.  This eliminates one bitcast, potentially
+        // two.
+        Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+          Builder->CreateInBoundsGEP(OrigBase,
+                                     NewIndices.begin(), NewIndices.end()) :
+          Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+        NGEP->takeName(GEP);
+
+        if (isa<BitCastInst>(CI))
+          return new BitCastInst(NGEP, CI.getType());
+        assert(isa<PtrToIntInst>(CI));
+        return new PtrToIntInst(NGEP, CI.getType());
+      }
+    }
+  }
+
+  return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+  // If the destination integer type is smaller than the intptr_t type for
+  // this target, do a ptrtoint to intptr_t then do a trunc.  This allows the
+  // trunc to be exposed to other transforms.  Don't do this for extending
+  // ptrtoint's, because we don't know if the target sign or zero extends its
+  // pointers.
+  if (TD &&
+      CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+    Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+                                       TD->getIntPtrType(CI.getContext()),
+                                       "tmp");
+    return new TruncInst(P, CI.getType());
+  }
+
+  return commonPointerCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+  // If the operands are integer typed then apply the integer transforms,
+  // otherwise just apply the common ones.
+  Value *Src = CI.getOperand(0);
+  const Type *SrcTy = Src->getType();
+  const Type *DestTy = CI.getType();
+
+  // Get rid of casts from one type to the same type.  These are useless and
+  // can be replaced by the operand.
+  if (DestTy == Src->getType())
+    return ReplaceInstUsesWith(CI, Src);
+
+  if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    const Type *DstElTy = DstPTy->getElementType();
+    const Type *SrcElTy = SrcPTy->getElementType();
+
+    // If the address spaces don't match, don't eliminate the bitcast, which is
+    // required for changing types.
+    if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+      return 0;
+
+    // If we are casting an alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    // There is no need to modify malloc calls because it is their bitcast that
+    // needs to be cleaned up.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
+    // This can enhance SROA and other transforms that want type-safe pointers.
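+    // For example (illustrative), with %p of type { { i32 } }*:
+    //   bitcast { { i32 } }* %p to i32*
+    // becomes:
+    //   getelementptr inbounds { { i32 } }* %p, i32 0, i32 0, i32 0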
+    Constant *ZeroUInt =
+      Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy &&
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }
+
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"",
+                                               ((Instruction*)NULL));
+    }
+  }
+
+  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+    if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+      Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+      return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+                    Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+    }
+  }
+
+  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+    if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+      Value *Elem =
+        Builder->CreateExtractElement(Src,
+                   Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+      return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+    }
+  }
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+    // Okay, we have (bitcast (shuffle ..)).  Check to see if this is
+    // a bitconvert to a vector with the same # elts.
+    if (SVI->hasOneUse() && isa<VectorType>(DestTy) &&
+        cast<VectorType>(DestTy)->getNumElements() ==
+              SVI->getType()->getNumElements() &&
+        SVI->getType()->getNumElements() ==
+          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+      BitCastInst *Tmp;
+      // If either of the operands is a cast from CI.getType(), then
+      // evaluating the shuffle in the casted destination's type will allow
+      // us to eliminate at least one cast.
+      if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+           Tmp->getOperand(0)->getType() == DestTy) ||
+          ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+           Tmp->getOperand(0)->getType() == DestTy)) {
+        Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+        Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+        // Return a new shuffle vector.  Use the same element ID's, as we
+        // know the vector types match #elts.
+        return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+      }
+    }
+  }
+
+  if (isa<PointerType>(SrcTy))
+    return commonPointerCastTransforms(CI);
+  return commonCastTransforms(CI);
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 000000000000..e59406c636ac
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,2475 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a ConstantInt
+static Constant *AddOne(Constant *C) {
+  return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt
+static Constant *SubOne(ConstantInt *C) {
+  return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+  return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+                           ConstantInt *In1, ConstantInt *In2,
+                           bool IsSigned) {
+  if (IsSigned)
+    if (In2->getValue().isNegative())
+      return Result->getValue().sgt(In1->getValue());
+    else
+      return Result->getValue().slt(In1->getValue());
+  else
+    return Result->getValue().ult(In1->getValue());
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+                            Constant *In2, bool IsSigned = false) {
+  Result = ConstantExpr::getAdd(In1, In2);
+
+  if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+      Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+      if (HasAddOverflow(ExtractElement(Result, Idx),
+                         ExtractElement(In1, Idx),
+                         ExtractElement(In2, Idx),
+                         IsSigned))
+        return true;
+    }
+    return false;
+  }
+
+  return HasAddOverflow(cast<ConstantInt>(Result),
+                        cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+                        IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+                           ConstantInt *In1, ConstantInt *In2,
+                           bool IsSigned) {
+  if (IsSigned)
+    if (In2->getValue().isNegative())
+      return Result->getValue().slt(In1->getValue());
+    else
+      return Result->getValue().sgt(In1->getValue());
+  else
+    return Result->getValue().ugt(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+                            Constant *In2, bool IsSigned = false) {
+  Result = ConstantExpr::getSub(In1, In2);
+
+  if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+      Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+      if (HasSubOverflow(ExtractElement(Result, Idx),
+                         ExtractElement(In1, Idx),
+                         ExtractElement(In2, Idx),
+                         IsSigned))
+        return true;
+    }
+    return false;
+  }
+
+  return HasSubOverflow(cast<ConstantInt>(Result),
+                        cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+                        IsSigned);
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit.  If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value
+/// is signed.
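+/// For example, (icmp slt i8 %x, 0) and (icmp ugt i8 %x, 127) both check
+/// exactly the sign bit of %x.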
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+                           bool &TrueIfSigned) {
+  switch (pred) {
+  case ICmpInst::ICMP_SLT:   // True if LHS s< 0
+    TrueIfSigned = true;
+    return RHS->isZero();
+  case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
+    TrueIfSigned = true;
+    return RHS->isAllOnesValue();
+  case ICmpInst::ICMP_SGT:   // True if LHS s> -1
+    TrueIfSigned = false;
+    return RHS->isAllOnesValue();
+  case ICmpInst::ICMP_UGT:
+    // True if LHS u> RHS and RHS == high-bit-mask - 1
+    TrueIfSigned = true;
+    return RHS->getValue() ==
+      APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+  case ICmpInst::ICMP_UGE:
+    // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+    TrueIfSigned = true;
+    return RHS->getValue().isSignBit();
+  default:
+    return false;
+  }
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+  return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// set of known zero and one bits, compute the maximum and minimum values that
+/// could have the specified known zero and known one bits, returning them in
+/// min/max.
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+                                                   const APInt& KnownOne,
+                                                   APInt& Min, APInt& Max) {
+  assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+         KnownZero.getBitWidth() == Min.getBitWidth() &&
+         KnownZero.getBitWidth() == Max.getBitWidth() &&
+         "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+  APInt UnknownBits = ~(KnownZero|KnownOne);
+
+  // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+  // bit if it is unknown.
+  Min = KnownOne;
+  Max = KnownOne|UnknownBits;
+
+  if (UnknownBits.isNegative()) { // Sign bit is unknown
+    Min.set(Min.getBitWidth()-1);
+    Max.clear(Max.getBitWidth()-1);
+  }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+                                                     const APInt &KnownOne,
+                                                     APInt &Min, APInt &Max) {
+  assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+         KnownZero.getBitWidth() == Min.getBitWidth() &&
+         KnownZero.getBitWidth() == Max.getBitWidth() &&
+         "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+  APInt UnknownBits = ~(KnownZero|KnownOne);
+
+  // The minimum value is when the unknown bits are all zeros.
+  Min = KnownOne;
+  // The maximum value is when the unknown bits are all ones.
+  Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
+///   cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer.  Try to simplify
+/// this into some simple computation that does not need the load.  For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison.  This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+                             CmpInst &ICI, ConstantInt *AndCst) {
+  // We need TD information to know the pointer size unless this is inbounds.
+  if (!GEP->isInBounds() && TD == 0) return 0;
+
+  ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+
+  // There are many forms of this optimization we can handle; for now, just do
+  // the simple index into a single-dimensional array.
+  //
+  // Require: GEP GV, 0, i {{, constant indices}}
+  if (GEP->getNumOperands() < 3 ||
+      !isa<ConstantInt>(GEP->getOperand(1)) ||
+      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+      isa<Constant>(GEP->getOperand(2)))
+    return 0;
+
+  // Check that indices after the variable are constants and in-range for the
+  // type they index.  Collect the indices.  This is typically for arrays of
+  // structs.
+  SmallVector<unsigned, 4> LaterIndices;
+
+  const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (Idx == 0) return 0;  // Variable index.
+
+    uint64_t IdxVal = Idx->getZExtValue();
+    if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+
+    if (const StructType *STy = dyn_cast<StructType>(EltTy))
+      EltTy = STy->getElementType(IdxVal);
+    else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+      if (IdxVal >= ATy->getNumElements()) return 0;
+      EltTy = ATy->getElementType();
+    } else {
+      return 0; // Unknown type.
+    }
+
+    LaterIndices.push_back(IdxVal);
+  }
+
+  enum { Overdefined = -3, Undefined = -2 };
+
+  // Variables for our state machines.
+
+  // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+  // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+  // and 87 is the second (and last) index.  FirstTrueElement is -2 when
+  // undefined, otherwise set to the first true element.  SecondTrueElement is
+  // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+  int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+  // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+  // form "i != 47 & i != 87".  Same state transitions as for true elements.
+  int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+  /// define a state machine that triggers for ranges of values that the index
+  /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
+  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+  /// index in the range (inclusive).  We use -2 for undefined here because we
+  /// use relative comparisons and don't want 0-1 to match -1.
+  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+  // MagicBitvector - This is a magic bitvector where we set a bit if the
+  // comparison is true for element 'i'.  If there are 64 elements or fewer in
+  // the array, this will fully represent all the comparison results.
+  uint64_t MagicBitvector = 0;
+
+
+  // Scan the array and see if one of our patterns matches.
+  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+  for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+    Constant *Elt = Init->getOperand(i);
+
+    // If this is indexing an array of structures, get the structure element.
+    if (!LaterIndices.empty())
+      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
+                                          LaterIndices.size());
+
+    // If the element is masked, handle it.
+    if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+    // Find out if the comparison would be true or false for the i'th element.
+    Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+                                                  CompareRHS, TD);
+    // If the result is undef for this element, ignore it.
+    if (isa<UndefValue>(C)) {
+      // Extend range state machines to cover this element in case there is an
+      // undef in the middle of the range.
+      if (TrueRangeEnd == (int)i-1)
+        TrueRangeEnd = i;
+      if (FalseRangeEnd == (int)i-1)
+        FalseRangeEnd = i;
+      continue;
+    }
+
+    // If we can't compute the result for any of the elements, we have to give
+    // up evaluating the entire conditional.
+    if (!isa<ConstantInt>(C)) return 0;
+
+    // Otherwise, we know if the comparison is true or false for this element,
+    // update our state machines.
+    bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+    // State machine for single/double/range index comparison.
+    if (IsTrueForElt) {
+      // Update the TrueElement state machine.
+      if (FirstTrueElement == Undefined)
+        FirstTrueElement = TrueRangeEnd = i;  // First true element.
+      else {
+        // Update double-compare state machine.
+        if (SecondTrueElement == Undefined)
+          SecondTrueElement = i;
+        else
+          SecondTrueElement = Overdefined;
+
+        // Update range state machine.
+        if (TrueRangeEnd == (int)i-1)
+          TrueRangeEnd = i;
+        else
+          TrueRangeEnd = Overdefined;
+      }
+    } else {
+      // Update the FalseElement state machine.
+      if (FirstFalseElement == Undefined)
+        FirstFalseElement = FalseRangeEnd = i; // First false element.
+      else {
+        // Update double-compare state machine.
+        if (SecondFalseElement == Undefined)
+          SecondFalseElement = i;
+        else
+          SecondFalseElement = Overdefined;
+
+        // Update range state machine.
+        if (FalseRangeEnd == (int)i-1)
+          FalseRangeEnd = i;
+        else
+          FalseRangeEnd = Overdefined;
+      }
+    }
+
+
+    // If this element is in range, update our magic bitvector.
+    if (i < 64 && IsTrueForElt)
+      MagicBitvector |= 1ULL << i;
+
+    // If all of our states become overdefined, bail out early.  Since the
+    // predicate is expensive, only check it every 8 elements.  This is only
+    // really useful for huge arrays.
+    if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+        FalseRangeEnd == Overdefined)
+      return 0;
+  }
+
+  // Now that we've scanned the entire array, emit our new comparison(s).  We
+  // order the state machines in complexity of the generated code.
+  Value *Idx = GEP->getOperand(2);
+
+  // If the index is larger than the pointer size of the target, truncate the
+  // index down like the GEP would do implicitly.  We don't have to do this for
+  // an inbounds GEP because the index can't be out of range.
+  if (!GEP->isInBounds() &&
+      Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+    Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+
+  // If the comparison is only true for one or two elements, emit direct
+  // comparisons.
+  if (SecondTrueElement != Overdefined) {
+    // None true -> false.
+    if (FirstTrueElement == Undefined)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+
+    Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+    // True for one element -> 'i == 47'.
+    if (SecondTrueElement == Undefined)
+      return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+    // True for two elements -> 'i == 47 | i == 72'.
+    Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+    Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+    Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+    return BinaryOperator::CreateOr(C1, C2);
+  }
+
+  // If the comparison is only false for one or two elements, emit direct
+  // comparisons.
+  if (SecondFalseElement != Overdefined) {
+    // None false -> true.
+    if (FirstFalseElement == Undefined)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+
+    Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+    // False for one element -> 'i != 47'.
+    if (SecondFalseElement == Undefined)
+      return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+    // False for two elements -> 'i != 47 & i != 72'.
+    Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+    Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+    Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+    return BinaryOperator::CreateAnd(C1, C2);
+  }
+
+  // If the comparison can be replaced with a range comparison for the elements
+  // where it is true, emit the range check.
+  if (TrueRangeEnd != Overdefined) {
+    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+    if (FirstTrueElement) {
+      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+      Idx = Builder->CreateAdd(Idx, Offs);
+    }
+
+    Value *End = ConstantInt::get(Idx->getType(),
+                                  TrueRangeEnd-FirstTrueElement+1);
+    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+  }
+
+  // False range check.
+  if (FalseRangeEnd != Overdefined) {
+    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+    if (FirstFalseElement) {
+      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+      Idx = Builder->CreateAdd(Idx, Offs);
+    }
+
+    Value *End = ConstantInt::get(Idx->getType(),
+                                  FalseRangeEnd-FirstFalseElement);
+    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+  }
+
+
+  // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+  // of this load, replace it with computation that does:
+  //   ((magic_cst >> i) & 1) != 0
+  if (Init->getNumOperands() <= 32 ||
+      (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+    const Type *Ty;
+    if (Init->getNumOperands() <= 32)
+      Ty = Type::getInt32Ty(Init->getContext());
+    else
+      Ty = Type::getInt64Ty(Init->getContext());
+    Value *V = Builder->CreateIntCast(Idx, Ty, false);
+    V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+    V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+    return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+  }
+
+  return 0;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can
+/// be complex, and scales are involved.  The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+                                          InstCombiner &IC) {
+  TargetData &TD = *IC.getTargetData();
+  gep_type_iterator GTI = gep_type_begin(GEP);
+
+  // Check to see if this gep only has a single variable index.  If so, and if
+  // any constant indices are a multiple of its scale, then we can compute this
+  // in terms of the scale of the variable index.  For example, if the GEP
+  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+  // because the expression will cross zero at the same point.
+  unsigned i, e = GEP->getNumOperands();
+  int64_t Offset = 0;
+  for (i = 1; i != e; ++i, ++GTI) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+      // Compute the aggregate offset of constant indices.
+      if (CI->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+      } else {
+        uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+        Offset += Size*CI->getSExtValue();
+      }
+    } else {
+      // Found our variable index.
+      break;
+    }
+  }
+
+  // If there are no variable indices, we must have a constant offset, just
+  // evaluate it the general way.
+  if (i == e) return 0;
+
+  Value *VariableIdx = GEP->getOperand(i);
+  // Determine the scale factor of the variable element.  For example, this is
+  // 4 if the variable index is into an array of i32.
+  uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+  // Verify that there are no other variable indices.  If so, emit the hard way.
+  for (++i, ++GTI; i != e; ++i, ++GTI) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (!CI) return 0;
+
+    // Compute the aggregate offset of constant indices.
+    if (CI->isZero()) continue;
+
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+    } else {
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+      Offset += Size*CI->getSExtValue();
+    }
+  }
+
+  // Okay, we know we have a single variable index, which must be a
+  // pointer/array/vector index.  If there is no offset, life is simple, return
+  // the index.
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  if (Offset == 0) {
+    // Cast to intptr_t in case a truncation occurs.  If an extension is
+    // needed, we don't need to bother extending: the extension won't affect
+    // where the computation crosses zero.
+    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+      VariableIdx = new TruncInst(VariableIdx,
+                                  TD.getIntPtrType(VariableIdx->getContext()),
+                                  VariableIdx->getName(), &I);
+    return VariableIdx;
+  }
+
+  // Otherwise, there is an index.  The computation we will do will be modulo
+  // the pointer size, so get it.
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+  Offset &= PtrSizeMask;
+  VariableScale &= PtrSizeMask;
+
+  // To do this transformation, any constant index must be a multiple of the
+  // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
+  // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
+  // multiple of the variable scale.
+  int64_t NewOffs = Offset / (int64_t)VariableScale;
+  if (Offset != NewOffs*(int64_t)VariableScale)
+    return 0;
+
+  // Okay, we can do this evaluation.  Start by converting the index to intptr.
+  const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+  if (VariableIdx->getType() != IntPtrTy)
+    VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+                                              true /*SExt*/,
+                                              VariableIdx->getName(), &I);
+  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+  return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else.  At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+                                       ICmpInst::Predicate Cond,
+                                       Instruction &I) {
+  // Look through bitcasts.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+    RHS = BCI->getOperand(0);
+
+  Value *PtrBase = GEPLHS->getOperand(0);
+  if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
+    // This transformation (ignoring the base and scales) is valid because we
+    // know pointers can't overflow since the gep is inbounds.  See if we can
+    // output an optimized form.
+    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+    // If not, synthesize the offset the hard way.
+    if (Offset == 0)
+      Offset = EmitGEPOffset(GEPLHS);
+    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+                        Constant::getNullValue(Offset->getType()));
+  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+    // If the base pointers are different, but the indices are the same, just
+    // compare the base pointer.
+    if (PtrBase != GEPRHS->getOperand(0)) {
+      bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+      IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+                        GEPRHS->getOperand(0)->getType();
+      if (IndicesTheSame)
+        for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+          if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+            IndicesTheSame = false;
+            break;
+          }
+
+      // If all indices are the same, just compare the base pointers.
+      if (IndicesTheSame)
+        return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+                            GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+      // Otherwise, the base pointers are different and the indices are
+      // different, bail out.
+      return 0;
+    }
+
+    // If one of the GEPs has all zero indices, recurse.
+    bool AllZeros = true;
+    for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+      if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+          !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+        AllZeros = false;
+        break;
+      }
+    if (AllZeros)
+      return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+                         ICmpInst::getSwappedPredicate(Cond), I);
+
+    // If the other GEP has all zero indices, recurse.
+    AllZeros = true;
+    for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+      if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+          !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+        AllZeros = false;
+        break;
+      }
+    if (AllZeros)
+      return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+      // If the GEPs only differ by one index, compare it.
+      unsigned NumDifferences = 0;  // Keep track of # differences.
+      unsigned DiffOperand = 0;     // The operand that differs.
+      for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+        if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+          if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+              GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+            // Irreconcilable differences.
+            NumDifferences = 2;
+            break;
+          } else {
+            if (NumDifferences++) break;
+            DiffOperand = i;
+          }
+        }
+
+      if (NumDifferences == 0)   // SAME GEP?
+        return ReplaceInstUsesWith(I, // No comparison is needed here.
+                              ConstantInt::get(Type::getInt1Ty(I.getContext()),
+                                               ICmpInst::isTrueWhenEqual(Cond)));
+
+      else if (NumDifferences == 1) {
+        Value *LHSV = GEPLHS->getOperand(DiffOperand);
+        Value *RHSV = GEPRHS->getOperand(DiffOperand);
+        // Make sure we do a signed comparison here.
+        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+      }
+    }
+
+    // Only lower this if the icmp is the only user of the GEP or if we expect
+    // the result to fold to a constant!
+    if (TD &&
+        (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+        (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
+      Value *L = EmitGEPOffset(GEPLHS);
+      Value *R = EmitGEPOffset(GEPRHS);
+      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+    }
+  }
+  return 0;
+}
+
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+                                            Value *X, ConstantInt *CI,
+                                            ICmpInst::Predicate Pred,
+                                            Value *TheAdd) {
+  // If we have X+0, exit early (simplifying logic below) and let it get folded
+  // elsewhere.  icmp X+0, X  -> icmp X, X
+  if (CI->isZero()) {
+    bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+  }
+
+  // (X+4) == X -> false.
+  if (Pred == ICmpInst::ICMP_EQ)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+  // (X+4) != X -> true.
+  if (Pred == ICmpInst::ICMP_NE)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+  // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
+  bool isNUW = false, isNSW = false;
+  if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
+    isNUW = Add->hasNoUnsignedWrap();
+    isNSW = Add->hasNoSignedWrap();
+  }
+
+  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+  // so the values can never be equal.  Similarly for all other "or equals"
+  // operators.
+
+  // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
+  // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
+  // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
+  if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+    // If this is an NUW add, then this is always false.
+    if (isNUW)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+    Value *R =
+      ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+    return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+  }
+
+  // (X+1) >u X        --> X <u (0-1)        --> X != 255
+  // (X+2) >u X        --> X <u (0-2)        --> X <u 254
+  // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
+  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+    // If this is an NUW add, then this is always true.
+    if (isNUW)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+    return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+  }
+
+  unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+  ConstantInt *SMax = ConstantInt::get(X->getContext(),
+                                       APInt::getSignedMaxValue(BitWidth));
+
+  // (X+ 1) <s X       --> X >s (MAXSINT-1)        --> X == 127
+  // (X+ 2) <s X       --> X >s (MAXSINT-2)        --> X >s 125
+  // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)  --> X >s 0
+  // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)  --> X >s -1
+  // (X+ -2) <s X      --> X >s (MAXSINT- -2)      --> X >s 126
+  // (X+ -1) <s X      --> X >s (MAXSINT- -1)      --> X != 127
+  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+    // If this is an NSW add, then we have two cases: if the constant is
+    // positive, then this is always false, if negative, this is always true.
+    if (isNSW) {
+      bool isTrue = CI->getValue().isNegative();
+      return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+    }
+
+    return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+  }
+
+  // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
+  // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
+  // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+  // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+  // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
+  // (X+ -1) >s X      --> X == -128
+
+  // If this is an NSW add, then we have two cases: if the constant is
+  // positive, then this is always true, if negative, this is always false.
+  if (isNSW) {
+    bool isTrue = !CI->getValue().isNegative();
+    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+  }
+
+  assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+  Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+  return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                          ConstantInt *DivRHS) {
+  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+  const APInt &CmpRHSV = CmpRHS->getValue();
+
+  // FIXME: If the operand types don't match the type of the divide
+  // then don't attempt this transform. The code below doesn't have the
+  // logic to deal with a signed divide and an unsigned compare (and
+  // vice versa). This is because (x /s C1) <s C2 produces different
+  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+  // (x /u C1) <u C2.  Simply casting the operands and result won't
+  // work. :(  The if statement below tests that condition and bails
+  // if it finds it.
+  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+    return 0;
+  if (DivRHS->isZero())
+    return 0; // The ProdOV computation fails on divide by zero.
+  if (DivIsSigned && DivRHS->isAllOnesValue())
+    return 0; // The overflow computation also screws up here
+  if (DivRHS->isOne())
+    return 0; // Not worth bothering, and eliminates some funny cases
+              // with INT_MIN.
+
+  // Compute Prod = CI * DivRHS. We are essentially solving an equation
+  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+  // C2 (CI). By solving for X we can turn this into a range check
+  // instead of computing a divide.
+  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+  // Determine if the product overflows by seeing if the product is
+  // not equal to the divide. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding.
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+  // Get the ICmp opcode
+  ICmpInst::Predicate Pred = ICI.getPredicate();
+
+  // Figure out the interval that is being checked.  For example, a comparison
+  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+  // Compute this interval based on the constants involved and the signedness
+  // of the compare/divide.  This computes a half-open interval, keeping track
+  // of whether either value in the interval overflows.  After analysis each
+  // overflow variable is set to 0 if its corresponding bound variable is valid,
+  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+  int LoOverflow = 0, HiOverflow = 0;
+  Constant *LoBound = 0, *HiBound = 0;
+
+  if (!DivIsSigned) {  // udiv
+    // e.g. X/5 op 3  --> [15, 20)
+    LoBound = Prod;
+    HiOverflow = LoOverflow = ProdOV;
+    if (!HiOverflow)
+      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+    if (CmpRHSV == 0) {       // (X / pos) op 0
+      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
+      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+      HiBound = DivRHS;
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
+      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      HiOverflow = LoOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+    } else {                       // (X / pos) op neg
+      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
+      HiBound = AddOne(Prod);
+      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow) {
+        ConstantInt* DivNeg =
+          cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+      }
+    }
+  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+    if (CmpRHSV == 0) {       // (X / neg) op 0
+      // e.g. X/-5 op 0  --> [-4, 5)
+      LoBound = AddOne(DivRHS);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
+        HiOverflow = 1;            // [INTMIN+1, overflow)
+        HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
+      }
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
+      // e.g. X/-5 op 3  --> [-19, -14)
+      HiBound = AddOne(Prod);
+      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow)
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+    } else {                       // (X / neg) op neg
+      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
+      LoOverflow = HiOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+    }
+
+    // Dividing by a negative swaps the condition.  LT <-> GT
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  Value *X = DivI->getOperand(0);
+  switch (Pred) {
+  default: llvm_unreachable("Unhandled icmp opcode!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1)   // Low bound is greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+    if (LoOverflow == -1)   // Low bound is less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+    return new ICmpInst(Pred, X, LoBound);
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1)       // High bound greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+    else if (HiOverflow == -1)  // High bound less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+  }
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+                                                          Instruction *LHSI,
+                                                          ConstantInt *RHS) {
+  const APInt &RHSV = RHS->getValue();
+
+  switch (LHSI->getOpcode()) {
+  case Instruction::Trunc:
+    if (ICI.isEquality() && LHSI->hasOneUse()) {
+      // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+      // of the high bits truncated out of x are known.
+      unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+             SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+      APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+      ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+      // If all the high bits are known, we can do this xform.
+      if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+        // Pull in the high bits from known-ones set.
+        APInt NewRHS(RHS->getValue());
+        NewRHS.zext(SrcBits);
+        NewRHS |= KnownOne;
+        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                            ConstantInt::get(ICI.getContext(), NewRHS));
+      }
+    }
+    break;
+
+  case Instruction::Xor:         // (icmp pred (xor X, XorCST), CI)
+    if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+      // fold the xor.
+      if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+          (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+        Value *CompareVal = LHSI->getOperand(0);
+
+        // If the sign bit of the XorCST is not set, there is no change to
+        // the operation, just stop using the Xor.
+        if (!XorCST->getValue().isNegative()) {
+          ICI.setOperand(0, CompareVal);
+          Worklist.Add(LHSI);
+          return &ICI;
+        }
+
+        // Was the old condition true if the operand is positive?
+        bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+        // If so, the new one isn't.
+        isTrueIfPositive ^= true;
+
+        if (isTrueIfPositive)
+          return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+                              SubOne(RHS));
+        else
+          return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+                              AddOne(RHS));
+      }
+
+      if (LHSI->hasOneUse()) {
+        // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+        if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+          const APInt &SignBit = XorCST->getValue();
+          ICmpInst::Predicate Pred = ICI.isSigned()
+                                         ? ICI.getUnsignedPredicate()
+                                         : ICI.getSignedPredicate();
+          return new ICmpInst(Pred, LHSI->getOperand(0),
+                              ConstantInt::get(ICI.getContext(),
+                                               RHSV ^ SignBit));
+        }
+
+        // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+        if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+          const APInt &NotSignBit = XorCST->getValue();
+          ICmpInst::Predicate Pred = ICI.isSigned()
+                                         ? ICI.getUnsignedPredicate()
+                                         : ICI.getSignedPredicate();
+          Pred = ICI.getSwappedPredicate(Pred);
+          return new ICmpInst(Pred, LHSI->getOperand(0),
+                              ConstantInt::get(ICI.getContext(),
+                                               RHSV ^ NotSignBit));
+        }
+      }
+    }
+    break;
+  case Instruction::And:         // (icmp pred (and X, AndCST), RHS)
+    if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+        LHSI->getOperand(0)->hasOneUse()) {
+      ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+      // If the LHS is an AND of a truncating cast, we can widen the
+      // and/compare to be the input width without changing the value
+      // produced, eliminating a cast.
+      if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+        // We can do this transformation if either the AND constant does not
+        // have its sign bit set or if it is an equality comparison.
+        // Extending a relational comparison when we're checking the sign
+        // bit would not work.
+        if (Cast->hasOneUse() &&
+            (ICI.isEquality() ||
+             (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
+          uint32_t BitWidth =
+            cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
+          APInt NewCST = AndCST->getValue();
+          NewCST.zext(BitWidth);
+          APInt NewCI = RHSV;
+          NewCI.zext(BitWidth);
+          Value *NewAnd =
+            Builder->CreateAnd(Cast->getOperand(0),
+                               ConstantInt::get(ICI.getContext(), NewCST),
+                               LHSI->getName());
+          return new ICmpInst(ICI.getPredicate(), NewAnd,
+                              ConstantInt::get(ICI.getContext(), NewCI));
+        }
+      }
+
+      // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+      // could exist), turn it into (X & (C2 << C1)) != (C3 << C1).  This
+      // happens a LOT in code produced by the C front-end, for bitfield
+      // access.
+      BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+      if (Shift && !Shift->isShift())
+        Shift = 0;
+
+      ConstantInt *ShAmt;
+      ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+      const Type *Ty = Shift ? Shift->getType() : 0;  // Type of the shift.
+      const Type *AndTy = AndCST->getType();          // Type of the and.
+
+      // We can fold this as long as we can't shift unknown bits
+      // into the mask.  This can only happen with signed shift
+      // rights, as they sign-extend.
+      if (ShAmt) {
+        bool CanFold = Shift->isLogicalShift();
+        if (!CanFold) {
+          // To test for the bad case of the signed shr, see if any
+          // of the bits shifted in could be tested after the mask.
+          uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+          int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+          uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+          if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+               AndCST->getValue()) == 0)
+            CanFold = true;
+        }
+
+        if (CanFold) {
+          Constant *NewCst;
+          if (Shift->getOpcode() == Instruction::Shl)
+            NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+          else
+            NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+          // Check to see if we are shifting out any of the bits being
+          // compared.
+          if (ConstantExpr::get(Shift->getOpcode(),
+                                NewCst, ShAmt) != RHS) {
+            // If we shifted bits out, the fold is not going to work out.
+            // As a special case, check to see if this means that the
+            // result is always true or false now.
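+            // For example, in i8, ((%x lshr 1) & 127) == 128 can never be
+            // true: shifting 128 left by 1 and back yields 0, not 128.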
+            if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+              return ReplaceInstUsesWith(ICI,
+                                       ConstantInt::getFalse(ICI.getContext()));
+            if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+              return ReplaceInstUsesWith(ICI,
+                                        ConstantInt::getTrue(ICI.getContext()));
+          } else {
+            ICI.setOperand(1, NewCst);
+            Constant *NewAndCST;
+            if (Shift->getOpcode() == Instruction::Shl)
+              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+            else
+              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+            LHSI->setOperand(1, NewAndCST);
+            LHSI->setOperand(0, Shift->getOperand(0));
+            Worklist.Add(Shift); // Shift is dead.
+            return &ICI;
+          }
+        }
+      }
+
+      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The latter is
+      // preferable because it allows the C<<Y expression to be hoisted out
+      // of a loop if Y is invariant and X is not.
+      if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+          ICI.isEquality() && !Shift->isArithmeticShift() &&
+          !isa<PHINode>(Shift->getOperand(0))) {
+        // Compute C << Y.
+        Value *NS;
+        if (Shift->getOpcode() == Instruction::LShr) {
+          NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+        } else {
+          // Insert a logical shift.
+          NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+        }
+
+        // Compute X & (C << Y).
+        Value *NewAnd =
+          Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+        ICI.setOperand(0, NewAnd);
+        return &ICI;
+      }
+    }
+
+    // Try to optimize things like "A[i]&42 == 0" to index computations.
+    if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+      if (GetElementPtrInst *GEP =
+            dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+          if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+              !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+            ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+            if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, ICI, C))
+              return Res;
+          }
+    }
+    break;
+
+  case Instruction::Or: {
+    if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+      break;
+    Value *P, *Q;
+    if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+      // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+      // -> and (icmp eq P, null), (icmp eq Q, null).
+
+      Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+                                        Constant::getNullValue(P->getType()));
+      Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+                                        Constant::getNullValue(Q->getType()));
+      Instruction *Op;
+      if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+        Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+      else
+        Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+      return Op;
+    }
+    break;
+  }
+
+  case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt) break;
+
+    uint32_t TypeBits = RHSV.getBitWidth();
+
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    if (ShAmt->uge(TypeBits))
+      break;
+
+    if (ICI.isEquality()) {
+      // If we are comparing against bits always shifted out, the
+      // comparison cannot succeed.
+      Constant *Comp =
+        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+      if (Comp != RHS) { // Comparing against a bit that we know is zero.
+        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+        Constant *Cst =
+          ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+        return ReplaceInstUsesWith(ICI, Cst);
+      }
+
+      if (LHSI->hasOneUse()) {
+        // Otherwise strength reduce the shift into an and.
+        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+        Constant *Mask =
+          ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+                                                       TypeBits-ShAmtVal));
+
+        Value *And =
+          Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+        return new ICmpInst(ICI.getPredicate(), And,
+                            ConstantInt::get(ICI.getContext(),
+                                             RHSV.lshr(ShAmtVal)));
+      }
+    }
+
+    // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+    bool TrueIfSigned = false;
+    if (LHSI->hasOneUse() &&
+        isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+      // (X << 31) <s 0  --> (X&1) != 0
+      Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) <<
+                                           (TypeBits-ShAmt->getZExtValue()-1));
+      Value *And =
+        Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+      return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+                          And, Constant::getNullValue(And->getType()));
+    }
+    break;
+  }
+
+  case Instruction::LShr:        // (icmp pred (shr X, ShAmt), CI)
+  case Instruction::AShr: {
+    // Only handle equality comparisons of shift-by-constant.
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt || !ICI.isEquality()) break;
+
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    uint32_t TypeBits = RHSV.getBitWidth();
+    if (ShAmt->uge(TypeBits))
+      break;
+
+    uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+
+    // If we are comparing against bits always shifted out, the
+    // comparison cannot succeed.
+    APInt Comp = RHSV << ShAmtVal;
+    if (LHSI->getOpcode() == Instruction::LShr)
+      Comp = Comp.lshr(ShAmtVal);
+    else
+      Comp = Comp.ashr(ShAmtVal);
+
+    if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+      bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+      Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+                                       IsICMP_NE);
+      return ReplaceInstUsesWith(ICI, Cst);
+    }
+
+    // Otherwise, check to see if the bits shifted out are known to be zero.
+    // If so, we can compare against the unshifted value:
+    //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
+    if (LHSI->hasOneUse() &&
+        MaskedValueIsZero(LHSI->getOperand(0),
+                          APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+      return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                          ConstantExpr::getShl(RHS, ShAmt));
+    }
+
+    if (LHSI->hasOneUse()) {
+      // Otherwise strength reduce the shift into an and.
+      APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+      Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+      Value *And = Builder->CreateAnd(LHSI->getOperand(0),
+                                      Mask, LHSI->getName()+".mask");
+      return new ICmpInst(ICI.getPredicate(), And,
+                          ConstantExpr::getShl(RHS, ShAmt));
+    }
+    break;
+  }
+
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+    // Fold: icmp pred ([us]div X, C1), C2 -> range test
+    // Fold this div into the comparison, producing a range check.
+    // Determine, based on the divide type, what the range is being
+    // checked.  If there is an overflow on the low or high side, remember
+    // it, otherwise compute the range [low, hi) bounding the new value.
+    // See: InsertRangeTest above for the kinds of replacements possible.
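+    // For example (illustrative IR): "icmp ult (udiv i32 %X, 10), 3" asks
+    // whether %X / 10 <= 2, i.e. whether %X < 30, so it can become
+    // "icmp ult i32 %X, 30" once the bounds are known not to overflow.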
+    if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+      if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+                                          DivRHS))
+        return R;
+    break;
+
+  case Instruction::Add:
+    // Fold: icmp pred (add X, C1), C2
+    if (!ICI.isEquality()) {
+      ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+      if (!LHSC) break;
+      const APInt &LHSV = LHSC->getValue();
+
+      ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+                            .subtract(LHSV);
+
+      if (ICI.isSigned()) {
+        if (CR.getLower().isSignBit()) {
+          return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+                              ConstantInt::get(ICI.getContext(),CR.getUpper()));
+        } else if (CR.getUpper().isSignBit()) {
+          return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+                              ConstantInt::get(ICI.getContext(),CR.getLower()));
+        }
+      } else {
+        if (CR.getLower().isMinValue()) {
+          return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+                              ConstantInt::get(ICI.getContext(),CR.getUpper()));
+        } else if (CR.getUpper().isMinValue()) {
+          return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+                              ConstantInt::get(ICI.getContext(),CR.getLower()));
+        }
+      }
+    }
+    break;
+  }
+
+  // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+  if (ICI.isEquality()) {
+    bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+    // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+    // the second operand is a constant, simplify a bit.
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+      switch (BO->getOpcode()) {
+      case Instruction::SRem:
+        // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+        if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&
+            BO->hasOneUse()) {
+          const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+          if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
+            Value *NewRem =
+              Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+                                  BO->getName());
+            return new ICmpInst(ICI.getPredicate(), NewRem,
+                                Constant::getNullValue(BO->getType()));
+          }
+        }
+        break;
+      case Instruction::Add:
+        // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+        if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          if (BO->hasOneUse())
+            return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                                ConstantExpr::getSub(RHS, BOp1C));
+        } else if (RHSV == 0) {
+          // Replace ((add A, B) != 0) with (A != -B) if A or B is
+          // efficiently invertible, or if the add has just this one use.
+          Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+          if (Value *NegVal = dyn_castNegVal(BOp1))
+            return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+          else if (Value *NegVal = dyn_castNegVal(BOp0))
+            return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+          else if (BO->hasOneUse()) {
+            Value *Neg = Builder->CreateNeg(BOp1);
+            Neg->takeName(BO);
+            return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+          }
+        }
+        break;
+      case Instruction::Xor:
+        // For the xor case, we can xor two constants together, eliminating
+        // the explicit xor.
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                              ConstantExpr::getXor(RHS, BOC));
+
+        // FALLTHROUGH
+      case Instruction::Sub:
+        // Replace (([sub|xor] A, B) != 0) with (A != B).
+        if (RHSV == 0)
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                              BO->getOperand(1));
+        break;
+
+      case Instruction::Or:
+        // If bits are being or'd in that are not present in the constant we
+        // are comparing against, then the comparison could never succeed!
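+        // For example (illustrative): (X | 8) == 4 can never hold, because
+        // bit 3 of the left-hand side is always set.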
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+          Constant *NotCI = ConstantExpr::getNot(RHS);
+          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+            return ReplaceInstUsesWith(ICI,
+                            ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+                                             isICMP_NE));
+        }
+        break;
+
+      case Instruction::And:
+        if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          // If bits are being compared against that are and'd out, then the
+          // comparison can never succeed!
+          if ((RHSV & ~BOC->getValue()) != 0)
+            return ReplaceInstUsesWith(ICI,
+                            ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+                                             isICMP_NE));
+
+          // If we have ((X & C) == C), turn it into ((X & C) != 0).
+          if (RHS == BOC && RHSV.isPowerOf2())
+            return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+                                ICmpInst::ICMP_NE, LHSI,
+                                Constant::getNullValue(RHS->getType()));
+
+          // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
+          if (BOC->getValue().isSignBit()) {
+            Value *X = BO->getOperand(0);
+            Constant *Zero = Constant::getNullValue(X->getType());
+            ICmpInst::Predicate pred = isICMP_NE ?
+              ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+            return new ICmpInst(pred, X, Zero);
+          }
+
+          // ((X & ~7) == 0) --> X < 8
+          if (RHSV == 0 && isHighOnes(BOC)) {
+            Value *X = BO->getOperand(0);
+            Constant *NegX = ConstantExpr::getNeg(BOC);
+            ICmpInst::Predicate pred = isICMP_NE ?
+              ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+            return new ICmpInst(pred, X, NegX);
+          }
+        }
+      default: break;
+      }
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+      // Handle icmp {eq|ne} <intrinsic>, intcst.
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::bswap:
+        Worklist.Add(II);
+        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+        return &ICI;
+      case Intrinsic::ctlz:
+      case Intrinsic::cttz:
+        // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
+        if (RHSV == RHS->getType()->getBitWidth()) {
+          Worklist.Add(II);
+          ICI.setOperand(0, II->getOperand(1));
+          ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+          return &ICI;
+        }
+        break;
+      case Intrinsic::ctpop:
+        // popcount(A) == 0  ->  A == 0 and likewise for !=
+        if (RHS->isZero()) {
+          Worklist.Add(II);
+          ICI.setOperand(0, II->getOperand(1));
+          ICI.setOperand(1, RHS);
+          return &ICI;
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+  return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+  const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+  Value *LHSCIOp        = LHSCI->getOperand(0);
+  const Type *SrcTy     = LHSCIOp->getType();
+  const Type *DestTy    = LHSCI->getType();
+  Value *RHSCIOp;
+
+  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+  // integer type is the same size as the pointer type.
+  if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+      TD->getPointerSizeInBits() ==
+         cast<IntegerType>(DestTy)->getBitWidth()) {
+    Value *RHSOp = 0;
+    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+      RHSOp = RHSC->getOperand(0);
+      // If the pointer types don't match, insert a bitcast.
+      if (LHSCIOp->getType() != RHSOp->getType())
+        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+    }
+
+    if (RHSOp)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+  }
+
+  // The code below only handles extension cast instructions, so far.
+  // Enforce this.
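+  // For example (illustrative IR): "icmp slt (sext i8 %a to i32),
+  // (sext i8 %b to i32)" folds to "icmp slt i8 %a, %b" below.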
+  if (LHSCI->getOpcode() != Instruction::ZExt &&
+      LHSCI->getOpcode() != Instruction::SExt)
+    return 0;
+
+  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+  bool isSignedCmp = ICI.isSigned();
+
+  if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+    // Not an extension from the same type?
+    RHSCIOp = CI->getOperand(0);
+    if (RHSCIOp->getType() != LHSCIOp->getType())
+      return 0;
+
+    // If the signedness of the two casts doesn't agree (i.e. one is a sext
+    // and the other is a zext), then we can't handle this.
+    if (CI->getOpcode() != LHSCI->getOpcode())
+      return 0;
+
+    // Deal with equality cases early.
+    if (ICI.isEquality())
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+    // A signed comparison of sign extended values simplifies into a
+    // signed comparison.
+    if (isSignedCmp && isSignedExt)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+    // The other three cases all fold into an unsigned comparison.
+    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+  }
+
+  // If we aren't dealing with a constant on the RHS, exit early.
+  ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+  if (!CI)
+    return 0;
+
+  // Compute the constant that would happen if we truncated to SrcTy then
+  // re-extended to DestTy.
+  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
+                                         Res1, DestTy);
+
+  // If the re-extended constant didn't change...
+  if (Res2 == CI) {
+    // Deal with equality cases early.
+    if (ICI.isEquality())
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+    // A signed comparison of sign extended values simplifies into a
+    // signed comparison.
+    if (isSignedExt && isSignedCmp)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+    // The other three cases all fold into an unsigned comparison.
+    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
+  }
+
+  // The re-extended constant changed, so the constant cannot be represented
+  // in the shorter type.  Consequently, we cannot emit a simple comparison.
+
+  // First, handle some easy cases.  We know the result cannot be equal at this
+  // point, so handle the ICI.isEquality() cases.
+  if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+  if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+
+  // Evaluate the comparison for LT (we invert for GT below).  LE and GE cases
+  // should have been folded away previously and not enter in here.
+  Value *Result;
+  if (isSignedCmp) {
+    // We're performing a signed comparison.
+    if (cast<ConstantInt>(CI)->getValue().isNegative())
+      Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false
+    else
+      Result = ConstantInt::getTrue(ICI.getContext());  // X < (large) --> true
+  } else {
+    // We're performing an unsigned comparison.
+    if (isSignedExt) {
+      // We're performing an unsigned comp with a sign extended value.
+      // This is true if the input is >= 0. [aka >s -1]
+      Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+      Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
+    } else {
+      // Unsigned extend & unsigned compare -> always true.
+      Result = ConstantInt::getTrue(ICI.getContext());
+    }
+  }
+
+  // Finally, return the value computed.
+  if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
+      ICI.getPredicate() == ICmpInst::ICMP_SLT)
+    return ReplaceInstUsesWith(ICI, Result);
+
+  assert((ICI.getPredicate() == ICmpInst::ICMP_UGT ||
+          ICI.getPredicate() == ICmpInst::ICMP_SGT) &&
+         "ICmp should be folded!");
+  if (Constant *CI = dyn_cast<Constant>(Result))
+    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+  return BinaryOperator::CreateNot(Result);
+}
+
+
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+  bool Changed = false;
+
+  /// Orders the operands of the compare so that they are listed from most
+  /// complex to least complex.  This puts constants before unary operators,
+  /// before binary operators.
+  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+    I.swapOperands();
+    Changed = true;
+  }
+
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  const Type *Ty = Op0->getType();
+
+  // icmp's with boolean values can always be turned into bitwise operations.
+  if (Ty == Type::getInt1Ty(I.getContext())) {
+    switch (I.getPredicate()) {
+    default: llvm_unreachable("Invalid icmp instruction!");
+    case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B)
+      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
+      return BinaryOperator::CreateNot(Xor);
+    }
+    case ICmpInst::ICMP_NE:                 // icmp ne i1 A, B -> A^B
+      return BinaryOperator::CreateXor(Op0, Op1);
+
+    case ICmpInst::ICMP_UGT:
+      std::swap(Op0, Op1);                  // Change icmp ugt -> icmp ult
+      // FALL THROUGH
+    case ICmpInst::ICMP_ULT: {              // icmp ult i1 A, B -> ~A & B
+      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+      return BinaryOperator::CreateAnd(Not, Op1);
+    }
+    case ICmpInst::ICMP_SGT:
+      std::swap(Op0, Op1);                  // Change icmp sgt -> icmp slt
+      // FALL THROUGH
+    case ICmpInst::ICMP_SLT: {              // icmp slt i1 A, B -> A & ~B
+      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+      return BinaryOperator::CreateAnd(Not, Op0);
+    }
+    case ICmpInst::ICMP_UGE:
+      std::swap(Op0, Op1);                  // Change icmp uge -> icmp ule
+      // FALL THROUGH
+    case ICmpInst::ICMP_ULE: {              // icmp ule i1 A, B -> ~A | B
+      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+      return BinaryOperator::CreateOr(Not, Op1);
+    }
+    case ICmpInst::ICMP_SGE:
+      std::swap(Op0, Op1);                  // Change icmp sge -> icmp sle
+      // FALL THROUGH
+    case ICmpInst::ICMP_SLE: {              // icmp sle i1 A, B -> A | ~B
+      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+      return BinaryOperator::CreateOr(Not, Op0);
+    }
+    }
+  }
+
+  unsigned BitWidth = 0;
+  if (TD)
+    BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+  else if (Ty->isIntOrIntVector())
+    BitWidth = Ty->getScalarSizeInBits();
+
+  bool isSignBit = false;
+
+  // See if we are doing a comparison with a constant.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+    Value *A = 0, *B = 0;
+
+    // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+    if (I.isEquality() && CI->isZero() &&
+        match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+      // (icmp cond A B) if cond is equality
+      return new ICmpInst(I.getPredicate(), A, B);
+    }
+
+    // If we have an icmp le or icmp ge instruction, turn it into the
+    // appropriate icmp lt or icmp gt instruction.  This allows us to rely on
+    // them being folded in the code below.  The SimplifyICmpInst code has
+    // already handled the edge cases for us, so we just assert on them.
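+    // For example (illustrative): "icmp ule i32 %X, 5" becomes
+    // "icmp ult i32 %X, 6"; SimplifyICmpInst already removed the
+    // constant-is-maximum cases, so the +1/-1 below cannot wrap.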
+    switch (I.getPredicate()) {
+    default: break;
+    case ICmpInst::ICMP_ULE:
+      assert(!CI->isMaxValue(false));                 // A <=u MAX -> TRUE
+      return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()+1));
+    case ICmpInst::ICMP_SLE:
+      assert(!CI->isMaxValue(true));                  // A <=s MAX -> TRUE
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()+1));
+    case ICmpInst::ICMP_UGE:
+      assert(!CI->isMinValue(false));                 // A >=u MIN -> TRUE
+      return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()-1));
+    case ICmpInst::ICMP_SGE:
+      assert(!CI->isMinValue(true));                  // A >=s MIN -> TRUE
+      return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()-1));
+    }
+
+    // If this comparison is a normal comparison, it demands all bits;
+    // if it is a sign bit comparison, it only demands the sign bit.
+    bool UnusedBit;
+    isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+  }
+
+  // See if we can fold the comparison based on range information we can get
+  // by checking whether bits are known to be zero or one in the input.
+  if (BitWidth != 0) {
+    APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+    APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+    if (SimplifyDemandedBits(I.getOperandUse(0),
+                             isSignBit ? APInt::getSignBit(BitWidth)
+                                       : APInt::getAllOnesValue(BitWidth),
+                             Op0KnownZero, Op0KnownOne, 0))
+      return &I;
+    if (SimplifyDemandedBits(I.getOperandUse(1),
+                             APInt::getAllOnesValue(BitWidth),
+                             Op1KnownZero, Op1KnownOne, 0))
+      return &I;
+
+    // Given the known and unknown bits, compute a range that the LHS could be
+    // in.  Compute the Min, Max and RHS values based on the known bits. For
+    // the EQ and NE we use unsigned values.
+    APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+    APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+    if (I.isSigned()) {
+      ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+                                             Op0Min, Op0Max);
+      ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+                                             Op1Min, Op1Max);
+    } else {
+      ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+                                               Op0Min, Op0Max);
+      ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+                                               Op1Min, Op1Max);
+    }
+
+    // If Min and Max are known to be the same, then SimplifyDemandedBits
+    // figured out that the LHS is a constant.  Just constant fold this now so
+    // that code below can assume that Min != Max.
+    if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+      return new ICmpInst(I.getPredicate(),
+                          ConstantInt::get(I.getContext(), Op0Min), Op1);
+    if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+      return new ICmpInst(I.getPredicate(), Op0,
+                          ConstantInt::get(I.getContext(), Op1Min));
+
+    // Based on the range information we know about the LHS, see if we can
+    // simplify this comparison.  For example, (x&4) < 8 is always true.
+    switch (I.getPredicate()) {
+    default: llvm_unreachable("Unknown icmp opcode!");
+    case ICmpInst::ICMP_EQ:
+      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      break;
+    case ICmpInst::ICMP_NE:
+      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      break;
+    case ICmpInst::ICMP_ULT:
+      if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()-1));
+
+        // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
+        if (CI->isMinValue(true))
+          return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+                              Constant::getAllOnesValue(Op0->getType()));
+      }
+      break;
+    case ICmpInst::ICMP_UGT:
+      if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+      if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(A)-1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()+1));
+
+        // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
+        if (CI->isMaxValue(true))
+          return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+                              Constant::getNullValue(Op0->getType()));
+      }
+      break;
+    case ICmpInst::ICMP_SLT:
+      if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()-1));
+      }
+      break;
+    case ICmpInst::ICMP_SGT:
+      if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+      if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                          ConstantInt::get(CI->getContext(), CI->getValue()+1));
+      }
+      break;
+    case ICmpInst::ICMP_SGE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+      if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      break;
+    case ICmpInst::ICMP_SLE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+      if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Min.sgt(Op1Max))          // A <=s B -> false if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      break;
+    case ICmpInst::ICMP_UGE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+      if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      break;
+    case ICmpInst::ICMP_ULE:
+      assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+      if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      break;
+    }
+
+    // Turn a signed comparison into an unsigned one if both operands
+    // are known to have the same sign.
+    if (I.isSigned() &&
+        ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+         (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+      return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+  }
+
+  // Test if the ICmpInst instruction is used exclusively by a select as
+  // part of a minimum or maximum operation. If so, refrain from doing
+  // any other folding. This helps out other analyses which understand
+  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+  // and CodeGen. And in this case, at least one of the comparison
+  // operands has at least one user besides the compare (the select),
+  // which would often largely negate the benefit of folding anyway.
+  if (I.hasOneUse())
+    if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+        return 0;
+
+  // See if we are doing a comparison between a constant and an instruction
+  // that can be folded into the comparison.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+    // Since the RHS is a ConstantInt (CI), if the left hand side is an
+    // instruction, see if that instruction also has constants so that the
+    // instruction can be folded into the icmp.
+    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+      if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+        return Res;
+  }
+
+  // Handle icmp with constant (but not simple integer constant) RHS.
+  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+      switch (LHSI->getOpcode()) {
+      case Instruction::GetElementPtr:
+        // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+        if (RHSC->isNullValue() &&
+            cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+          return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+                  Constant::getNullValue(LHSI->getOperand(0)->getType()));
+        break;
+      case Instruction::PHI:
+        // Only fold icmp into the PHI if the phi and icmp are in the same
+        // block.  If in the same block, we're encouraging jump threading.  If
+        // not, we are just pessimizing the code by making an i1 phi.
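+        // For example (illustrative): with %p = phi i32 [ 0, %a ], [ %v, %b ]
+        // and the icmp in the same block, folding the compare into each
+        // incoming value gives jump threading a constant condition on the
+        // edge from %a.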
+        if (LHSI->getParent() == I.getParent())
+          if (Instruction *NV = FoldOpIntoPhi(I, true))
+            return NV;
+        break;
+      case Instruction::Select: {
+        // If either operand of the select is a constant, we can fold the
+        // comparison into the select arms, which will cause one to be
+        // constant folded and the select turned into a bitwise or.
+        Value *Op1 = 0, *Op2 = 0;
+        if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+          Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+        if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+          Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+        // We only want to perform this transformation if it will not lead to
+        // additional code. This is true if either both sides of the select
+        // fold to a constant (in which case the icmp is replaced with a select
+        // which will usually simplify) or this is the only user of the
+        // select (in which case we are trading a select+icmp for a simpler
+        // select+icmp).
+        if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+          if (!Op1)
+            Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+                                      RHSC, I.getName());
+          if (!Op2)
+            Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+                                      RHSC, I.getName());
+          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+        }
+        break;
+      }
+      case Instruction::Call:
+        // If we have (malloc != null), and if the malloc has a single use, we
+        // can assume it is successful and remove the malloc.
+        if (isMalloc(LHSI) && LHSI->hasOneUse() &&
+            isa<ConstantPointerNull>(RHSC)) {
+          // Need to explicitly erase malloc call here, instead of adding it to
+          // Worklist, because it won't get DCE'd from the Worklist since
+          // isInstructionTriviallyDead() returns false for function calls.
+          // It is OK to replace LHSI/MallocCall with Undef because the
+          // instruction that uses it will be erased via Worklist.
+          if (extractMallocCall(LHSI)) {
+            LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
+            EraseInstFromFunction(*LHSI);
+            return ReplaceInstUsesWith(I,
+                             ConstantInt::get(Type::getInt1Ty(I.getContext()),
+                                              !I.isTrueWhenEqual()));
+          }
+          if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
+            if (MallocCall->hasOneUse()) {
+              MallocCall->replaceAllUsesWith(
+                                        UndefValue::get(MallocCall->getType()));
+              EraseInstFromFunction(*MallocCall);
+              Worklist.Add(LHSI); // The malloc's bitcast use.
+              return ReplaceInstUsesWith(I,
+                             ConstantInt::get(Type::getInt1Ty(I.getContext()),
+                                              !I.isTrueWhenEqual()));
+            }
+        }
+        break;
+      case Instruction::IntToPtr:
+        // icmp pred inttoptr(X), null -> icmp pred X, 0
+        if (RHSC->isNullValue() && TD &&
+            TD->getIntPtrType(RHSC->getContext()) ==
+               LHSI->getOperand(0)->getType())
+          return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+                        Constant::getNullValue(LHSI->getOperand(0)->getType()));
+        break;
+
+      case Instruction::Load:
+        // Try to optimize things like "A[i] > 4" to index computations.
+        if (GetElementPtrInst *GEP =
+              dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+                !cast<LoadInst>(LHSI)->isVolatile())
+              if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+                return Res;
+        }
+        break;
+      }
+  }
+
+  // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
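+  // For example (illustrative): "icmp eq (getelementptr inbounds %P, i32 1),
+  // %P" can often be decided from the GEP's offset alone.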
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+    if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+      return NI;
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+    if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+                           ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+      return NI;
+
+  // Test to see if the operands of the icmp are casted versions of other
+  // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
+  // now.
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+    if (isa<PointerType>(Op0->getType()) &&
+        (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+      // We keep moving the cast from the left operand over to the right
+      // operand, where it can often be eliminated completely.
+      Op0 = CI->getOperand(0);
+
+      // If operand #1 is a bitcast instruction, it must also be a ptr->ptr
+      // cast, so eliminate it as well.
+      if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+        Op1 = CI2->getOperand(0);
+
+      // If Op1 is a constant, we can fold the cast into the constant.
+      if (Op0->getType() != Op1->getType()) {
+        if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+          Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+        } else {
+          // Otherwise, cast the RHS right before the icmp.
+          Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+        }
+      }
+      return new ICmpInst(I.getPredicate(), Op0, Op1);
+    }
+  }
+
+  if (isa<CastInst>(Op0)) {
+    // Handle the special case of: icmp (cast bool to X), <cst>
+    // This comes up when you have code like
+    //   int X = A < B;
+    //   if (X) ...
+    // For generality, we handle any zero-extension of any operand comparison
+    // with a constant or another cast from the same type.
+    if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+      if (Instruction *R = visitICmpInstWithCastAndCast(I))
+        return R;
+  }
+
+  // See if it's the same type of instruction on the left and right.
+  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+    if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+      if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
+          Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
+        switch (Op0I->getOpcode()) {
+        default: break;
+        case Instruction::Add:
+        case Instruction::Sub:
+        case Instruction::Xor:
+          if (I.isEquality())    // a+x icmp eq/ne b+x --> a icmp b
+            return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+                                Op1I->getOperand(0));
+          // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+            if (CI->getValue().isSignBit()) {
+              ICmpInst::Predicate Pred = I.isSigned()
+                                             ? I.getUnsignedPredicate()
+                                             : I.getSignedPredicate();
+              return new ICmpInst(Pred, Op0I->getOperand(0),
+                                  Op1I->getOperand(0));
+            }
+
+            if (CI->getValue().isMaxSignedValue()) {
+              ICmpInst::Predicate Pred = I.isSigned()
+                                             ? I.getUnsignedPredicate()
+                                             : I.getSignedPredicate();
+              Pred = I.getSwappedPredicate(Pred);
+              return new ICmpInst(Pred, Op0I->getOperand(0),
+                                  Op1I->getOperand(0));
+            }
+          }
+          break;
+        case Instruction::Mul:
+          if (!I.isEquality())
+            break;
+
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+            // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+            // Mask = -1 >> count-trailing-zeros(Cst).
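+            // Why the mask is safe (illustrative): if Cst = 24 = 3*2^3 at
+            // width 32, then a*24 == b*24 (mod 2^32) iff 3*(a-b) == 0
+            // (mod 2^29) iff a == b (mod 2^29), because 3 is odd and hence
+            // invertible; that is (a & Mask) == (b & Mask), Mask = 2^29-1.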
+            if (!CI->isZero() && !CI->isOne()) {
+              const APInt &AP = CI->getValue();
+              ConstantInt *Mask = ConstantInt::get(I.getContext(),
+                                      APInt::getLowBitsSet(AP.getBitWidth(),
+                                                           AP.getBitWidth() -
+                                                      AP.countTrailingZeros()));
+              Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
+              Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
+              return new ICmpInst(I.getPredicate(), And1, And2);
+            }
+          }
+          break;
+        }
+      }
+    }
+  }
+
+  // ~x < ~y --> y < x
+  { Value *A, *B;
+    if (match(Op0, m_Not(m_Value(A))) &&
+        match(Op1, m_Not(m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B, A);
+  }
+
+  if (I.isEquality()) {
+    Value *A, *B, *C, *D;
+
+    // -x == -y --> x == y
+    if (match(Op0, m_Neg(m_Value(A))) &&
+        match(Op1, m_Neg(m_Value(B))))
+      return new ICmpInst(I.getPredicate(), A, B);
+
+    if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+      if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
+        Value *OtherVal = A == Op1 ? B : A;
+        return new ICmpInst(I.getPredicate(), OtherVal,
+                            Constant::getNullValue(A->getType()));
+      }
+
+      if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+        // A^c1 == C^c2 --> A == C^(c1^c2)
+        ConstantInt *C1, *C2;
+        if (match(B, m_ConstantInt(C1)) &&
+            match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+          Constant *NC = ConstantInt::get(I.getContext(),
+                                          C1->getValue() ^ C2->getValue());
+          Value *Xor = Builder->CreateXor(C, NC, "tmp");
+          return new ICmpInst(I.getPredicate(), A, Xor);
+        }
+
+        // A^B == A^D -> B == D
+        if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+        if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+        if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+        if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+      }
+    }
+
+    if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+        (A == Op0 || B == Op0)) {
+      // A == (A^B)  ->  B == 0
+      Value *OtherVal = A == Op0 ? B : A;
+      return new ICmpInst(I.getPredicate(), OtherVal,
+                          Constant::getNullValue(A->getType()));
+    }
+
+    // (A-B) == A  ->  B == 0
+    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B,
+                          Constant::getNullValue(B->getType()));
+
+    // A == (A-B)  ->  B == 0
+    if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B,
+                          Constant::getNullValue(B->getType()));
+
+    // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+    if (Op0->hasOneUse() && Op1->hasOneUse() &&
+        match(Op0, m_And(m_Value(A), m_Value(B))) &&
+        match(Op1, m_And(m_Value(C), m_Value(D)))) {
+      Value *X = 0, *Y = 0, *Z = 0;
+
+      if (A == C) {
+        X = B; Y = D; Z = A;
+      } else if (A == D) {
+        X = B; Y = C; Z = A;
+      } else if (B == C) {
+        X = A; Y = D; Z = B;
+      } else if (B == D) {
+        X = A; Y = C; Z = B;
+      }
+
+      if (X) {   // Build (X^Y) & Z
+        Op1 = Builder->CreateXor(X, Y, "tmp");
+        Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+        I.setOperand(0, Op1);
+        I.setOperand(1, Constant::getNullValue(Op1->getType()));
+        return &I;
+      }
+    }
+  }
+
+  {
+    Value *X; ConstantInt *Cst;
+    // icmp X+Cst, X
+    if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+      return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+    // icmp X, X+Cst
+    if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+      return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+  }
+  return Changed ? &I : 0;
+}
+
+
+
+
+
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+                                                Instruction *LHSI,
+                                                Constant *RHSC) {
+  if (!isa<ConstantFP>(RHSC)) return 0;
+  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+  // Get the width of the mantissa.  We don't want to hack on conversions that
+  // might lose information from the integer, e.g. "i64 -> float".
+  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+  if (MantissaWidth == -1) return 0;  // Unknown.
+
+  // Check to see that the input is converted from an integer type that is
+  // small enough that the conversion preserves all bits.
+  // TODO: check here for "known" sign bits.  This would allow us to handle
+  // (fptosi (x >>s 62) to float) if x is i64, for example.
+  unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
+
+  // If this is a uitofp instruction, we need an extra bit to hold the sign.
+  bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+  if (LHSUnsigned)
+    ++InputSize;
+
+  // If the conversion would lose info, don't hack on this.
+  if ((int)InputSize > MantissaWidth)
+    return 0;
+
+  // Otherwise, we can potentially simplify the comparison.  We know that it
+  // will always come through as an integer value and we know the constant is
+  // not a NAN (it would have been previously simplified).
+  assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+  ICmpInst::Predicate Pred;
+  switch (I.getPredicate()) {
+  default: llvm_unreachable("Unexpected predicate!");
+  case FCmpInst::FCMP_UEQ:
+  case FCmpInst::FCMP_OEQ:
+    Pred = ICmpInst::ICMP_EQ;
+    break;
+  case FCmpInst::FCMP_UGT:
+  case FCmpInst::FCMP_OGT:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+    break;
+  case FCmpInst::FCMP_UGE:
+  case FCmpInst::FCMP_OGE:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+    break;
+  case FCmpInst::FCMP_ULT:
+  case FCmpInst::FCMP_OLT:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+    break;
+  case FCmpInst::FCMP_ULE:
+  case FCmpInst::FCMP_OLE:
+    Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+    break;
+  case FCmpInst::FCMP_UNE:
+  case FCmpInst::FCMP_ONE:
+    Pred = ICmpInst::ICMP_NE;
+    break;
+  case FCmpInst::FCMP_ORD:
+    return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+  case FCmpInst::FCMP_UNO:
+    return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+  }
+
+  const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+  // Now we know that the APFloat is a normal number, zero or inf.
+
+  // See if the FP constant is too large for the integer.  For example,
+  // comparing an i8 to 300.0.
+  unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+  if (!LHSUnsigned) {
+    // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
+    // and large values.
+    APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+    SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+                          APFloat::rmNearestTiesToEven);
+    if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
+      if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
+          Pred == ICmpInst::ICMP_SLE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+    }
+  } else {
+    // If the RHS value is > UnsignedMax, fold the comparison. This handles
+    // +INF and large values.
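+    // For example (illustrative): comparing "uitofp i8 %x to float" against
+    // 300.0 always folds, since the unsigned maximum 255 is below the
+    // constant.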
+    APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+    UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+                          APFloat::rmNearestTiesToEven);
+    if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
+      if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
+          Pred == ICmpInst::ICMP_ULE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+    }
+  }
+
+  if (!LHSUnsigned) {
+    // See if the RHS value is < SignedMin.
+    APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+    SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+                          APFloat::rmNearestTiesToEven);
+    if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+          Pred == ICmpInst::ICMP_SGE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+    }
+  }
+
+  // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+  // [0, UMAX], but it may still be fractional.  See if it is fractional by
+  // casting the FP value to the integer value and back, checking for equality.
+  // Don't do this for zero, because -0.0 is not fractional.
+  Constant *RHSInt = LHSUnsigned
+    ? ConstantExpr::getFPToUI(RHSC, IntTy)
+    : ConstantExpr::getFPToSI(RHSC, IntTy);
+  if (!RHS.isZero()) {
+    bool Equal = LHSUnsigned
+      ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+      : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+    if (!Equal) {
+      // If we had a comparison against a fractional value, we have to adjust
+      // the compare predicate and sometimes the value.  RHSC is rounded
+      // towards zero at this point.
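+      // For example (illustrative): "(float)int <= 4.4" becomes "int <= 4",
+      // while a signed "(float)int < -4.4" becomes "int < -4"; the switch
+      // below applies the per-predicate adjustment.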
+      switch (Pred) {
+      default: llvm_unreachable("Unexpected integer comparison!");
+      case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+      case ICmpInst::ICMP_ULE:
+        // (float)int <= 4.4   --> int <= 4
+        // (float)int <= -4.4  --> false
+        if (RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        break;
+      case ICmpInst::ICMP_SLE:
+        // (float)int <= 4.4   --> int <= 4
+        // (float)int <= -4.4  --> int < -4
+        if (RHS.isNegative())
+          Pred = ICmpInst::ICMP_SLT;
+        break;
+      case ICmpInst::ICMP_ULT:
+        // (float)int < -4.4   --> false
+        // (float)int < 4.4    --> int <= 4
+        if (RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        Pred = ICmpInst::ICMP_ULE;
+        break;
+      case ICmpInst::ICMP_SLT:
+        // (float)int < -4.4   --> int < -4
+        // (float)int < 4.4    --> int <= 4
+        if (!RHS.isNegative())
+          Pred = ICmpInst::ICMP_SLE;
+        break;
+      case ICmpInst::ICMP_UGT:
+        // (float)int > 4.4    --> int > 4
+        // (float)int > -4.4   --> true
+        if (RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        break;
+      case ICmpInst::ICMP_SGT:
+        // (float)int > 4.4    --> int > 4
+        // (float)int > -4.4   --> int >= -4
+        if (RHS.isNegative())
+          Pred = ICmpInst::ICMP_SGE;
+        break;
+      case ICmpInst::ICMP_UGE:
+        // (float)int >= -4.4   --> true
+        // (float)int >= 4.4    --> int > 4
+        if (!RHS.isNegative())
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        Pred = ICmpInst::ICMP_UGT;
+        break;
+      case ICmpInst::ICMP_SGE:
+        // (float)int >= -4.4   --> int >= -4
+        // (float)int >= 4.4    --> int > 4
+        if (!RHS.isNegative())
+          Pred = ICmpInst::ICMP_SGT;
+        break;
+      }
+    }
+  }
+
+  // Lower this FP comparison into an appropriate integer version of the
+  // comparison.
+  return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+  bool Changed = false;
+
+  /// Orders the operands of the compare so that they are listed from most
+  /// complex to least complex.  This puts constants before unary operators,
+  /// before binary operators.
+  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+    I.swapOperands();
+    Changed = true;
+  }
+
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  // Simplify 'fcmp pred X, X'.
+  if (Op0 == Op1) {
+    switch (I.getPredicate()) {
+    default: llvm_unreachable("Unknown predicate!");
+    case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
+    case FCmpInst::FCMP_ULT:    // True if unordered or less than
+    case FCmpInst::FCMP_UGT:    // True if unordered or greater than
+    case FCmpInst::FCMP_UNE:    // True if unordered or not equal
+      // Canonicalize these to be 'fcmp uno %X, 0.0'.
+      I.setPredicate(FCmpInst::FCMP_UNO);
+      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      return &I;
+
+    case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
+    case FCmpInst::FCMP_OEQ:    // True if ordered and equal
+    case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
+    case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
+      // Canonicalize these to be 'fcmp ord %X, 0.0'.
+      I.setPredicate(FCmpInst::FCMP_ORD);
+      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      return &I;
+    }
+  }
+
+  // Handle fcmp with constant RHS.
+  if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+    if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+      switch (LHSI->getOpcode()) {
+      case Instruction::PHI:
+        // Only fold fcmp into the PHI if the phi and fcmp are in the same
+        // block.  If in the same block, we're encouraging jump threading.  If
+        // not, we are just pessimizing the code by making an i1 phi.
+        if (LHSI->getParent() == I.getParent())
+          if (Instruction *NV = FoldOpIntoPhi(I, true))
+            return NV;
+        break;
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+        if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+          return NV;
+        break;
+      case Instruction::Select: {
+        // If either operand of the select is a constant, we can fold the
+        // comparison into the select arms, which will cause one to be
+        // constant folded and the select turned into a bitwise or.
+        Value *Op1 = 0, *Op2 = 0;
+        if (LHSI->hasOneUse()) {
+          if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+            // Fold the known value into the constant operand.
+            Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+            // Insert a new FCmp of the other select operand.
+            Op2 = Builder->CreateFCmp(I.getPredicate(),
+                                      LHSI->getOperand(2), RHSC, I.getName());
+          } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+            // Fold the known value into the constant operand.
+            Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+            // Insert a new FCmp of the other select operand.
+            Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+                                      RHSC, I.getName());
+          }
+        }
+
+        if (Op1)
+          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+        break;
+      }
+      case Instruction::Load:
+        if (GetElementPtrInst *GEP =
+              dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+                !cast<LoadInst>(LHSI)->isVolatile())
+              if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+                return Res;
+        }
+        break;
+      }
+  }
+
+  return Changed ? &I : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 000000000000..6c0ecc9f9358
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,613 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
+  if (AI.isArrayAllocation()) {  // Check C != 1
+    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+      const Type *NewTy =
+        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+      New->setAlignment(AI.getAlignment());
+
+      // Scan to the end of the allocation instructions, to skip over a block
+      // of allocas if possible... also skip interleaved debug info.
+      //
+      BasicBlock::iterator It = New;
+      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+      // Now that It is pointing to the first non-allocation-inst in the block,
+      // insert our getelementptr instruction...
+      //
+      Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
+      Value *Idx[2];
+      Idx[0] = NullIdx;
+      Idx[1] = NullIdx;
+      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+                                                   New->getName()+".sub", It);
+
+      // Now make everything use the getelementptr instead of the original
+      // allocation.
+      return ReplaceInstUsesWith(AI, V);
+    } else if (isa<UndefValue>(AI.getArraySize())) {
+      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+    }
+  }
+
+  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
+    // Note that we only do this for alloca's, because malloc should allocate
+    // and return a unique pointer, even for a zero byte allocation.
+    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+    // If the alignment is 0 (unspecified), assign it the preferred alignment.
+    if (AI.getAlignment() == 0)
+      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+  }
+
+  return 0;
+}
+
+
+/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+                                        const TargetData *TD) {
+  User *CI = cast<User>(LI.getOperand(0));
+  Value *CastOp = CI->getOperand(0);
+
+  const PointerType *DestTy = cast<PointerType>(CI->getType());
+  const Type *DestPTy = DestTy->getElementType();
+  if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+    // If the address spaces don't match, don't eliminate the cast.
+    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+      return 0;
+
+    const Type *SrcPTy = SrcTy->getElementType();
+
+    if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
+        isa<VectorType>(DestPTy)) {
+      // If the source is an array, the code below will not succeed.  Check to
+      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
+      // constants.
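+      // For example (illustrative): a load through "bitcast [10 x i32]* @G
+      // to i32*" can use "getelementptr @G, i32 0, i32 0" instead, whose
+      // element type i32 then matches the loaded type directly.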
+      if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+          if (ASrcTy->getNumElements() != 0) {
+            Value *Idxs[2];
+            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
+            Idxs[1] = Idxs[0];
+            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+            SrcTy = cast<PointerType>(CastOp->getType());
+            SrcPTy = SrcTy->getElementType();
+          }
+
+      if (IC.getTargetData() &&
+          (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
+           isa<VectorType>(SrcPTy)) &&
+          // Do not allow turning this into a load of an integer, which is then
+          // casted to a pointer, this pessimizes pointer analysis a lot.
+          (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
+          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
+
+        // Okay, we are casting from one integer or pointer type to another of
+        // the same size.  Instead of casting the pointer before the load, cast
+        // the result of the loaded value.
+        Value *NewLoad =
+          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
+        // Now cast the result of the load.
+        return new BitCastInst(NewLoad, LI.getType());
+      }
+    }
+  }
+  return 0;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+  Value *Op = LI.getOperand(0);
+
+  // Attempt to improve the alignment.
+  if (TD) {
+    unsigned KnownAlign =
+      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+    if (KnownAlign >
+        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+                                  LI.getAlignment()))
+      LI.setAlignment(KnownAlign);
+  }
+
+  // load (cast X) --> cast (load X) iff safe.
+  if (isa<CastInst>(Op))
+    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+      return Res;
+
+  // None of the following transforms are legal for volatile loads.
+  if (LI.isVolatile()) return 0;
+
+  // Do really simple store-to-load forwarding and load CSE, to catch cases
+  // where there are several consecutive memory accesses to the same location,
+  // separated by a few arithmetic operations.
+  BasicBlock::iterator BBI = &LI;
+  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
+    return ReplaceInstUsesWith(LI, AvailableVal);
+
+  // load(gep null, ...) -> unreachable
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    const Value *GEPI0 = GEPI->getOperand(0);
+    // TODO: Consider a target hook for valid address spaces for this xform.
+    if (isa<ConstantPointerNull>(GEPI0) &&
+        GEPI->getPointerAddressSpace() == 0) {
+      // Insert a new store to null instruction before the load to indicate
+      // that this code is not reachable.  We do this instead of inserting
+      // an unreachable instruction directly because we cannot modify the
+      // CFG.
+      new StoreInst(UndefValue::get(LI.getType()),
+                    Constant::getNullValue(Op->getType()), &LI);
+      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+  }
+
+  // load null/undef -> unreachable
+  // TODO: Consider a target hook for valid address spaces for this xform.
+  if (isa<UndefValue>(Op) ||
+      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+    // Insert a new store to null instruction before the load to indicate that
+    // this code is not reachable.  We do this instead of inserting an
+    // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + + // Instcombine load (constantexpr_cast global) -> cast (load global) + if (ConstantExpr *CE = dyn_cast(Op)) + if (CE->isCast()) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + + if (Op->hasOneUse()) { + // Change select and PHI nodes to select values instead of addresses: this + // helps alias analysis out a lot, allows many others simplifications, and + // exposes redundancy in the code. + // + // Note that we cannot do the transformation unless we know that the + // introduced loads cannot trap! Something like this is valid as long as + // the condition is always false: load (select bool %C, int* null, int* %G), + // but it would not be valid if we transformed it to load from null + // unconditionally. + // + if (SelectInst *SI = dyn_cast(Op)) { + // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { + Value *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + Value *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); + return SelectInst::Create(SI->getCondition(), V1, V2); + } + + // load (select (cond, null, P)) -> load P + if (Constant *C = dyn_cast(SI->getOperand(1))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(2)); + return &LI; + } + + // load (select (cond, P, null)) -> load P + if (Constant *C = dyn_cast(SI->getOperand(2))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(1)); + return &LI; + } + } + } + return 0; +} + +/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P +/// when possible. This makes it generally easy to do alias analysis and/or +/// SROA/mem2reg of the memory object. +static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { + User *CI = cast(SI.getOperand(1)); + Value *CastOp = CI->getOperand(0); + + const Type *DestPTy = cast(CI->getType())->getElementType(); + const PointerType *SrcTy = dyn_cast(CastOp->getType()); + if (SrcTy == 0) return 0; + + const Type *SrcPTy = SrcTy->getElementType(); + + if (!DestPTy->isInteger() && !isa(DestPTy)) + return 0; + + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" + /// to its first element. This allows us to handle things like: + /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) + /// on 32-bit hosts. + SmallVector NewGEPIndices; + + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants. + if (isa(SrcPTy) || isa(SrcPTy)) { + // Index through pointer. 
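+    // For a hypothetical 'store i32 %x, (bitcast { { i32 }, float }* %P to
+    // i32*)', the first zero below indexes through the pointer itself; the
+    // loop then appends one zero per aggregate level, producing the index
+    // list [0, 0, 0] for the first nested i32 field of %P.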
+    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
+    NewGEPIndices.push_back(Zero);
+
+    while (1) {
+      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+        if (!STy->getNumElements()) /* Struct can be empty {} */
+          break;
+        NewGEPIndices.push_back(Zero);
+        SrcPTy = STy->getElementType(0);
+      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+        NewGEPIndices.push_back(Zero);
+        SrcPTy = ATy->getElementType();
+      } else {
+        break;
+      }
+    }
+
+    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+  }
+
+  if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
+    return 0;
+
+  // If the pointers point into different address spaces or if they point to
+  // values with different sizes, we can't do the transformation.
+  if (!IC.getTargetData() ||
+      SrcTy->getAddressSpace() !=
+        cast<PointerType>(CI->getType())->getAddressSpace() ||
+      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+      IC.getTargetData()->getTypeSizeInBits(DestPTy))
+    return 0;
+
+  // Okay, we are casting from one integer or pointer type to another of
+  // the same size.  Instead of casting the pointer before
+  // the store, cast the value to be stored.
+  Value *NewCast;
+  Value *SIOp0 = SI.getOperand(0);
+  Instruction::CastOps opcode = Instruction::BitCast;
+  const Type* CastSrcTy = SIOp0->getType();
+  const Type* CastDstTy = SrcPTy;
+  if (isa<PointerType>(CastDstTy)) {
+    if (CastSrcTy->isInteger())
+      opcode = Instruction::IntToPtr;
+  } else if (isa<IntegerType>(CastDstTy)) {
+    if (isa<PointerType>(SIOp0->getType()))
+      opcode = Instruction::PtrToInt;
+  }
+
+  // SIOp0 is a pointer to aggregate and this is a store to the first field,
+  // emit a GEP to index into its first field.
+  if (!NewGEPIndices.empty())
+    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+                                           NewGEPIndices.end());
+
+  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+                                   SIOp0->getName()+".c");
+  return new StoreInst(NewCast, CastOp);
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+///   %t0 = getelementptr \@a, 0, 3
+///   store i32 0, i32* %t0
+///   %t1 = getelementptr \@a, 0, 3
+///   %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+  // Test if the values are trivially equivalent.
+  if (A == B) return true;
+
+  // Test if the values come from identical arithmetic instructions.
+  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+  // it is only used to compare two uses within the same basic block, which
+  // means that they'll always either have the same value or one of them
+  // will have an undefined value.
+  if (isa<BinaryOperator>(A) ||
+      isa<CastInst>(A) ||
+      isa<PHINode>(A) ||
+      isa<GetElementPtrInst>(A))
+    if (Instruction *BI = dyn_cast<Instruction>(B))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+        return true;
+
+  // Otherwise they may not be equivalent.
+  return false;
+}
+
+// If this instruction has two uses, one of which is a llvm.dbg.declare,
+// return the llvm.dbg.declare.
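+// For example, for a hypothetical alloca whose only two uses are
+//   call void @llvm.dbg.declare(metadata !{i32* %x}, metadata !0)
+//   store i32 0, i32* %x
+// this returns the dbg.declare, so the caller can erase both it and the store.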
+DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
+  if (!V->hasNUses(2))
+    return 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
+      return DI;
+    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
+      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
+        return DI;
+    }
+  }
+  return 0;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+  Value *Val = SI.getOperand(0);
+  Value *Ptr = SI.getOperand(1);
+
+  // If the RHS is an alloca with a single use, zapify the store, making the
+  // alloca dead.
+  // If the RHS is an alloca with two uses, the other one being a
+  // llvm.dbg.declare, zapify the store and the declare, making the
+  // alloca dead.  We must do this to prevent declares from affecting
+  // codegen.
+  if (!SI.isVolatile()) {
+    if (Ptr->hasOneUse()) {
+      if (isa<AllocaInst>(Ptr))
+        return EraseInstFromFunction(SI);
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+        if (isa<AllocaInst>(GEP->getOperand(0))) {
+          if (GEP->getOperand(0)->hasOneUse())
+            return EraseInstFromFunction(SI);
+          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
+            EraseInstFromFunction(*DI);
+            return EraseInstFromFunction(SI);
+          }
+        }
+      }
+    }
+    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
+      EraseInstFromFunction(*DI);
+      return EraseInstFromFunction(SI);
+    }
+  }
+
+  // Attempt to improve the alignment.
+  if (TD) {
+    unsigned KnownAlign =
+      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+    if (KnownAlign >
+        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+                                  SI.getAlignment()))
+      SI.setAlignment(KnownAlign);
+  }
+
+  // Do really simple DSE, to catch cases where there are several consecutive
+  // stores to the same location, separated by a few arithmetic operations. This
+  // situation often occurs with bitfield accesses.
+  BasicBlock::iterator BBI = &SI;
+  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+       --ScanInsts) {
+    --BBI;
+    // Don't count debug info directives, lest they affect codegen,
+    // and we skip pointer-to-pointer bitcasts, which are NOPs.
+    // It is necessary for correctness to skip those that feed into a
+    // llvm.dbg.declare, as these are not present when debugging is off.
+    if (isa<DbgInfoIntrinsic>(BBI) ||
+        (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+      ScanInsts++;
+      continue;
+    }
+
+    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+      // Prev store isn't volatile, and stores to the same location?
+      if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
+                                                           SI.getOperand(1))) {
+        ++NumDeadStore;
+        ++BBI;
+        EraseInstFromFunction(*PrevSI);
+        continue;
+      }
+      break;
+    }
+
+    // If this is a load, we have to stop.  However, if the loaded value is from
+    // the pointer we're loading and is producing the pointer we're storing,
+    // then *this* store is dead (X = load P; store X -> P).
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+          !SI.isVolatile())
+        return EraseInstFromFunction(SI);
+
+      // Otherwise, this is a load from some other location.  Stores before it
+      // may not be dead.
+      break;
+    }
+
+    // Don't skip over loads or things that can modify memory.
+    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+      break;
+  }
+
+
+  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
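+  // The scan above catches, for example, a hypothetical bitfield sequence:
+  //   store i32 %a, i32* %p
+  //   %t = or i32 %a, 1
+  //   store i32 %t, i32* %p
+  // where the first store is dead and gets erased.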
+ + // store X, null -> turns into 'unreachable' in SimplifyCFG + if (isa(Ptr) && SI.getPointerAddressSpace() == 0) { + if (!isa(Val)) { + SI.setOperand(0, UndefValue::get(Val->getType())); + if (Instruction *U = dyn_cast(Val)) + Worklist.Add(U); // Dropped a use. + } + return 0; // Do not modify these! + } + + // store undef, Ptr -> noop + if (isa(Val)) + return EraseInstFromFunction(SI); + + // If the pointer destination is a cast, see if we can fold the cast into the + // source instead. + if (isa(Ptr)) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + if (ConstantExpr *CE = dyn_cast(Ptr)) + if (CE->isCast()) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + + + // If this store is the last instruction in the basic block (possibly + // excepting debug info instructions and the pointer bitcasts that feed + // into them), and if the block ends with an unconditional branch, try + // to move it to the successor block. + BBI = &SI; + do { + ++BBI; + } while (isa(BBI) || + (isa(BBI) && isa(BBI->getType()))); + if (BranchInst *BI = dyn_cast(BBI)) + if (BI->isUnconditional()) + if (SimplifyStoreAtEndOfBlock(SI)) + return 0; // xform done! + + return 0; +} + +/// SimplifyStoreAtEndOfBlock - Turn things like: +/// if () { *P = v1; } else { *P = v2 } +/// into a phi node with a store in the successor. +/// +/// Simplify things like: +/// *P = v1; if () { *P = v2; } +/// into a phi node with a store in the successor. +/// +bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { + BasicBlock *StoreBB = SI.getParent(); + + // Check to see if the successor block has exactly two incoming edges. If + // so, see if the other predecessor contains a store to the same location. + // if so, insert a PHI node (if needed) and move the stores down. + BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); + + // Determine whether Dest has exactly two predecessors and, if so, compute + // the other predecessor. + pred_iterator PI = pred_begin(DestBB); + BasicBlock *OtherBB = 0; + if (*PI != StoreBB) + OtherBB = *PI; + ++PI; + if (PI == pred_end(DestBB)) + return false; + + if (*PI != StoreBB) { + if (OtherBB) + return false; + OtherBB = *PI; + } + if (++PI != pred_end(DestBB)) + return false; + + // Bail out if all the relevant blocks aren't distinct (this can happen, + // for example, if SI is in an infinite loop) + if (StoreBB == DestBB || OtherBB == DestBB) + return false; + + // Verify that the other block ends in a branch and is not otherwise empty. + BasicBlock::iterator BBI = OtherBB->getTerminator(); + BranchInst *OtherBr = dyn_cast(BBI); + if (!OtherBr || BBI == OtherBB->begin()) + return false; + + // If the other block ends in an unconditional branch, check for the 'if then + // else' case. there is an instruction before the branch. + StoreInst *OtherStore = 0; + if (OtherBr->isUnconditional()) { + --BBI; + // Skip over debugging info. + while (isa(BBI) || + (isa(BBI) && isa(BBI->getType()))) { + if (BBI==OtherBB->begin()) + return false; + --BBI; + } + // If this isn't a store, isn't a store to the same location, or if the + // alignments differ, bail out. + OtherStore = dyn_cast(BBI); + if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || + OtherStore->getAlignment() != SI.getAlignment()) + return false; + } else { + // Otherwise, the other block ended with a conditional branch. If one of the + // destinations is StoreBB, then we have the if/then case. 
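+    // That is, a hypothetical triangle of the form:
+    //   OtherBB:  store i32 %v1, i32* %P
+    //             br i1 %c, label %StoreBB, label %DestBB
+    //   StoreBB:  store i32 %v2, i32* %P
+    //             br label %DestBB
+    // where both stores can be merged into DestBB behind a phi of %v1/%v2.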
+ if (OtherBr->getSuccessor(0) != StoreBB && + OtherBr->getSuccessor(1) != StoreBB) + return false; + + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an + // if/then triangle. See if there is a store to the same ptr as SI that + // lives in OtherBB. + for (;; --BBI) { + // Check to see if we find the matching store. + if ((OtherStore = dyn_cast(BBI))) { + if (OtherStore->getOperand(1) != SI.getOperand(1) || + OtherStore->getAlignment() != SI.getAlignment()) + return false; + break; + } + // If we find something that may be using or overwriting the stored + // value, or if we run out of instructions, we can't do the xform. + if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || + BBI == OtherBB->begin()) + return false; + } + + // In order to eliminate the store in OtherBr, we have to + // make sure nothing reads or overwrites the stored value in + // StoreBB. + for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { + // FIXME: This should really be AA driven. + if (I->mayReadFromMemory() || I->mayWriteToMemory()) + return false; + } + } + + // Insert a PHI node now if we need it. + Value *MergedVal = OtherStore->getOperand(0); + if (MergedVal != SI.getOperand(0)) { + PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); + PN->reserveOperandSpace(2); + PN->addIncoming(SI.getOperand(0), SI.getParent()); + PN->addIncoming(OtherStore->getOperand(0), OtherBB); + MergedVal = InsertNewInstBefore(PN, DestBB->front()); + } + + // Advance to a place where it is safe to insert the new store and + // insert it. + BBI = DestBB->getFirstNonPHI(); + InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), + OtherStore->isVolatile(), + SI.getAlignment()), *BBI); + + // Nuke the old stores. + EraseInstFromFunction(SI); + EraseInstFromFunction(*OtherStore); + return true; +} diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp new file mode 100644 index 000000000000..6afc0cdf5360 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -0,0 +1,695 @@ +//===- InstCombineMulDivRem.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv, +// srem, urem, frem. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + +/// MultiplyOverflows - True if the multiply can not be expressed in an int +/// this size. 
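+/// For instance, in i8 arithmetic, 16 * 16 = 256 does not fit even unsigned
+/// (max 255), and 16 * 8 = 128 fits unsigned but not signed (max 127).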
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { + uint32_t W = C1->getBitWidth(); + APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); + if (sign) { + LHSExt.sext(W * 2); + RHSExt.sext(W * 2); + } else { + LHSExt.zext(W * 2); + RHSExt.zext(W * 2); + } + + APInt MulExt = LHSExt * RHSExt; + + if (!sign) + return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); + + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); + APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); + return MulExt.slt(Min) || MulExt.sgt(Max); +} + +Instruction *InstCombiner::visitMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa(Op1)) // undef * X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // Simplify mul instructions with a constant RHS. + if (Constant *Op1C = dyn_cast(Op1)) { + if (ConstantInt *CI = dyn_cast(Op1C)) { + + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); + + if (CI->isZero()) + return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 + if (CI->equalsInt(1)) // X * 1 == X + return ReplaceInstUsesWith(I, Op0); + if (CI->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + const APInt& Val = cast(CI)->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + return BinaryOperator::CreateShl(Op0, + ConstantInt::get(Op0->getType(), Val.logBase2())); + } + } else if (isa(Op1C->getType())) { + if (Op1C->isNullValue()) + return ReplaceInstUsesWith(I, Op1C); + + if (ConstantVector *Op1V = dyn_cast(Op1C)) { + if (Op1V->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantInt *CI = dyn_cast(Splat)) + if (CI->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); + } + } + } + + if (BinaryOperator *Op0I = dyn_cast(Op0)) + if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && + isa(Op0I->getOperand(1)) && isa(Op1C)) { + // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. + Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); + Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); + return BinaryOperator::CreateAdd(Add, C1C2); + + } + + // Try to fold constant mul into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(Op1)) + return BinaryOperator::CreateMul(Op0v, Op1v); + + // (X / Y) * Y = X - (X % Y) + // (X / Y) * -Y = (X % Y) - X + { + Value *Op1C = Op1; + BinaryOperator *BO = dyn_cast(Op0); + if (!BO || + (BO->getOpcode() != Instruction::UDiv && + BO->getOpcode() != Instruction::SDiv)) { + Op1C = Op0; + BO = dyn_cast(Op1); + } + Value *Neg = dyn_castNegVal(Op1C); + if (BO && BO->hasOneUse() && + (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && + (BO->getOpcode() == Instruction::UDiv || + BO->getOpcode() == Instruction::SDiv)) { + Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); + + // If the division is exact, X % Y is zero. 
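+      // E.g. a hypothetical '(sdiv exact i32 %X, %Y) * %Y' has no remainder
+      // to subtract, so it folds directly to %X.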
+ if (SDivOperator *SDiv = dyn_cast(BO)) + if (SDiv->isExact()) { + if (Op1BO == Op1C) + return ReplaceInstUsesWith(I, Op0BO); + return BinaryOperator::CreateNeg(Op0BO); + } + + Value *Rem; + if (BO->getOpcode() == Instruction::UDiv) + Rem = Builder->CreateURem(Op0BO, Op1BO); + else + Rem = Builder->CreateSRem(Op0BO, Op1BO); + Rem->takeName(BO); + + if (Op1BO == Op1C) + return BinaryOperator::CreateSub(Op0BO, Rem); + return BinaryOperator::CreateSub(Rem, Op0BO); + } + } + + /// i1 mul -> i1 and. + if (I.getType()->isInteger(1)) + return BinaryOperator::CreateAnd(Op0, Op1); + + // X*(1 << Y) --> X << Y + // (1 << Y)*X --> X << Y + { + Value *Y; + if (match(Op0, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op1, Y); + if (match(Op1, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op0, Y); + } + + // If one of the operands of the multiply is a cast from a boolean value, then + // we know the bool is either zero or one, so this is a 'masking' multiply. + // X * Y (where Y is 0 or 1) -> X & (0-Y) + if (!isa(I.getType())) { + // -2 is "-1 << 1" so it is all bits set except the low one. + APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); + + Value *BoolCast = 0, *OtherOp = 0; + if (MaskedValueIsZero(Op0, Negative2)) + BoolCast = Op0, OtherOp = Op1; + else if (MaskedValueIsZero(Op1, Negative2)) + BoolCast = Op1, OtherOp = Op0; + + if (BoolCast) { + Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + BoolCast, "tmp"); + return BinaryOperator::CreateAnd(V, OtherOp); + } + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitFMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Simplify mul instructions with a constant RHS... + if (Constant *Op1C = dyn_cast(Op1)) { + if (ConstantFP *Op1F = dyn_cast(Op1C)) { + // "In IEEE floating point, x*1 is not equivalent to x for nans. However, + // ANSI says we can drop signals, so we can do this anyway." (from GCC) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + } else if (isa(Op1C->getType())) { + if (ConstantVector *Op1V = dyn_cast(Op1C)) { + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantFP *F = dyn_cast(Splat)) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); + } + } + } + + // Try to fold constant mul into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castFNegVal(Op1)) + return BinaryOperator::CreateFMul(Op0v, Op1v); + + return Changed ? &I : 0; +} + +/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select +/// instruction. +bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { + SelectInst *SI = cast(I.getOperand(1)); + + // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y + int NonNullOperand = -1; + if (Constant *ST = dyn_cast(SI->getOperand(1))) + if (ST->isNullValue()) + NonNullOperand = 2; + // div/rem X, (Cond ? Y : 0) -> div/rem X, Y + if (Constant *ST = dyn_cast(SI->getOperand(2))) + if (ST->isNullValue()) + NonNullOperand = 1; + + if (NonNullOperand == -1) + return false; + + Value *SelectCond = SI->getOperand(0); + + // Change the div/rem to use 'Y' instead of the select. 
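+  // For instance, 'udiv i32 %X, (select i1 %c, i32 0, i32 %Y)' can only avoid
+  // a fault when %c is false, so it is safe to rewrite it as 'udiv i32 %X, %Y'.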
+  I.setOperand(1, SI->getOperand(NonNullOperand));
+
+  // Okay, we know we can safely replace the operand of the div/rem with 'Y'.
+  // However, the select, or the condition of the select, may have multiple
+  // uses.  Based on our knowledge that the operand must be non-zero, propagate
+  // the known value for the select into other uses of it, and propagate a
+  // known value of the condition into its other users.
+
+  // If the select has no other uses and the condition is only used by the
+  // select, there is nothing further to propagate; exit early.
+  if (SI->use_empty() && SelectCond->hasOneUse())
+    return true;
+
+  // Scan the current block backward, looking for other uses of SI.
+  BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+  while (BBI != BBFront) {
+    --BBI;
+    // If we found a call to a function, we can't assume it will return, so
+    // information from below it cannot be propagated above it.
+    if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+      break;
+
+    // Replace uses of the select or its condition with the known values.
+    for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+         I != E; ++I) {
+      if (*I == SI) {
+        *I = SI->getOperand(NonNullOperand);
+        Worklist.Add(BBI);
+      } else if (*I == SelectCond) {
+        *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+                                   ConstantInt::getFalse(BBI->getContext());
+        Worklist.Add(BBI);
+      }
+    }
+
+    // Once we are past the instruction itself, stop looking for it.
+    if (&*BBI == SI)
+      SI = 0;
+    if (&*BBI == SelectCond)
+      SelectCond = 0;
+
+    // If we ran out of things to eliminate, break out of the loop.
+    if (SelectCond == 0 && SI == 0)
+      break;
+
+  }
+  return true;
+}
+
+
+/// This function implements the transforms on div instructions that work
+/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It
+/// is used by the visitors to those instructions.
+/// @brief Transforms common to all three div instructions
+Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // undef / X -> 0       for integer.
+  // undef / X -> undef   for FP (the undef could be an SNaN).
+  if (isa<UndefValue>(Op0)) {
+    if (Op0->getType()->isFPOrFPVector())
+      return ReplaceInstUsesWith(I, Op0);
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
+
+  // X / undef -> undef
+  if (isa<UndefValue>(Op1))
+    return ReplaceInstUsesWith(I, Op1);
+
+  return 0;
+}
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // (sdiv X, X) --> 1     (udiv X, X) --> 1
+  if (Op0 == Op1) {
+    if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
+      std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+    }
+
+    Constant *CI = ConstantInt::get(I.getType(), 1);
+    return ReplaceInstUsesWith(I, CI);
+  }
+
+  if (Instruction *Common = commonDivTransforms(I))
+    return Common;
+
+  // Handle cases involving: [su]div X, (select Cond, Y, Z)
+  // This does not apply for fdiv.
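+  // (It does not apply to fdiv because a 0.0 arm is a perfectly legal IEEE
+  // divisor: e.g. 'fdiv double %X, (select i1 %c, double 0.0, double %Y)'
+  // must be left alone.)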
+  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+    return &I;
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    // div X, 1 == X
+    if (RHS->equalsInt(1))
+      return ReplaceInstUsesWith(I, Op0);
+
+    // (X / C1) / C2  -> X / (C1*C2)
+    if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+      if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+        if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+          if (MultiplyOverflows(RHS, LHSRHS,
+                                I.getOpcode()==Instruction::SDiv))
+            return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+          else
+            return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+                                          ConstantExpr::getMul(RHS, LHSRHS));
+        }
+
+    if (!RHS->isZero()) { // avoid X udiv 0
+      if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+        if (Instruction *R = FoldOpIntoSelect(I, SI))
+          return R;
+      if (isa<PHINode>(Op0))
+        if (Instruction *NV = FoldOpIntoPhi(I))
+          return NV;
+    }
+  }
+
+  // 0 / X == 0, we don't need to preserve faults!
+  if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
+    if (LHS->equalsInt(0))
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+  // It can't be division by zero, hence it must be division by one.
+  if (I.getType()->isInteger(1))
+    return ReplaceInstUsesWith(I, Op0);
+
+  if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+    if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
+      // div X, 1 == X
+      if (X->isOne())
+        return ReplaceInstUsesWith(I, Op0);
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Handle the integer div common cases
+  if (Instruction *Common = commonIDivTransforms(I))
+    return Common;
+
+  if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+    // X udiv 2^C -> X >> C
+    // Check to see if this is an unsigned division with an exact power of 2,
+    // if so, convert to a right shift.
+    if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
+      return BinaryOperator::CreateLShr(Op0,
+               ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+
+    // X udiv C, where C >= signbit
+    if (C->getValue().isNegative()) {
+      Value *IC = Builder->CreateICmpULT(Op0, C);
+      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+                                ConstantInt::get(I.getType(), 1));
+    }
+  }
+
+  // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
+  if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
+    if (RHSI->getOpcode() == Instruction::Shl &&
+        isa<ConstantInt>(RHSI->getOperand(0))) {
+      const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
+      if (C1.isPowerOf2()) {
+        Value *N = RHSI->getOperand(1);
+        const Type *NTy = N->getType();
+        if (uint32_t C2 = C1.logBase2())
+          N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
+        return BinaryOperator::CreateLShr(Op0, N);
+      }
+    }
+  }
+
+  // udiv X, (select Cond, C1, C2)
+  //   --> select Cond, (lshr X, log2(C1)), (lshr X, log2(C2))
+  // where C1&C2 are powers of two.
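+  // E.g. a hypothetical 'udiv i32 %X, (select i1 %c, i32 8, i32 4)' becomes
+  // 'select i1 %c, (lshr i32 %X, 3), (lshr i32 %X, 2)'.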
+ if (SelectInst *SI = dyn_cast(Op1)) + if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) + if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { + const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); + if (TVA.isPowerOf2() && FVA.isPowerOf2()) { + // Compute the shift amounts + uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); + // Construct the "on true" case of the select + Constant *TC = ConstantInt::get(Op0->getType(), TSA); + Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); + + // Construct the "on false" case of the select + Constant *FC = ConstantInt::get(Op0->getType(), FSA); + Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); + + // construct the select instruction and return it. + return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); + } + } + return 0; +} + +Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + if (ConstantInt *RHS = dyn_cast(Op1)) { + // sdiv X, -1 == -X + if (RHS->isAllOnesValue()) + return BinaryOperator::CreateNeg(Op0); + + // sdiv X, C --> ashr X, log2(C) + if (cast(&I)->isExact() && + RHS->getValue().isNonNegative() && + RHS->getValue().isPowerOf2()) { + Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), + RHS->getValue().exactLogBase2()); + return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); + } + + // -X/C --> X/-C provided the negation doesn't overflow. + if (SubOperator *Sub = dyn_cast(Op0)) + if (isa(Sub->getOperand(0)) && + cast(Sub->getOperand(0))->isNullValue() && + Sub->hasNoSignedWrap()) + return BinaryOperator::CreateSDiv(Sub->getOperand(1), + ConstantExpr::getNeg(RHS)); + } + + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a udiv. + if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op0, Mask)) { + if (MaskedValueIsZero(Op1, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + ConstantInt *ShiftedInt; + if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && + ShiftedInt->getValue().isPowerOf2()) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { + return commonDivTransforms(I); +} + +/// This function implements the transforms on rem instructions that work +/// regardless of the kind of rem instruction it is (urem, srem, or frem). It +/// is used by the visitors to those instructions. 
+/// @brief Transforms common to all three rem instructions
+Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (isa<UndefValue>(Op0)) {             // undef % X -> 0 for integer
+    if (I.getType()->isFPOrFPVector())
+      return ReplaceInstUsesWith(I, Op0); // undef % X -> undef for FP (could be an SNaN)
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
+  if (isa<UndefValue>(Op1))
+    return ReplaceInstUsesWith(I, Op1);   // X % undef -> undef
+
+  // Handle cases involving: rem X, (select Cond, Y, Z)
+  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+    return &I;
+
+  return 0;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Instruction *common = commonRemTransforms(I))
+    return common;
+
+  // 0 % X == 0 for integer, we don't need to preserve faults!
+  if (Constant *LHS = dyn_cast<Constant>(Op0))
+    if (LHS->isNullValue())
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    // X % 0 == undef, we don't need to preserve faults!
+    if (RHS->equalsInt(0))
+      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+
+    if (RHS->equalsInt(1))  // X % 1 == 0
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+    if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+      if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+        if (Instruction *R = FoldOpIntoSelect(I, SI))
+          return R;
+      } else if (isa<PHINode>(Op0I)) {
+        if (Instruction *NV = FoldOpIntoPhi(I))
+          return NV;
+      }
+
+      // See if we can fold away this rem instruction.
+      if (SimplifyDemandedInstructionBits(I))
+        return &I;
+    }
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Instruction *common = commonIRemTransforms(I))
+    return common;
+
+  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+    // X urem 2^C -> X and (2^C - 1)
+    // Check to see if this is an unsigned remainder with an exact power of 2,
+    // if so, convert to a bitwise and.
+    if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
+      if (C->getValue().isPowerOf2())
+        return BinaryOperator::CreateAnd(Op0, SubOne(C));
+  }
+
+  if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
+    // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+    if (RHSI->getOpcode() == Instruction::Shl &&
+        isa<ConstantInt>(RHSI->getOperand(0))) {
+      if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
+        Constant *N1 = Constant::getAllOnesValue(I.getType());
+        Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
+        return BinaryOperator::CreateAnd(Op0, Add);
+      }
+    }
+  }
+
+  // urem X, (select Cond, C1, C2) --> select Cond, (and X, C1-1), (and X, C2-1)
+  // where C1&C2 are powers of two.
+  if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
+    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+        // STO == 0 and SFO == 0 handled above.
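+        // E.g. a hypothetical 'urem i32 %X, (select i1 %c, i32 16, i32 8)'
+        // becomes 'select i1 %c, (and i32 %X, 15), (and i32 %X, 7)'.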
+ if ((STO->getValue().isPowerOf2()) && + (SFO->getValue().isPowerOf2())) { + Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), + SI->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), + SI->getName()+".f"); + return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitSRem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Handle the integer rem common cases + if (Instruction *Common = commonIRemTransforms(I)) + return Common; + + if (Value *RHSNeg = dyn_castNegVal(Op1)) + if (!isa(RHSNeg) || + (isa(RHSNeg) && + cast(RHSNeg)->getValue().isStrictlyPositive())) { + // X % -Y -> X % Y + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, RHSNeg); + return &I; + } + + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a urem. + if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); + } + } + + // If it's a constant vector, flip any negative values positive. + if (ConstantVector *RHSV = dyn_cast(Op1)) { + unsigned VWidth = RHSV->getNumOperands(); + + bool hasNegative = false; + for (unsigned i = 0; !hasNegative && i != VWidth; ++i) + if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) + if (RHS->getValue().isNegative()) + hasNegative = true; + + if (hasNegative) { + std::vector Elts(VWidth); + for (unsigned i = 0; i != VWidth; ++i) { + if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) { + if (RHS->getValue().isNegative()) + Elts[i] = cast(ConstantExpr::getNeg(RHS)); + else + Elts[i] = RHS; + } + } + + Constant *NewRHSV = ConstantVector::get(Elts); + if (NewRHSV != RHSV) { + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, NewRHSV); + return &I; + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitFRem(BinaryOperator &I) { + return commonRemTransforms(I); +} + diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp new file mode 100644 index 000000000000..bb7632fd2d45 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -0,0 +1,841 @@ +//===- InstCombinePHI.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitPHINode function. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] +/// and if a/b/c and the add's all have a single use, turn this into a phi +/// and a single binop. 
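+/// For example (hypothetical IR):
+///   %t1 = add i32 %a, %b                            ; in %pred1
+///   %t2 = add i32 %a, %c                            ; in %pred2
+///   %p  = phi i32 [ %t1, %pred1 ], [ %t2, %pred2 ]
+/// becomes
+///   %r  = phi i32 [ %b, %pred1 ], [ %c, %pred2 ]
+///   %p  = add i32 %a, %r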
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { + Instruction *FirstInst = cast(PN.getIncomingValue(0)); + assert(isa(FirstInst) || isa(FirstInst)); + unsigned Opc = FirstInst->getOpcode(); + Value *LHSVal = FirstInst->getOperand(0); + Value *RHSVal = FirstInst->getOperand(1); + + const Type *LHSType = LHSVal->getType(); + const Type *RHSType = RHSVal->getType(); + + // Scan to see if all operands are the same opcode, and all have one use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + Instruction *I = dyn_cast(PN.getIncomingValue(i)); + if (!I || I->getOpcode() != Opc || !I->hasOneUse() || + // Verify type of the LHS matches so we don't fold cmp's of different + // types or GEP's with different index types. + I->getOperand(0)->getType() != LHSType || + I->getOperand(1)->getType() != RHSType) + return 0; + + // If they are CmpInst instructions, check their predicates + if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) + if (cast(I)->getPredicate() != + cast(FirstInst)->getPredicate()) + return 0; + + // Keep track of which operand needs a phi node. + if (I->getOperand(0) != LHSVal) LHSVal = 0; + if (I->getOperand(1) != RHSVal) RHSVal = 0; + } + + // If both LHS and RHS would need a PHI, don't do this transformation, + // because it would increase the number of PHIs entering the block, + // which leads to higher register pressure. This is especially + // bad when the PHIs are in the header of a loop. + if (!LHSVal && !RHSVal) + return 0; + + // Otherwise, this is safe to transform! + + Value *InLHS = FirstInst->getOperand(0); + Value *InRHS = FirstInst->getOperand(1); + PHINode *NewLHS = 0, *NewRHS = 0; + if (LHSVal == 0) { + NewLHS = PHINode::Create(LHSType, + FirstInst->getOperand(0)->getName() + ".pn"); + NewLHS->reserveOperandSpace(PN.getNumOperands()/2); + NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); + InsertNewInstBefore(NewLHS, PN); + LHSVal = NewLHS; + } + + if (RHSVal == 0) { + NewRHS = PHINode::Create(RHSType, + FirstInst->getOperand(1)->getName() + ".pn"); + NewRHS->reserveOperandSpace(PN.getNumOperands()/2); + NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); + InsertNewInstBefore(NewRHS, PN); + RHSVal = NewRHS; + } + + // Add all operands to the new PHIs. + if (NewLHS || NewRHS) { + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Instruction *InInst = cast(PN.getIncomingValue(i)); + if (NewLHS) { + Value *NewInLHS = InInst->getOperand(0); + NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); + } + if (NewRHS) { + Value *NewInRHS = InInst->getOperand(1); + NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); + } + } + } + + if (BinaryOperator *BinOp = dyn_cast(FirstInst)) + return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); + CmpInst *CIOp = cast(FirstInst); + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); +} + +Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { + GetElementPtrInst *FirstInst =cast(PN.getIncomingValue(0)); + + SmallVector FixedOperands(FirstInst->op_begin(), + FirstInst->op_end()); + // This is true if all GEP bases are allocas and if all indices into them are + // constants. + bool AllBasePointersAreAllocas = true; + + // We don't want to replace this phi if the replacement would require + // more than one phi, which leads to higher register pressure. This is + // especially bad when the PHIs are in the header of a loop. 
+ bool NeededPhi = false; + + // Scan to see if all operands are the same opcode, and all have one use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + GetElementPtrInst *GEP= dyn_cast(PN.getIncomingValue(i)); + if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || + GEP->getNumOperands() != FirstInst->getNumOperands()) + return 0; + + // Keep track of whether or not all GEPs are of alloca pointers. + if (AllBasePointersAreAllocas && + (!isa(GEP->getOperand(0)) || + !GEP->hasAllConstantIndices())) + AllBasePointersAreAllocas = false; + + // Compare the operand lists. + for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { + if (FirstInst->getOperand(op) == GEP->getOperand(op)) + continue; + + // Don't merge two GEPs when two operands differ (introducing phi nodes) + // if one of the PHIs has a constant for the index. The index may be + // substantially cheaper to compute for the constants, so making it a + // variable index could pessimize the path. This also handles the case + // for struct indices, which must always be constant. + if (isa(FirstInst->getOperand(op)) || + isa(GEP->getOperand(op))) + return 0; + + if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) + return 0; + + // If we already needed a PHI for an earlier operand, and another operand + // also requires a PHI, we'd be introducing more PHIs than we're + // eliminating, which increases register pressure on entry to the PHI's + // block. + if (NeededPhi) + return 0; + + FixedOperands[op] = 0; // Needs a PHI. + NeededPhi = true; + } + } + + // If all of the base pointers of the PHI'd GEPs are from allocas, don't + // bother doing this transformation. At best, this will just save a bit of + // offset calculation, but all the predecessors will have to materialize the + // stack address into a register anyway. We'd actually rather *clone* the + // load up into the predecessors so that we have a load of a gep of an alloca, + // which can usually all be folded into the load. + if (AllBasePointersAreAllocas) + return 0; + + // Otherwise, this is safe to transform. Insert PHI nodes for each operand + // that is variable. + SmallVector OperandPhis(FixedOperands.size()); + + bool HasAnyPHIs = false; + for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { + if (FixedOperands[i]) continue; // operand doesn't need a phi. + Value *FirstOp = FirstInst->getOperand(i); + PHINode *NewPN = PHINode::Create(FirstOp->getType(), + FirstOp->getName()+".pn"); + InsertNewInstBefore(NewPN, PN); + + NewPN->reserveOperandSpace(e); + NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); + OperandPhis[i] = NewPN; + FixedOperands[i] = NewPN; + HasAnyPHIs = true; + } + + + // Add all operands to the new PHIs. + if (HasAnyPHIs) { + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + GetElementPtrInst *InGEP =cast(PN.getIncomingValue(i)); + BasicBlock *InBB = PN.getIncomingBlock(i); + + for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) + if (PHINode *OpPhi = OperandPhis[op]) + OpPhi->addIncoming(InGEP->getOperand(op), InBB); + } + } + + Value *Base = FixedOperands[0]; + return cast(FirstInst)->isInBounds() ? + GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, + FixedOperands.end()) : + GetElementPtrInst::Create(Base, FixedOperands.begin()+1, + FixedOperands.end()); +} + + +/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to +/// sink the load out of the block that defines it. 
This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca.  Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+  BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+  for (++BBI; BBI != E; ++BBI)
+    if (BBI->mayWriteToMemory())
+      return false;
+
+  // Check for non-address taken alloca.  If not address-taken already, it
+  // isn't profitable to do this xform.
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+    bool isAddressTaken = false;
+    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+         UI != E; ++UI) {
+      if (isa<LoadInst>(UI)) continue;
+      if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+        // If storing TO the alloca, then the address isn't taken.
+        if (SI->getOperand(1) == AI) continue;
+      }
+      isAddressTaken = true;
+      break;
+    }
+
+    if (!isAddressTaken && AI->isStaticAlloca())
+      return false;
+  }
+
+  // If this load is a load from a GEP with a constant offset from an alloca,
+  // then we don't want to sink it.  In its present form, it will be
+  // load [constant stack offset].  Sinking it will cause us to have to
+  // materialize the stack addresses in each predecessor in a register only to
+  // do a shared load from register in the successor.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+      if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+        return false;
+
+  return true;
+}
+
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+  LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+  // When processing loads, we need to propagate two bits of information to the
+  // sunk load: whether it is volatile, and what its alignment is.  We currently
+  // don't sink loads when some have their alignment specified and some don't.
+  // visitLoadInst will propagate an alignment onto the load when TD is around,
+  // and if TD isn't around, we can't handle the mixed case.
+  bool isVolatile = FirstLI->isVolatile();
+  unsigned LoadAlignment = FirstLI->getAlignment();
+
+  // We can't sink the load if the loaded value could be modified between the
+  // load and the PHI.
+  if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+      !isSafeAndProfitableToSinkLoad(FirstLI))
+    return 0;
+
+  // If the PHI is of volatile loads and the load block has multiple
+  // successors, sinking it would remove a load of the volatile value from
+  // the path through the other successor.
+  if (isVolatile &&
+      FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+    return 0;
+
+  // Check to see if all arguments are the same operation.
+  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+    LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+    if (!LI || !LI->hasOneUse())
+      return 0;
+
+    // We can't sink the load if the loaded value could be modified between
+    // the load and the PHI.
+    if (LI->isVolatile() != isVolatile ||
+        LI->getParent() != PN.getIncomingBlock(i) ||
+        !isSafeAndProfitableToSinkLoad(LI))
+      return 0;
+
+    // If some of the loads have an alignment specified but not all of them,
+    // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) + return 0; + + LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); + + // If the PHI is of volatile loads and the load block has multiple + // successors, sinking it would remove a load of the volatile value from + // the path through the other successor. + if (isVolatile && + LI->getParent()->getTerminator()->getNumSuccessors() != 1) + return 0; + } + + // Okay, they are all the same operation. Create a new PHI node of the + // correct type, and PHI together all of the LHS's of the instructions. + PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getName()+".in"); + NewPN->reserveOperandSpace(PN.getNumOperands()/2); + + Value *InVal = FirstLI->getOperand(0); + NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + + // Add all operands to the new PHI. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); + if (NewInVal != InVal) + InVal = 0; + NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + } + + Value *PhiVal; + if (InVal) { + // The new PHI unions all of the same values together. This is really + // common, so we handle it intelligently here for compile-time speed. + PhiVal = InVal; + delete NewPN; + } else { + InsertNewInstBefore(NewPN, PN); + PhiVal = NewPN; + } + + // If this was a volatile load that we are merging, make sure to loop through + // and mark all the input loads as non-volatile. If we don't do this, we will + // insert a new volatile load and the old ones will not be deletable. + if (isVolatile) + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + cast(PN.getIncomingValue(i))->setVolatile(false); + + return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); +} + + + +/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" +/// operator and they all are only used by the PHI, PHI together their +/// inputs, and do the operation once, to the result of the PHI. +Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { + Instruction *FirstInst = cast(PN.getIncomingValue(0)); + + if (isa(FirstInst)) + return FoldPHIArgGEPIntoPHI(PN); + if (isa(FirstInst)) + return FoldPHIArgLoadIntoPHI(PN); + + // Scan the instruction, looking for input operations that can be folded away. + // If all input operands to the phi are the same instruction (e.g. a cast from + // the same type or "+42") we can pull the operation through the PHI, reducing + // code size and simplifying code. + Constant *ConstantOp = 0; + const Type *CastSrcTy = 0; + + if (isa(FirstInst)) { + CastSrcTy = FirstInst->getOperand(0)->getType(); + + // Be careful about transforming integer PHIs. We don't want to pessimize + // the code by turning an i32 into an i1293. + if (isa(PN.getType()) && isa(CastSrcTy)) { + if (!ShouldChangeType(PN.getType(), CastSrcTy)) + return 0; + } + } else if (isa(FirstInst) || isa(FirstInst)) { + // Can fold binop, compare or shift here if the RHS is a constant, + // otherwise call FoldPHIArgBinOpIntoPHI. + ConstantOp = dyn_cast(FirstInst->getOperand(1)); + if (ConstantOp == 0) + return FoldPHIArgBinOpIntoPHI(PN); + } else { + return 0; // Cannot fold this operation. + } + + // Check to see if all arguments are the same operation. 
+  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+    if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+      return 0;
+    if (CastSrcTy) {
+      if (I->getOperand(0)->getType() != CastSrcTy)
+        return 0;  // Cast operation must match.
+    } else if (I->getOperand(1) != ConstantOp) {
+      return 0;
+    }
+  }
+
+  // Okay, they are all the same operation.  Create a new PHI node of the
+  // correct type, and PHI together all of the LHS's of the instructions.
+  PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+                                   PN.getName()+".in");
+  NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+  Value *InVal = FirstInst->getOperand(0);
+  NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+  // Add all operands to the new PHI.
+  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+    Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+    if (NewInVal != InVal)
+      InVal = 0;
+    NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+  }
+
+  Value *PhiVal;
+  if (InVal) {
+    // The new PHI unions all of the same values together.  This is really
+    // common, so we handle it intelligently here for compile-time speed.
+    PhiVal = InVal;
+    delete NewPN;
+  } else {
+    InsertNewInstBefore(NewPN, PN);
+    PhiVal = NewPN;
+  }
+
+  // Insert and return the new operation.
+  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
+    return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+    return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+
+  CmpInst *CIOp = cast<CmpInst>(FirstInst);
+  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+                         PhiVal, ConstantOp);
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+                         SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+  if (PN->use_empty()) return true;
+  if (!PN->hasOneUse()) return false;
+
+  // Remember this node, and if we find the cycle, return.
+  if (!PotentiallyDeadPHIs.insert(PN))
+    return true;
+
+  // Don't scan crazily complex things.
+  if (PotentiallyDeadPHIs.size() == 16)
+    return false;
+
+  if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+    return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+  return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+///   z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+                           SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+  // See if we already saw this PHI node.
+  if (!ValueEqualPHIs.insert(PN))
+    return true;
+
+  // Don't scan crazily complex things.
+  if (ValueEqualPHIs.size() == 16)
+    return false;
+
+  // Scan the operands to see if they are either phi nodes or are equal to
+  // the value.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *Op = PN->getIncomingValue(i);
+    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+        return false;
+    } else if (Op != NonPhiInVal)
+      return false;
+  }
+
+  return true;
+}
+
+
+namespace {
+struct PHIUsageRecord {
+  unsigned PHIId;     // The ID # of the PHI (something deterministic to sort on)
+  unsigned Shift;     // The amount shifted.
+  Instruction *Inst;  // The trunc instruction.
+
+  PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+    : PHIId(pn), Shift(Sh), Inst(User) {}
+
+  bool operator<(const PHIUsageRecord &RHS) const {
+    if (PHIId < RHS.PHIId) return true;
+    if (PHIId > RHS.PHIId) return false;
+    if (Shift < RHS.Shift) return true;
+    if (Shift > RHS.Shift) return false;
+    return Inst->getType()->getPrimitiveSizeInBits() <
+           RHS.Inst->getType()->getPrimitiveSizeInBits();
+  }
+};
+
+struct LoweredPHIRecord {
+  PHINode *PN;        // The PHI that was lowered.
+  unsigned Shift;     // The amount shifted.
+  unsigned Width;     // The width extracted.
+
+  LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+    : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+  // Ctor form used by DenseMap.
+  LoweredPHIRecord(PHINode *pn, unsigned Sh)
+    : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+  template<>
+  struct DenseMapInfo<LoweredPHIRecord> {
+    static inline LoweredPHIRecord getEmptyKey() {
+      return LoweredPHIRecord(0, 0);
+    }
+    static inline LoweredPHIRecord getTombstoneKey() {
+      return LoweredPHIRecord(0, 1);
+    }
+    static unsigned getHashValue(const LoweredPHIRecord &Val) {
+      return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+             (Val.Width>>3);
+    }
+    static bool isEqual(const LoweredPHIRecord &LHS,
+                        const LoweredPHIRecord &RHS) {
+      return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+             LHS.Width == RHS.Width;
+    }
+  };
+  template <>
+  struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations.  If
+/// so, we split the PHI into the various pieces being extracted.  This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr.  We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+  // PHIUsers - Keep track of all of the truncated values extracted from a set
+  // of PHIs, along with their offset.  These are the things we want to rewrite.
+  SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+  // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+  // nodes which are extracted from.  PHIsToSlice is an ordered list of PHIs
+  // that we need to check the uses of (to ensure they are all extracts);
+  // PHIsInspected is the set we use to avoid revisiting PHIs.
+  SmallVector<PHINode*, 8> PHIsToSlice;
+  SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+  PHIsToSlice.push_back(&FirstPhi);
+  PHIsInspected.insert(&FirstPhi);
+
+  for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+    PHINode *PN = PHIsToSlice[PHIId];
+
+    // Scan the input list of the PHI.  If any input is an invoke, and if the
+    // input is defined in the predecessor, then we won't be able to split the
+    // critical edge which is required to insert a truncate.  Because of this,
+    // we have to bail out.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+      if (II == 0) continue;
+      if (II->getParent() != PN->getIncomingBlock(i))
+        continue;
+
+      // If we have a phi, and if it's directly in the predecessor, then we have
+      // a critical edge where we need to put the truncate.  Since we can't
+      // split the edge in instcombine, we have to bail out.
+      return 0;
+    }
+
+
+    for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+         UI != E; ++UI) {
+      Instruction *User = cast<Instruction>(*UI);
+
+      // If the user is a PHI, inspect its uses recursively.
+      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+        if (PHIsInspected.insert(UserPN))
+          PHIsToSlice.push_back(UserPN);
+        continue;
+      }
+
+      // Truncates are always ok.
+      if (isa<TruncInst>(User)) {
+        PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+        continue;
+      }
+
+      // Otherwise it must be a lshr which can only be used by one trunc.
+      if (User->getOpcode() != Instruction::LShr ||
+          !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+          !isa<ConstantInt>(User->getOperand(1)))
+        return 0;
+
+      unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+      PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+    }
+  }
+
+  // If we have no users, they must all be self uses; just nuke the PHI.
+  if (PHIUsers.empty())
+    return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+  // If this phi node is transformable, create new PHIs for all the pieces
+  // extracted out of it.  First, sort the users by their offset and size.
+  array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+  DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+        for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+          errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
+        );
+
+  // PredValues - This is a temporary used when rewriting PHI nodes.  It is
+  // hoisted out here to avoid construction/destruction thrashing.
+  DenseMap<BasicBlock*, Value*> PredValues;
+
+  // ExtractedVals - Each new PHI we introduce is saved here so we don't
+  // introduce redundant PHIs.
+  DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+  for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+    unsigned PHIId = PHIUsers[UserI].PHIId;
+    PHINode *PN = PHIsToSlice[PHIId];
+    unsigned Offset = PHIUsers[UserI].Shift;
+    const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+    PHINode *EltPHI;
+
+    // If we've already lowered a user like this, reuse the previously lowered
+    // value.
+    if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+      // Otherwise, create the new PHI node for this user.
+      EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+      assert(EltPHI->getType() != PN->getType() &&
+             "Truncate didn't shrink phi?");
+
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+        BasicBlock *Pred = PN->getIncomingBlock(i);
+        Value *&PredVal = PredValues[Pred];
+
+        // If we already have a value for this predecessor, reuse it.
+        if (PredVal) {
+          EltPHI->addIncoming(PredVal, Pred);
+          continue;
+        }
+
+        // Handle the PHI self-reuse case.
+        Value *InVal = PN->getIncomingValue(i);
+        if (InVal == PN) {
+          PredVal = EltPHI;
+          EltPHI->addIncoming(PredVal, Pred);
+          continue;
+        }
+
+        if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+          // If the incoming value was a PHI, and if it was one of the PHIs we
+          // already rewrote, just use the lowered value.
+          if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+            PredVal = Res;
+            EltPHI->addIncoming(PredVal, Pred);
+            continue;
+          }
+        }
+
+        // Otherwise, do an extract in the predecessor.
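+        // e.g. slicing an i64 PHI at offset 32 into an i32 piece emits, in
+        // the predecessor:
+        //   %extract   = lshr i64 %inval, 32
+        //   %extract.t = trunc i64 %extract to i32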
+        Builder->SetInsertPoint(Pred, Pred->getTerminator());
+        Value *Res = InVal;
+        if (Offset)
+          Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+                                                          Offset), "extract");
+        Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+        PredVal = Res;
+        EltPHI->addIncoming(Res, Pred);
+
+        // If the incoming value was a PHI, and if it was one of the PHIs we
+        // are rewriting, we will ultimately delete the code we inserted.
+        // This means we need to revisit that PHI to make sure we extract out
+        // the needed piece.
+        if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+          if (PHIsInspected.count(OldInVal)) {
+            unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+                                          OldInVal)-PHIsToSlice.begin();
+            PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+                                              cast<Instruction>(Res)));
+            ++UserE;
+          }
+      }
+      PredValues.clear();
+
+      DEBUG(errs() << "  Made element PHI for offset " << Offset << ": "
+                   << *EltPHI << '\n');
+      ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+    }
+
+    // Replace the use of this piece with the PHI node.
+    ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+  }
+
+  // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+  // with undefs.
+  Value *Undef = UndefValue::get(FirstPhi.getType());
+  for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+    ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+  return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+  // If LCSSA is around, don't mess with Phi nodes
+  if (MustPreserveLCSSA) return 0;
+
+  if (Value *V = PN.hasConstantValue())
+    return ReplaceInstUsesWith(PN, V);
+
+  // If all PHI operands are the same operation, pull them through the PHI,
+  // reducing code size.
+  if (isa<Instruction>(PN.getIncomingValue(0)) &&
+      isa<Instruction>(PN.getIncomingValue(1)) &&
+      cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+      cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+      // FIXME: The hasOneUse check will fail for PHIs that use the value
+      // more than once.
+      PN.getIncomingValue(0)->hasOneUse())
+    if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+      return Result;
+
+  // If this is a trivial cycle in the PHI node graph, remove it.  Basically,
+  // if this PHI only has a single use (a PHI), and if that PHI only has one
+  // use (a PHI)... break the cycle.
+  if (PN.hasOneUse()) {
+    Instruction *PHIUser = cast<Instruction>(PN.use_back());
+    if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+      SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+      PotentiallyDeadPHIs.insert(&PN);
+      if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+        return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+    }
+
+    // If this phi has a single use, and if that use just computes a value for
+    // the next iteration of a loop, delete the phi.  This occurs with unused
+    // induction variables, e.g. "for (int j = 0; ; ++j);".  Detecting this
+    // common case here is good because the only other things that catch this
+    // are induction variable analysis (sometimes) and ADCE, which is only run
+    // late.
+    if (PHIUser->hasOneUse() &&
+        (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+        PHIUser->use_back() == &PN) {
+      return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+    }
+  }
+
+  // We sometimes end up with phi cycles that non-obviously end up being the
+  // same value, for example:
+  //   z = some value; x = phi (y, z); y = phi (x, z)
+  // where the phi nodes don't necessarily need to be in the same block.  Do a
+  // quick check to see if the PHI node only contains a single non-phi value;
+  // if so, scan to see if the phi cycle is actually equal to that value.
+  {
+    unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+    // Scan for the first non-phi operand.
+    while (InValNo != NumOperandVals &&
+           isa<PHINode>(PN.getIncomingValue(InValNo)))
+      ++InValNo;
+
+    if (InValNo != NumOperandVals) {
+      Value *NonPhiInVal = PN.getOperand(InValNo);
+
+      // Scan the rest of the operands to see if there are any conflicts; if
+      // so, there is no need to recursively scan other phis.
+      for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+        Value *OpVal = PN.getIncomingValue(InValNo);
+        if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+          break;
+      }
+
+      // If we scanned over all operands, then we have one unique value plus
+      // phi values.  Scan PHI nodes to see if they all merge in each other or
+      // the value.
+      if (InValNo == NumOperandVals) {
+        SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+        if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+          return ReplaceInstUsesWith(PN, NonPhiInVal);
+      }
+    }
+  }
+
+  // If there are multiple PHIs, sort their operands so that they all list
+  // the blocks in the same order.  This will help identical PHIs be
+  // eliminated by other passes.  Other passes shouldn't depend on this for
+  // correctness however.
+  PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+  if (&PN != FirstPN)
+    for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *BBA = PN.getIncomingBlock(i);
+      BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+      if (BBA != BBB) {
+        Value *VA = PN.getIncomingValue(i);
+        unsigned j = PN.getBasicBlockIndex(BBB);
+        Value *VB = PN.getIncomingValue(j);
+        PN.setIncomingBlock(i, BBB);
+        PN.setIncomingValue(i, VB);
+        PN.setIncomingBlock(j, BBA);
+        PN.setIncomingValue(j, VA);
+        // NOTE: Instcombine normally would want us to "return &PN" if we
+        // modified any of the operands of an instruction.  However, since we
+        // aren't adding or removing uses (just rearranging them) we don't do
+        // this in this case.
+      }
+    }
+
+  // If this is an integer PHI and we know that it has an illegal type, see if
+  // it is only used by trunc or trunc(lshr) operations.  If so, we split the
+  // PHI into the various pieces being extracted.  This sort of thing is
+  // introduced when SROA promotes an aggregate to a single large integer
+  // type.
+  if (isa<IntegerType>(PN.getType()) && TD &&
+      !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+    if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+      return Res;
+
+  return 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 000000000000..18b2dff2b65d
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,703 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, +/// returning the kind and providing the out parameter results if we +/// successfully match. +static SelectPatternFlavor +MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { + SelectInst *SI = dyn_cast(V); + if (SI == 0) return SPF_UNKNOWN; + + ICmpInst *ICI = dyn_cast(SI->getCondition()); + if (ICI == 0) return SPF_UNKNOWN; + + LHS = ICI->getOperand(0); + RHS = ICI->getOperand(1); + + // (icmp X, Y) ? X : Y + if (SI->getTrueValue() == ICI->getOperand(0) && + SI->getFalseValue() == ICI->getOperand(1)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMAX; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMAX; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMIN; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMIN; + } + } + + // (icmp X, Y) ? Y : X + if (SI->getTrueValue() == ICI->getOperand(1) && + SI->getFalseValue() == ICI->getOperand(0)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMAX; + } + } + + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) + + return SPF_UNKNOWN; +} + + +/// GetSelectFoldableOperands - We want to turn code that looks like this: +/// %C = or %A, %B +/// %D = select %cond, %C, %A +/// into: +/// %C = select %cond, %B, 0 +/// %D = or %A, %C +/// +/// Assuming that the specified instruction is an operand to the select, return +/// a bitmask indicating which operands of this instruction are foldable if they +/// equal the other incoming value of the select. +/// +static unsigned GetSelectFoldableOperands(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return 3; // Can fold through either operand. + case Instruction::Sub: // Can only fold on the amount subtracted. + case Instruction::Shl: // Can only fold on the shift amount. + case Instruction::LShr: + case Instruction::AShr: + return 1; + default: + return 0; // Cannot fold + } +} + +/// GetSelectFoldableConstant - For the same transformation as the previous +/// function, return the identity constant that goes into the select. +static Constant *GetSelectFoldableConstant(Instruction *I) { + switch (I->getOpcode()) { + default: llvm_unreachable("This cannot happen!"); + case Instruction::Add: + case Instruction::Sub: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return Constant::getNullValue(I->getType()); + case Instruction::And: + return Constant::getAllOnesValue(I->getType()); + case Instruction::Mul: + return ConstantInt::get(I->getType(), 1); + } +} + +/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI +/// have the same opcode and only one use each. Try to simplify this. 
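+/// e.g. assuming both truncs have one use and the same source type,
+///   select i1 %c, (trunc i64 %x to i32), (trunc i64 %y to i32)
+/// becomes
+///   %v = select i1 %c, i64 %x, i64 %y
+///   %r = trunc i64 %v to i32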
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI) { + if (TI->getNumOperands() == 1) { + // If this is a non-volatile load or a cast from the same type, + // merge. + if (TI->isCast()) { + if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + return 0; + } else { + return 0; // unknown unary op. + } + + // Fold this by inserting a select from the input values. + SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName()+".v"); + InsertNewInstBefore(NewSI, SI); + return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, + TI->getType()); + } + + // Only handle binary operators here. + if (!isa(TI)) + return 0; + + // Figure out if the operations have any operands in common. + Value *MatchOp, *OtherOpT, *OtherOpF; + bool MatchIsOpZero; + if (TI->getOperand(0) == FI->getOperand(0)) { + MatchOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(1)) { + MatchOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = false; + } else if (!TI->isCommutative()) { + return 0; + } else if (TI->getOperand(0) == FI->getOperand(1)) { + MatchOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(0)) { + MatchOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else { + return 0; + } + + // If we reach here, they do have operations in common. + SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT, + OtherOpF, SI.getName()+".v"); + InsertNewInstBefore(NewSI, SI); + + if (BinaryOperator *BO = dyn_cast(TI)) { + if (MatchIsOpZero) + return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI); + else + return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); + } + llvm_unreachable("Shouldn't get here"); + return 0; +} + +static bool isSelect01(Constant *C1, Constant *C2) { + ConstantInt *C1I = dyn_cast(C1); + if (!C1I) + return false; + ConstantInt *C2I = dyn_cast(C2); + if (!C2I) + return false; + return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); +} + +/// FoldSelectIntoOp - Try fold the select into one of the operands to +/// facilitate further optimization. +Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, + Value *FalseVal) { + // See the comment above GetSelectFoldableOperands for a description of the + // transformation we are doing here. + if (Instruction *TVI = dyn_cast(TrueVal)) { + if (TVI->hasOneUse() && TVI->getNumOperands() == 2 && + !isa(FalseVal)) { + if (unsigned SFO = GetSelectFoldableOperands(TVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { + OpToFold = 1; + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + OpToFold = 2; + } + + if (OpToFold) { + Constant *C = GetSelectFoldableConstant(TVI); + Value *OOp = TVI->getOperand(2-OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0 and 1. 
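+          // e.g. (select %c, (or %a, 1), %a) is still folded to
+          //   %s = select %c, 1, 0
+          //   or %a, %s
+          // because the new select is between 0 and 1.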
+          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+            Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
+            InsertNewInstBefore(NewSel, SI);
+            NewSel->takeName(TVI);
+            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
+              return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
+            llvm_unreachable("Unknown instruction!!");
+          }
+        }
+      }
+    }
+  }
+
+  if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+    if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+        !isa<Constant>(TrueVal)) {
+      if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+        unsigned OpToFold = 0;
+        if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+          OpToFold = 1;
+        } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+          OpToFold = 2;
+        }
+
+        if (OpToFold) {
+          Constant *C = GetSelectFoldableConstant(FVI);
+          Value *OOp = FVI->getOperand(2-OpToFold);
+          // Avoid creating select between 2 constants unless it's selecting
+          // between 0 and 1.
+          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+            Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
+            InsertNewInstBefore(NewSel, SI);
+            NewSel->takeName(FVI);
+            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
+              return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
+            llvm_unreachable("Unknown instruction!!");
+          }
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+                                                   ICmpInst *ICI) {
+  bool Changed = false;
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+  Value *CmpLHS = ICI->getOperand(0);
+  Value *CmpRHS = ICI->getOperand(1);
+  Value *TrueVal = SI.getTrueValue();
+  Value *FalseVal = SI.getFalseValue();
+
+  // Check cases where the comparison is with a constant that
+  // can be adjusted to fit the min/max idiom.  We may edit ICI in
+  // place here, so make sure the select is the only user.
+  if (ICI->hasOneUse())
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+      switch (Pred) {
+      default: break;
+      case ICmpInst::ICMP_ULT:
+      case ICmpInst::ICMP_SLT: {
+        // X < MIN ? T : F  -->  F
+        if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+          return ReplaceInstUsesWith(SI, FalseVal);
+        // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
+        Constant *AdjustedRHS =
+          ConstantInt::get(CI->getContext(), CI->getValue()-1);
+        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+          Pred = ICmpInst::getSwappedPredicate(Pred);
+          CmpRHS = AdjustedRHS;
+          std::swap(FalseVal, TrueVal);
+          ICI->setPredicate(Pred);
+          ICI->setOperand(1, CmpRHS);
+          SI.setOperand(1, TrueVal);
+          SI.setOperand(2, FalseVal);
+          Changed = true;
+        }
+        break;
+      }
+      case ICmpInst::ICMP_UGT:
+      case ICmpInst::ICMP_SGT: {
+        // X > MAX ? T : F  -->  F
+        if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+          return ReplaceInstUsesWith(SI, FalseVal);
+        // X > C ? X : C+1  -->  X < C+1 ? C+1 : X
+        Constant *AdjustedRHS =
+          ConstantInt::get(CI->getContext(), CI->getValue()+1);
+        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+          Pred = ICmpInst::getSwappedPredicate(Pred);
+          CmpRHS = AdjustedRHS;
+          std::swap(FalseVal, TrueVal);
+          ICI->setPredicate(Pred);
+          ICI->setOperand(1, CmpRHS);
+          SI.setOperand(1, TrueVal);
+          SI.setOperand(2, FalseVal);
+          Changed = true;
+        }
+        break;
+      }
+      }
+
+      // (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed
+      // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
+      CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+      if (match(TrueVal, m_ConstantInt<-1>()) &&
+          match(FalseVal, m_ConstantInt<0>()))
+        Pred = ICI->getPredicate();
+      else if (match(TrueVal, m_ConstantInt<0>()) &&
+               match(FalseVal, m_ConstantInt<-1>()))
+        Pred = CmpInst::getInversePredicate(ICI->getPredicate());
+
+      if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
+        // If we are just checking for an icmp eq of a single bit and zext'ing
+        // it to an integer, then shift the bit to the appropriate place and
+        // then cast to integer to avoid the comparison.
+        const APInt &Op1CV = CI->getValue();
+
+        // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
+        // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+        if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+            (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+          Value *In = ICI->getOperand(0);
+          Value *Sh = ConstantInt::get(In->getType(),
+                                       In->getType()->getScalarSizeInBits()-1);
+          In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
+                                                      In->getName()+".lobit"),
+                                   *ICI);
+          if (In->getType() != SI.getType())
+            In = CastInst::CreateIntegerCast(In, SI.getType(),
+                                             true/*SExt*/, "tmp", ICI);
+
+          if (Pred == ICmpInst::ICMP_SGT)
+            In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
+                                                   In->getName()+".not"), *ICI);
+
+          return ReplaceInstUsesWith(SI, In);
+        }
+      }
+    }
+
+  if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
+    // Transform (X == Y) ? X : Y  -> Y
+    if (Pred == ICmpInst::ICMP_EQ)
+      return ReplaceInstUsesWith(SI, FalseVal);
+    // Transform (X != Y) ? X : Y  -> X
+    if (Pred == ICmpInst::ICMP_NE)
+      return ReplaceInstUsesWith(SI, TrueVal);
+    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+  } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
+    // Transform (X == Y) ? Y : X  -> X
+    if (Pred == ICmpInst::ICMP_EQ)
+      return ReplaceInstUsesWith(SI, FalseVal);
+    // Transform (X != Y) ? Y : X  -> Y
+    if (Pred == ICmpInst::ICMP_NE)
+      return ReplaceInstUsesWith(SI, TrueVal);
+    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+  }
+  return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is
+/// a PHI node (but the two may be in different blocks).  See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI.  For
+/// example, cases like this cannot be mapped:
+///
+///   X = phi [ C1, BB1], [C2, BB2]
+///   Y = add
+///   Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+                                                   const SelectInst &SI) {
+  // If the value is a non-instruction value like a constant or argument, it
+  // can always be mapped.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return true;
+
+  // If V is a PHI node defined in the same block as the condition PHI, we can
+  // map the arguments.
+  const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+  if (const PHINode *VP = dyn_cast<PHINode>(I))
+    if (VP->getParent() == CondPHI->getParent())
+      return true;
+
+  // Otherwise, if the PHI and select are defined in the same block and if V
+  // is defined in a different block, then we can transform it.
+  if (SI.getParent() == CondPHI->getParent() &&
+      I->getParent() != CondPHI->getParent())
+    return true;
+
+  // Otherwise we have a 'hard' case and we can't tell without doing more
+  // detailed dominator based analysis, punt.
+  return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g.
a min or max) of an SPF of the form: +/// SPF2(SPF1(A, B), C) +Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, + SelectPatternFlavor SPF1, + Value *A, Value *B, + Instruction &Outer, + SelectPatternFlavor SPF2, Value *C) { + if (C == A || C == B) { + // MAX(MAX(A, B), B) -> MAX(A, B) + // MIN(MIN(a, b), a) -> MIN(a, b) + if (SPF1 == SPF2) + return ReplaceInstUsesWith(Outer, Inner); + + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || + (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || + (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || + (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) + return ReplaceInstUsesWith(Outer, C); + } + + // TODO: MIN(MIN(A, 23), 97) + return 0; +} + + + + +Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *C = dyn_cast(CondVal)) + return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); + + // select C, X, X -> X + if (TrueVal == FalseVal) + return ReplaceInstUsesWith(SI, TrueVal); + + if (isa(TrueVal)) // select C, undef, X -> X + return ReplaceInstUsesWith(SI, FalseVal); + if (isa(FalseVal)) // select C, X, undef -> X + return ReplaceInstUsesWith(SI, TrueVal); + if (isa(CondVal)) { // select undef, X, Y -> X or Y + if (isa(TrueVal)) + return ReplaceInstUsesWith(SI, TrueVal); + else + return ReplaceInstUsesWith(SI, FalseVal); + } + + if (SI.getType()->isInteger(1)) { + if (ConstantInt *C = dyn_cast(TrueVal)) { + if (C->getZExtValue()) { + // Change: A = select B, true, C --> A = or B, C + return BinaryOperator::CreateOr(CondVal, FalseVal); + } else { + // Change: A = select B, false, C --> A = and !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return BinaryOperator::CreateAnd(NotCond, FalseVal); + } + } else if (ConstantInt *C = dyn_cast(FalseVal)) { + if (C->getZExtValue() == false) { + // Change: A = select B, C, false --> A = and B, C + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } else { + // Change: A = select B, C, true --> A = or !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return BinaryOperator::CreateOr(NotCond, TrueVal); + } + } + + // select a, b, a -> a&b + // select a, a, b -> a|b + if (CondVal == TrueVal) + return BinaryOperator::CreateOr(CondVal, FalseVal); + else if (CondVal == FalseVal) + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } + + // Selecting between two integer constants? + if (ConstantInt *TrueValC = dyn_cast(TrueVal)) + if (ConstantInt *FalseValC = dyn_cast(FalseVal)) { + // select C, 1, 0 -> zext C to int + if (FalseValC->isZero() && TrueValC->getValue() == 1) { + return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); + } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { + // select C, 0, 1 -> zext !C to int + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); + } + + if (ICmpInst *IC = dyn_cast(SI.getCondition())) { + // If one of the constants is zero (we know they can't both be) and we + // have an icmp instruction with zero, and we have an 'and' with the + // non-constant value, eliminate this whole mess. This corresponds to + // cases like this: ((X & 27) ? 
27 : 0) + if (TrueValC->isZero() || FalseValC->isZero()) + if (IC->isEquality() && isa(IC->getOperand(1)) && + cast(IC->getOperand(1))->isNullValue()) + if (Instruction *ICA = dyn_cast(IC->getOperand(0))) + if (ICA->getOpcode() == Instruction::And && + isa(ICA->getOperand(1)) && + (ICA->getOperand(1) == TrueValC || + ICA->getOperand(1) == FalseValC) && + cast(ICA->getOperand(1))->getValue().isPowerOf2()) { + // Okay, now we know that everything is set up, we just don't + // know whether we have a icmp_ne or icmp_eq and whether the + // true or false val is the zero. + bool ShouldNotVal = !TrueValC->isZero(); + ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; + Value *V = ICA; + if (ShouldNotVal) + V = InsertNewInstBefore(BinaryOperator::Create( + Instruction::Xor, V, ICA->getOperand(1)), SI); + return ReplaceInstUsesWith(SI, V); + } + } + } + + // See if we are selecting two values based on a comparison of the two values. + if (FCmpInst *FCI = dyn_cast(CondVal)) { + if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { + // Transform (X == Y) ? X : Y -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } + // Transform (X != Y) ? X : Y -> X + if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + return ReplaceInstUsesWith(SI, TrueVal); + // NOTE: if we wanted to, this is where to detect MIN/MAX + + } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ + // Transform (X == Y) ? Y : X -> X + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } + // Transform (X != Y) ? Y : X -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + return ReplaceInstUsesWith(SI, TrueVal); + // NOTE: if we wanted to, this is where to detect MIN/MAX + } + // NOTE: if we wanted to, this is where to detect ABS + } + + // See if we are selecting two values based on a comparison of the two values. + if (ICmpInst *ICI = dyn_cast(CondVal)) + if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) + return Result; + + if (Instruction *TI = dyn_cast(TrueVal)) + if (Instruction *FI = dyn_cast(FalseVal)) + if (TI->hasOneUse() && FI->hasOneUse()) { + Instruction *AddOp = 0, *SubOp = 0; + + // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) + if (TI->getOpcode() == FI->getOpcode()) + if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) + return IV; + + // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is + // even legal for FP. 
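+        // e.g. select %c, (add %x, %y), (sub %x, %y)
+        //        --> %n = select %c, %y, (sub 0, %y); add %x, %n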
+ if ((TI->getOpcode() == Instruction::Sub && + FI->getOpcode() == Instruction::Add) || + (TI->getOpcode() == Instruction::FSub && + FI->getOpcode() == Instruction::FAdd)) { + AddOp = FI; SubOp = TI; + } else if ((FI->getOpcode() == Instruction::Sub && + TI->getOpcode() == Instruction::Add) || + (FI->getOpcode() == Instruction::FSub && + TI->getOpcode() == Instruction::FAdd)) { + AddOp = TI; SubOp = FI; + } + + if (AddOp) { + Value *OtherAddOp = 0; + if (SubOp->getOperand(0) == AddOp->getOperand(0)) { + OtherAddOp = AddOp->getOperand(1); + } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { + OtherAddOp = AddOp->getOperand(0); + } + + if (OtherAddOp) { + // So at this point we know we have (Y -> OtherAddOp): + // select C, (add X, Y), (sub X, Z) + Value *NegVal; // Compute -Z + if (Constant *C = dyn_cast(SubOp->getOperand(1))) { + NegVal = ConstantExpr::getNeg(C); + } else { + NegVal = InsertNewInstBefore( + BinaryOperator::CreateNeg(SubOp->getOperand(1), + "tmp"), SI); + } + + Value *NewTrueOp = OtherAddOp; + Value *NewFalseOp = NegVal; + if (AddOp != TI) + std::swap(NewTrueOp, NewFalseOp); + Instruction *NewSel = + SelectInst::Create(CondVal, NewTrueOp, + NewFalseOp, SI.getName() + ".p"); + + NewSel = InsertNewInstBefore(NewSel, SI); + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + } + } + } + + // See if we can fold the select into one of our operands. + if (SI.getType()->isInteger()) { + if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) + return FoldI; + + // MAX(MAX(a, b), a) -> MAX(a, b) + // MIN(MIN(a, b), a) -> MIN(a, b) + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + Value *LHS, *RHS, *LHS2, *RHS2; + if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { + if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, + SI, SPF, RHS)) + return R; + if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, + SI, SPF, LHS)) + return R; + } + + // TODO. + // ABS(-X) -> ABS(X) + // ABS(ABS(X)) -> ABS(X) + } + + // See if we can fold the select into a phi node if the condition is a select. + if (isa(SI.getCondition())) + // The true/false values have to be live in the PHI predecessor's blocks. + if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && + CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; + + if (BinaryOperator::isNot(CondVal)) { + SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + return &SI; + } + + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp new file mode 100644 index 000000000000..fe91da1b6af4 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -0,0 +1,427 @@ +//===- InstCombineShifts.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitShl, visitLShr, and visitAShr functions. 
+// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { + assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // shl X, 0 == X and shr X, 0 == X + // shl 0, X == 0 and shr 0, X == 0 + if (Op1 == Constant::getNullValue(Op1->getType()) || + Op0 == Constant::getNullValue(Op0->getType())) + return ReplaceInstUsesWith(I, Op0); + + if (isa(Op0)) { + if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef + return ReplaceInstUsesWith(I, Op0); + else // undef << X -> 0, undef >>u X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + if (isa(Op1)) { + if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X + return ReplaceInstUsesWith(I, Op0); + else // X << undef, X >>u undef -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + + // See if we can fold away this shift. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // Try to fold constant and into select arguments. + if (isa(Op0)) + if (SelectInst *SI = dyn_cast(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (ConstantInt *CUI = dyn_cast(Op1)) + if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) + return Res; + return 0; +} + +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I) { + bool isLeftShift = I.getOpcode() == Instruction::Shl; + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); + + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate + // a signed shift. + // + if (Op1->uge(TypeBits)) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); + else { + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); + return &I; + } + } + + // ((X*C1) << C2) == (X * (C1 << C2)) + if (BinaryOperator *BO = dyn_cast(Op0)) + if (BO->getOpcode() == Instruction::Mul && isLeftShift) + if (Constant *BOOp = dyn_cast(BO->getOperand(1))) + return BinaryOperator::CreateMul(BO->getOperand(0), + ConstantExpr::getShl(BOOp, Op1)); + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) + if (TruncInst *TI = dyn_cast(Op0)) { + Instruction *TrOp = dyn_cast(TI->getOperand(0)); + // If 'shift2' is an ashr, we would have to get the sign bit into a funny + // place. Don't try to do this transformation in this case. Also, we + // require that the input operand is a shift-by-constant so that we have + // confidence that the shifts will get folded together. We could do this + // xform in more cases, but it is unlikely to be profitable. + if (TrOp && I.isLogicalShift() && TrOp->isShift() && + isa(TrOp->getOperand(1))) { + // Okay, we'll do this xform. Make the shift of shift. 
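+      // e.g. (shl (trunc (lshr i32 %x, 4) to i16), 2) becomes
+      //   %s = shl i32 (lshr i32 %x, 4), 2
+      //   trunc i32 (and i32 %s, 0x3FFFC) to i16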
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + // (shift2 (shift1 & 0x00FF), c2) + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); + + // For logical shifts, the truncation has the effect of making the high + // part of the register be zeros. Emulate this by inserting an AND to + // clear the top bits as needed. This 'and' will usually be zapped by + // other xforms later if dead. + unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned DstSize = TI->getType()->getScalarSizeInBits(); + APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); + + // The mask we constructed says what the trunc would do if occurring + // between the shifts. We want to know the effect *after* the second + // shift. We know that it is a logical shift by a constant, so adjust the + // mask as appropriate. + if (I.getOpcode() == Instruction::Shl) + MaskV <<= Op1->getZExtValue(); + else { + assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); + MaskV = MaskV.lshr(Op1->getZExtValue()); + } + + // shift1 & 0x00FF + Value *And = Builder->CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); + + // Return the value truncated to the interesting size. + return new TruncInst(And, I.getType()); + } + } + + if (Op0->hasOneUse()) { + if (BinaryOperator *Op0BO = dyn_cast(Op0)) { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + Value *V1, *V2; + ConstantInt *CC; + switch (Op0BO->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // These operators commute. + // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) + if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && + match(Op0BO->getOperand(1), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + } + + // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) + Value *Op0BOOp1 = Op0BO->getOperand(1); + if (isLeftShift && Op0BOOp1->hasOneUse() && + match(Op0BOOp1, + m_And(m_Shr(m_Value(V1), m_Specific(Op1)), + m_ConstantInt(CC))) && + cast(Op0BOOp1)->getOperand(0)->hasOneUse()) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); + return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); + } + } + + // FALL THROUGH. 
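+      // The case below matches the shr on operand 0 (the commutative
+      // operators above reach it via the fall-through), e.g.
+      //   ((X >> C) - Y) << C  -->  (X - (Y << C)) & (~0 << C)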
+ case Instruction::Sub: { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && + match(Op0BO->getOperand(0), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + } + + // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) + if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && + match(Op0BO->getOperand(0), + m_And(m_Shr(m_Value(V1), m_Value(V2)), + m_ConstantInt(CC))) && V2 == Op1 && + cast(Op0BO->getOperand(0)) + ->getOperand(0)->hasOneUse()) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); + + return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); + } + + break; + } + } + + + // If the operand is an bitwise operator with a constant RHS, and the + // shift is the only use, we can pull it out of the shift. + if (ConstantInt *Op0C = dyn_cast(Op0BO->getOperand(1))) { + bool isValid = true; // Valid only for And, Or, Xor + bool highBitSet = false; // Transform if high bit of constant set? + + switch (Op0BO->getOpcode()) { + default: isValid = false; break; // Do not perform transform! + case Instruction::Add: + isValid = isLeftShift; + break; + case Instruction::Or: + case Instruction::Xor: + highBitSet = false; + break; + case Instruction::And: + highBitSet = true; + break; + } + + // If this is a signed shift right, and the high bit is modified + // by the logical operation, do not perform the transformation. + // The highBitSet boolean indicates the value of the high bit of + // the constant which would cause it to be modified for this + // operation. + // + if (isValid && I.getOpcode() == Instruction::AShr) + isValid = Op0C->getValue()[TypeBits-1] == highBitSet; + + if (isValid) { + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); + + Value *NewShift = + Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + NewShift->takeName(Op0BO); + + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, + NewRHS); + } + } + } + } + + // Find out if this is a shift of a shift by a constant. + BinaryOperator *ShiftOp = dyn_cast(Op0); + if (ShiftOp && !ShiftOp->isShift()) + ShiftOp = 0; + + if (ShiftOp && isa(ShiftOp->getOperand(1))) { + ConstantInt *ShiftAmt1C = cast(ShiftOp->getOperand(1)); + uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); + if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + Value *X = ShiftOp->getOperand(0); + + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. + + const IntegerType *Ty = cast(I.getType()); + + // Check for (X << c1) << c2 and (X >> c1) >> c2 + if (I.getOpcode() == ShiftOp->getOpcode()) { + // If this is oversized composite shift, then unsigned shifts get 0, ashr + // saturates. 
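+      // e.g. (lshr (lshr i8 %x, 5), 4) --> 0, while
+      //      (ashr (ashr i8 %x, 5), 4) --> ashr i8 %x, 7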
+ if (AmtSum >= TypeBits) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. + } + + return BinaryOperator::Create(I.getOpcode(), X, + ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::LShr && + I.getOpcode() == Instruction::AShr) { + if (AmtSum >= TypeBits) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::AShr && + I.getOpcode() == Instruction::LShr) { + // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. + if (AmtSum >= TypeBits) + AmtSum = TypeBits-1; + + Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(), Mask)); + } + + // Okay, if we get here, one shift must be left, and the other shift must be + // right. See if the amounts are equal. + if (ShiftAmt1 == ShiftAmt2) { + // If we have ((X >>? C) << C), turn this into X & (-1 << C). + if (I.getOpcode() == Instruction::Shl) { + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(),Mask)); + } + // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). + if (I.getOpcode() == Instruction::LShr) { + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(), Mask)); + } + } else if (ShiftAmt1 < ShiftAmt2) { + uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; + + // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) + if (I.getOpcode() == Instruction::Shl) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr) { + assert(ShiftOp->getOpcode() == Instruction::Shl); + Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. + } else { + assert(ShiftAmt2 < ShiftAmt1); + uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; + + // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) + if (I.getOpcode() == Instruction::Shl) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, + ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr) { + assert(ShiftOp->getOpcode() == Instruction::Shl); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. + } + } + return 0; +} + +Instruction *InstCombiner::visitShl(BinaryOperator &I) { + return commonShiftTransforms(I); +} + +Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + return commonShiftTransforms(I); +} + +Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (ConstantInt *CSI = dyn_cast(Op0)) { + // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) + if (CSI->isAllOnesValue()) + return ReplaceInstUsesWith(I, CSI); + } + + if (ConstantInt *Op1C = dyn_cast(Op1)) { + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we + // have a sign-extend idiom. If the input value is known to already be sign + // extended enough, delete the extension. + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + ComputeNumSignBits(X) > Op1C->getZExtValue()) + return ReplaceInstUsesWith(I, X); + } + + // See if we can turn a signed shr into an unsigned shr. + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getScalarSizeInBits()))) + return BinaryOperator::CreateLShr(Op0, Op1); + + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return ReplaceInstUsesWith(I, Op0); + + return 0; +} + diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp new file mode 100644 index 000000000000..74a1b6803d4d --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -0,0 +1,1106 @@ +//===- InstCombineSimplifyDemanded.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains logic for simplifying instructions based on information +// about how they are used. +// +//===----------------------------------------------------------------------===// + + +#include "InstCombine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/IntrinsicInst.h" + +using namespace llvm; + + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. 
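+///
+/// e.g. if I is (and i32 %x, 255) and only the low four bits of the result
+/// are demanded, the 255 is shrunk to 15.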
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+                                   APInt Demanded) {
+  assert(I && "No instruction?");
+  assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+  // If the operand is not a constant integer, nothing to do.
+  ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+  if (!OpC) return false;
+
+  // If there are no bits set that aren't demanded, nothing to do.
+  Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+  if ((~Demanded & OpC->getValue()) == 0)
+    return false;
+
+  // This instruction is producing bits that are not demanded.  Shrink the
+  // RHS.
+  Demanded &= OpC->getValue();
+  I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+  return true;
+}
+
+
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about.  See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+                                     KnownZero, KnownOne, 0);
+  if (V == 0) return false;
+  if (V == &Inst) return true;
+  ReplaceInstUsesWith(Inst, V);
+  return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place.  It
+/// returns true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+                                        APInt &KnownZero, APInt &KnownOne,
+                                        unsigned Depth) {
+  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+                                          KnownZero, KnownOne, Depth);
+  if (NewVal == 0) return false;
+  U = NewVal;
+  return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a
+/// simpler value based on the demanded bits.  When this function is called,
+/// it is known that only the bits set in DemandedMask of the result of V are
+/// ever used downstream.  Consequently, depending on the mask and V, it may
+/// be possible to replace V with a constant or one of its operands.  In such
+/// cases, this function returns the simpler value.  In all other cases, it
+/// returns null after analyzing the expression, setting KnownOne to all the
+/// bits that are known to be one in the expression and KnownZero to all the
+/// bits that are known to be zero.  These are provided to potentially allow
+/// the caller (which might recursively be SimplifyDemandedBits itself) to
+/// simplify the expression.  KnownOne and KnownZero always follow the
+/// invariant that KnownOne & KnownZero == 0.  That is, a bit can't be both 1
+/// and 0.  Note that the bits in KnownOne and KnownZero may only be accurate
+/// for those bits set in DemandedMask.  Note also that the bitwidth of V,
+/// DemandedMask, KnownZero and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification.  This returns V itself if it did some simplification of
+/// V's operands based on the information about what bits are demanded.  This
+/// returns some other non-null value if it found out that V is equal to
+/// another value in the context where the specified bits are demanded, but
+/// not for all users.
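+///
+/// e.g. for (or i32 %x, 255), if only the low byte of the result is
+/// demanded, every demanded bit is known one, so the constant 255 itself is
+/// returned as the simpler value.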
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) { + assert(V != 0 && "Null pointer of Value???"); + assert(Depth <= 6 && "Limit Search Depth"); + uint32_t BitWidth = DemandedMask.getBitWidth(); + const Type *VTy = V->getType(); + assert((TD || !isa(VTy)) && + "SimplifyDemandedBits needs to know bit widths!"); + assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && + (!VTy->isIntOrIntVector() || + VTy->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "Value *V, DemandedMask, KnownZero and KnownOne " + "must have same BitWidth"); + if (ConstantInt *CI = dyn_cast(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue() & DemandedMask; + KnownZero = ~KnownOne & DemandedMask; + return 0; + } + if (isa(V)) { + // We know all of the bits for a constant! + KnownOne.clear(); + KnownZero = DemandedMask; + return 0; + } + + KnownZero.clear(); + KnownOne.clear(); + if (DemandedMask == 0) { // Not demanding any bits from V. + if (isa(V)) + return 0; + return UndefValue::get(VTy); + } + + if (Depth == 6) // Limit search depth. + return 0; + + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; + + Instruction *I = dyn_cast(V); + if (!I) { + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + return 0; // Only analyze instructions. + } + + // If there are multiple uses of this value and we aren't at the root, then + // we can't do any simplifications of the operands, because DemandedMask + // only reflects the bits demanded by *one* of the users. + if (Depth != 0 && !I->hasOneUse()) { + // Despite the fact that we can't simplify this instruction in all User's + // context, we can at least compute the knownzero/knownone bits, and we can + // do simplifications that apply to *just* the one user if we know that + // this instruction has a simpler value in that context. + if (I->getOpcode() == Instruction::And) { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + } else if (I->getOpcode() == Instruction::Or) { + // We can simplify (X|Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + // If either the LHS or the RHS are One, the result is One. + ComputeMaskedBits(I->getOperand(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. 
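+      // e.g. if only the low byte is demanded and operand 1 is known to be
+      // zero in that byte, (or X, Y) simplifies to X for this user.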
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + } + + // Compute the KnownZero/KnownOne bits to simplify things downstream. + ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + return 0; + } + + // If this is the root being simplified, allow it to have multiple uses, + // just set the DemandedMask to all bits so that we can try to simplify the + // operands. This allows visitTruncInst (for example) to simplify the + // operand of a trunc without duplicating all the logic below. + if (Depth == 0 && !V->hasOneUse()) + DemandedMask = APInt::getAllOnesValue(BitWidth); + + switch (I->getOpcode()) { + default: + ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; + case Instruction::And: + // If either the LHS or the RHS are Zero, the result is zero. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + return I; + + // Output known-1 bits are only known if set in both the LHS & RHS. + RHSKnownOne &= LHSKnownOne; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + RHSKnownZero |= LHSKnownZero; + break; + case Instruction::Or: + // If either the LHS or the RHS are One, the result is One. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. 
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + RHSKnownZero &= LHSKnownZero; + // Output known-1 are known to be set if set in either the LHS | RHS. + RHSKnownOne |= LHSKnownOne; + break; + case Instruction::Xor: { + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | + (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | + (RHSKnownOne & LHSKnownZero); + + // If all of the demanded bits are known to be zero on one side or the + // other, turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + Instruction *Or = + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstBefore(Or, *I); + } + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + // all known + if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = + BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + return InsertNewInstBefore(And, *I); + } + } + + // If the RHS is a constant, see if we can simplify it. + // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // If our LHS is an 'and' and if it has one use, and if any of the bits we + // are flipping are known to be set, then the xor is just resetting those + // bits to zero. We can just knock out bits from the 'and' and the 'xor', + // simplifying both of them. 
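+    // Illustration: if B is known to have bit 2 set, then in ((B & 12) ^ 4)
+    // the xor merely clears a bit the 'and' result is known to produce, so
+    // bit 2 can be dropped from both constants, giving (B & 8).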
+    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+          isa<ConstantInt>(I->getOperand(1)) &&
+          isa<ConstantInt>(LHSInst->getOperand(1)) &&
+          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+        Constant *AndC =
+          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+        Instruction *NewAnd =
+          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+        InsertNewInstBefore(NewAnd, *I);
+
+        Constant *XorC =
+          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+        Instruction *NewXor =
+          BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+        return InsertNewInstBefore(NewXor, *I);
+      }
+
+    RHSKnownZero = KnownZeroOut;
+    RHSKnownOne  = KnownOneOut;
+    break;
+  }
+  case Instruction::Select:
+    if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+                             RHSKnownZero, RHSKnownOne, Depth+1) ||
+        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+                             LHSKnownZero, LHSKnownOne, Depth+1))
+      return I;
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+    // If the operands are constants, see if we can simplify them.
+    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+        ShrinkDemandedConstant(I, 2, DemandedMask))
+      return I;
+
+    // Only known if known in both the LHS and RHS.
+    RHSKnownOne &= LHSKnownOne;
+    RHSKnownZero &= LHSKnownZero;
+    break;
+  case Instruction::Trunc: {
+    unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+    DemandedMask.zext(truncBf);
+    RHSKnownZero.zext(truncBf);
+    RHSKnownOne.zext(truncBf);
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+                             RHSKnownZero, RHSKnownOne, Depth+1))
+      return I;
+    DemandedMask.trunc(BitWidth);
+    RHSKnownZero.trunc(BitWidth);
+    RHSKnownOne.trunc(BitWidth);
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    break;
+  }
+  case Instruction::BitCast:
+    if (!I->getOperand(0)->getType()->isIntOrIntVector())
+      return false;  // vector->int or fp->int?
+
+    if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+      if (const VectorType *SrcVTy =
+            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+          // Don't touch a bitcast between vectors of different element counts.
+          return false;
+      } else
+        // Don't touch a scalar-to-vector bitcast.
+        return false;
+    } else if (isa<VectorType>(I->getOperand(0)->getType()))
+      // Don't touch a vector-to-scalar bitcast.
+      return false;
+
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+                             RHSKnownZero, RHSKnownOne, Depth+1))
+      return I;
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    break;
+  case Instruction::ZExt: {
+    // Compute the bits in the result that are not present in the input.
+    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+    DemandedMask.trunc(SrcBitWidth);
+    RHSKnownZero.trunc(SrcBitWidth);
+    RHSKnownOne.trunc(SrcBitWidth);
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+                             RHSKnownZero, RHSKnownOne, Depth+1))
+      return I;
+    DemandedMask.zext(BitWidth);
+    RHSKnownZero.zext(BitWidth);
+    RHSKnownOne.zext(BitWidth);
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    // The top bits are known to be zero.
+    RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+    break;
+  }
+  case Instruction::SExt: {
+    // Compute the bits in the result that are not present in the input.
+    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+    APInt InputDemandedBits = DemandedMask &
+                              APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+    APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if ((NewBits & DemandedMask) != 0)
+      InputDemandedBits.set(SrcBitWidth-1);
+
+    InputDemandedBits.trunc(SrcBitWidth);
+    RHSKnownZero.trunc(SrcBitWidth);
+    RHSKnownOne.trunc(SrcBitWidth);
+    if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+                             RHSKnownZero, RHSKnownOne, Depth+1))
+      return I;
+    InputDemandedBits.zext(BitWidth);
+    RHSKnownZero.zext(BitWidth);
+    RHSKnownOne.zext(BitWidth);
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+
+    // If the input sign bit is known zero, or if the NewBits are not demanded,
+    // convert this into a zero extension.
+    if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+      // Convert to ZExt cast.
+      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+      return InsertNewInstBefore(NewCast, *I);
+    } else if (RHSKnownOne[SrcBitWidth-1]) {    // Input sign bit known set
+      RHSKnownOne |= NewBits;
+    }
+    break;
+  }
+  case Instruction::Add: {
+    // Figure out what the input bits are.  If the top bits of the add result
+    // are not demanded, then the add doesn't demand them from its input
+    // either.
+    unsigned NLZ = DemandedMask.countLeadingZeros();
+
+    // If there is a constant on the RHS, there are a variety of xformations
+    // we can do.
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      // If null, this should be simplified elsewhere.  Some of the xforms here
+      // won't work if the RHS is zero.
+      if (RHS->isZero())
+        break;
+
+      // If the top bit of the output is demanded, demand everything from the
+      // input.  Otherwise, we demand all the input bits except NLZ top bits.
+      APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+      // Find information about known zero/one bits in the input.
+      if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+                               LHSKnownZero, LHSKnownOne, Depth+1))
+        return I;
+
+      // If the RHS of the add has bits set that can't affect the input, reduce
+      // the constant.
+      if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+        return I;
+
+      // Avoid excess work.
+      if (LHSKnownZero == 0 && LHSKnownOne == 0)
+        break;
+
+      // Turn it into OR if input bits are zero.
+      if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+        Instruction *Or =
+          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+                                   I->getName());
+        return InsertNewInstBefore(Or, *I);
+      }
+
+      // We can say something about the output known-zero and known-one bits,
+      // depending on potential carries from the input constant and the
+      // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
+      // bits set and the RHS constant is 0x01001, then we know we have a known
+      // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+      // To compute this, we first compute the potential carry bits.  These are
+      // the bits which may be modified.  I'm not aware of a better way to do
+      // this scan.
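+      // Worked example with 4-bit values: if LHSKnownZero = 1110 (only bit 0
+      // of the LHS is unknown, so the LHS is 0 or 1) and the constant is
+      // 0010, then CarryBits = 0000, the known-one mask is 0010 and the
+      // known-zero mask is 1100: the sum is 001x, i.e. 2 or 3.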
+      const APInt &RHSVal = RHS->getValue();
+      APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+      // Now that we know which bits have carries, compute the known-1/0 sets.
+
+      // Bits are known one if they are known zero in one operand and one in the
+      // other, and there is no input carry.
+      RHSKnownOne = ((LHSKnownZero & RHSVal) |
+                     (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+      // Bits are known zero if they are known zero in both operands and there
+      // is no input carry.
+      RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+    } else {
+      // If the high-bits of this ADD are not demanded, then it does not demand
+      // the high bits of its LHS or RHS.
+      if (DemandedMask[BitWidth-1] == 0) {
+        // Right fill the mask of bits for this ADD to demand the most
+        // significant bit and all those below it.
+        APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+        if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+                                 LHSKnownZero, LHSKnownOne, Depth+1) ||
+            SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+                                 LHSKnownZero, LHSKnownOne, Depth+1))
+          return I;
+      }
+    }
+    break;
+  }
+  case Instruction::Sub:
+    // If the high-bits of this SUB are not demanded, then it does not demand
+    // the high bits of its LHS or RHS.
+    if (DemandedMask[BitWidth-1] == 0) {
+      // Right fill the mask of bits for this SUB to demand the most
+      // significant bit and all those below it.
+      uint32_t NLZ = DemandedMask.countLeadingZeros();
+      APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+                               LHSKnownZero, LHSKnownOne, Depth+1) ||
+          SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+                               LHSKnownZero, LHSKnownOne, Depth+1))
+        return I;
+    }
+    // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+    // the known zeros and ones.
+    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+    break;
+  case Instruction::Shl:
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+                               RHSKnownZero, RHSKnownOne, Depth+1))
+        return I;
+      assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+      RHSKnownZero <<= ShiftAmt;
+      RHSKnownOne  <<= ShiftAmt;
+      // low bits known zero.
+      if (ShiftAmt)
+        RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+    }
+    break;
+  case Instruction::LShr:
+    // For a logical shift right
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+      // Unsigned shift right.
+      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+                               RHSKnownZero, RHSKnownOne, Depth+1))
+        return I;
+      assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+      RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+      RHSKnownOne  = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+      if (ShiftAmt) {
+        // Compute the new bits that are at the top now.
+        APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+        RHSKnownZero |= HighBits;  // high bits known zero.
+      }
+    }
+    break;
+  case Instruction::AShr:
+    // If this is an arithmetic shift right and only the low-bit is set, we can
+    // always convert this into a logical shr, even if the shift amount is
+    // variable.  The low bit of the shift cannot be an input sign bit unless
+    // the shift amount is >= the size of the datatype, which is undefined.
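+    // e.g. if only bit 0 of (ashr i32 %x, %n) is demanded, the ashr can be
+    // rewritten as (lshr i32 %x, %n): the two only differ in the bits shifted
+    // in at the top.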
+    if (DemandedMask == 1) {
+      // Perform the logical shift right.
+      Instruction *NewVal = BinaryOperator::CreateLShr(
+                        I->getOperand(0), I->getOperand(1), I->getName());
+      return InsertNewInstBefore(NewVal, *I);
+    }
+
+    // If the sign bit is the only bit demanded by this ashr, then there is no
+    // need to do it, the shift doesn't change the high bit.
+    if (DemandedMask.isSignBit())
+      return I->getOperand(0);
+
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+      // Signed shift right.
+      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+      // If any of the "high bits" are demanded, we should set the sign bit as
+      // demanded.
+      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+        DemandedMaskIn.set(BitWidth-1);
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+                               RHSKnownZero, RHSKnownOne, Depth+1))
+        return I;
+      assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+      // Compute the new bits that are at the top now.
+      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+      RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+      RHSKnownOne  = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+
+      // Handle the sign bits.
+      APInt SignBit(APInt::getSignBit(BitWidth));
+      // Adjust to where it is now in the mask.
+      SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+      // If the input sign bit is known to be zero, or if none of the top bits
+      // are demanded, turn this into an unsigned shift right.
+      if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
+          (HighBits & ~DemandedMask) == HighBits) {
+        // Perform the logical shift right.
+        Instruction *NewVal = BinaryOperator::CreateLShr(
+                          I->getOperand(0), SA, I->getName());
+        return InsertNewInstBefore(NewVal, *I);
+      } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
+        RHSKnownOne |= HighBits;
+      }
+    }
+    break;
+  case Instruction::SRem:
+    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      APInt RA = Rem->getValue().abs();
+      if (RA.isPowerOf2()) {
+        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
+          return I->getOperand(0);
+
+        APInt LowBits = RA - 1;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+                                 LHSKnownZero, LHSKnownOne, Depth+1))
+          return I;
+
+        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+          LHSKnownZero |= ~LowBits;
+
+        KnownZero |= LHSKnownZero & DemandedMask;
+
+        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+      }
+    }
+    break;
+  case Instruction::URem: {
+    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+                             KnownZero2, KnownOne2, Depth+1) ||
+        SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+                             KnownZero2, KnownOne2, Depth+1))
+      return I;
+
+    unsigned Leaders = KnownZero2.countLeadingOnes();
+    Leaders = std::max(Leaders,
+                       KnownZero2.countLeadingOnes());
+    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+    break;
+  }
+  case Instruction::Call:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::bswap: {
+        // If the only bits demanded come from one byte of the bswap result,
+        // just shift the input byte into position to eliminate the bswap.
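+        // Worked example: for an i32 bswap where only bits 15:8 of the result
+        // are demanded, those bits come from input bits 23:16 (NLZ = 16,
+        // NTZ = 8 after rounding), so the bswap is replaced by a single
+        // lshr of 8.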
+        unsigned NLZ = DemandedMask.countLeadingZeros();
+        unsigned NTZ = DemandedMask.countTrailingZeros();
+
+        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
+        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
+        // have 14 leading zeros, round to 8.
+        NLZ &= ~7;
+        NTZ &= ~7;
+        // If we need exactly one byte, we can do this transformation.
+        if (BitWidth-NLZ-NTZ == 8) {
+          unsigned ResultBit = NTZ;
+          unsigned InputBit = BitWidth-NTZ-8;
+
+          // Replace this with either a left or right shift to get the byte into
+          // the right place.
+          Instruction *NewVal;
+          if (InputBit > ResultBit)
+            NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+                    ConstantInt::get(I->getType(), InputBit-ResultBit));
+          else
+            NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+                    ConstantInt::get(I->getType(), ResultBit-InputBit));
+          NewVal->takeName(I);
+          return InsertNewInstBefore(NewVal, *I);
+        }
+
+        // TODO: Could compute known zero/one bits based on the input.
+        break;
+      }
+      }
+    }
+    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+    break;
+  }
+
+  // If the client is only demanding bits that we know, return the known
+  // constant.
+  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
+    return Constant::getIntegerValue(VTy, RHSKnownOne);
+  return false;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller.  This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified and the resultant value is
+/// returned.  This returns null if no change was made.
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+                                                APInt& UndefElts,
+                                                unsigned Depth) {
+  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+  APInt EltMask(APInt::getAllOnesValue(VWidth));
+  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+  if (isa<UndefValue>(V)) {
+    // If the entire vector is undefined, just return this info.
+    UndefElts = EltMask;
+    return 0;
+  } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+    UndefElts = EltMask;
+    return UndefValue::get(V->getType());
+  }
+
+  UndefElts = 0;
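+  // Illustration: if %v is <4 x i32> whose only user is
+  // 'extractelement <4 x i32> %v, i32 0', DemandedElts is 0001, and lanes
+  // 1-3 of a constant operand may be rewritten to undef below.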
+  if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
+    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+    Constant *Undef = UndefValue::get(EltTy);
+
+    std::vector<Constant*> Elts;
+    for (unsigned i = 0; i != VWidth; ++i)
+      if (!DemandedElts[i]) {   // If not demanded, set to undef.
+        Elts.push_back(Undef);
+        UndefElts.set(i);
+      } else if (isa<UndefValue>(CP->getOperand(i))) {   // Already undef.
+        Elts.push_back(Undef);
+        UndefElts.set(i);
+      } else {                               // Otherwise, defined.
+        Elts.push_back(CP->getOperand(i));
+      }
+
+    // If we changed the constant, return it.
+    Constant *NewCP = ConstantVector::get(Elts);
+    return NewCP != CP ? NewCP : 0;
+  } else if (isa<ConstantAggregateZero>(V)) {
+    // Simplify the CAZ to a ConstantVector where the non-demanded elements are
+    // set to undef.
+
+    // Check if this is identity. If so, return 0 since we are not simplifying
+    // anything.
+    if (DemandedElts == ((1ULL << VWidth) -1))
+      return 0;
+
+    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+    Constant *Zero = Constant::getNullValue(EltTy);
+    Constant *Undef = UndefValue::get(EltTy);
+    std::vector<Constant*> Elts;
+    for (unsigned i = 0; i != VWidth; ++i) {
+      Constant *Elt = DemandedElts[i] ? Zero : Undef;
+      Elts.push_back(Elt);
+    }
+    UndefElts = DemandedElts ^ EltMask;
+    return ConstantVector::get(Elts);
+  }
+
+  // Limit search depth.
+  if (Depth == 10)
+    return 0;
+
+  // If multiple users are using the root value, proceed with
+  // simplification conservatively assuming that all elements
+  // are needed.
+  if (!V->hasOneUse()) {
+    // Quit if we find multiple users of a non-root value though.
+    // They'll be handled when it's their turn to be visited by
+    // the main instcombine process.
+    if (Depth != 0)
+      // TODO: Just compute the UndefElts information recursively.
+      return 0;
+
+    // Conservatively assume that all elements are needed.
+    DemandedElts = EltMask;
+  }
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return 0;        // Only analyze instructions.
+
+  bool MadeChange = false;
+  APInt UndefElts2(VWidth, 0);
+  Value *TmpV;
+  switch (I->getOpcode()) {
+  default: break;
+
+  case Instruction::InsertElement: {
+    // If this is a variable index, we don't know which element it overwrites,
+    // so demand exactly the same input as we produce.
+    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+    if (Idx == 0) {
+      // Note that we can't propagate undef elt info, because we don't know
+      // which elt is getting updated.
+      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+                                        UndefElts2, Depth+1);
+      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+      break;
+    }
+
+    // If this is inserting an element that isn't demanded, remove this
+    // insertelement.
+    unsigned IdxNo = Idx->getZExtValue();
+    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+      Worklist.Add(I);
+      return I->getOperand(0);
+    }
+
+    // Otherwise, the element inserted overwrites whatever was there, so the
+    // input demanded set is simpler than the output set.
+    APInt DemandedElts2 = DemandedElts;
+    DemandedElts2.clear(IdxNo);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+                                      UndefElts, Depth+1);
+    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+    // The inserted element is defined.
+    UndefElts.clear(IdxNo);
+    break;
+  }
+  case Instruction::ShuffleVector: {
+    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+    uint64_t LHSVWidth =
+      cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+    for (unsigned i = 0; i < VWidth; i++) {
+      if (DemandedElts[i]) {
+        unsigned MaskVal = Shuffle->getMaskValue(i);
+        if (MaskVal != -1u) {
+          assert(MaskVal < LHSVWidth * 2 &&
+                 "shufflevector mask index out of range!");
+          if (MaskVal < LHSVWidth)
+            LeftDemanded.set(MaskVal);
+          else
+            RightDemanded.set(MaskVal - LHSVWidth);
+        }
+      }
+    }
+
+    APInt UndefElts4(LHSVWidth, 0);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+                                      UndefElts4, Depth+1);
+    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+    APInt UndefElts3(LHSVWidth, 0);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+                                      UndefElts3, Depth+1);
+    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+    bool NewUndefElts = false;
+    for (unsigned i = 0; i < VWidth; i++) {
+      unsigned MaskVal = Shuffle->getMaskValue(i);
+      if (MaskVal == -1u) {
+        UndefElts.set(i);
+      } else if (MaskVal < LHSVWidth) {
+        if (UndefElts4[MaskVal]) {
+          NewUndefElts = true;
+          UndefElts.set(i);
+        }
+      } else {
+        if (UndefElts3[MaskVal - LHSVWidth]) {
+          NewUndefElts = true;
+          UndefElts.set(i);
+        }
+      }
+    }
+
+    if (NewUndefElts) {
+      // Add additional discovered undefs.
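+      // e.g. if the mask reads lane 1 of the LHS and that operand's lane 1
+      // was just shown to be undef, the corresponding mask entry can itself
+      // be rewritten to undef below.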
+      std::vector<Constant*> Elts;
+      for (unsigned i = 0; i < VWidth; ++i) {
+        if (UndefElts[i])
+          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+        else
+          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+                                          Shuffle->getMaskValue(i)));
+      }
+      I->setOperand(2, ConstantVector::get(Elts));
+      MadeChange = true;
+    }
+    break;
+  }
+  case Instruction::BitCast: {
+    // Vector->vector casts only.
+    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+    if (!VTy) break;
+    unsigned InVWidth = VTy->getNumElements();
+    APInt InputDemandedElts(InVWidth, 0);
+    unsigned Ratio;
+
+    if (VWidth == InVWidth) {
+      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+      // elements as are demanded of us.
+      Ratio = 1;
+      InputDemandedElts = DemandedElts;
+    } else if (VWidth > InVWidth) {
+      // Untested so far.
+      break;
+
+      // If there are more elements in the result than there are in the source,
+      // then an input element is live if any of the corresponding output
+      // elements are live.
+      Ratio = VWidth/InVWidth;
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+        if (DemandedElts[OutIdx])
+          InputDemandedElts.set(OutIdx/Ratio);
+      }
+    } else {
+      // Untested so far.
+      break;
+
+      // If there are more elements in the source than there are in the result,
+      // then an input element is live if the corresponding output element is
+      // live.
+      Ratio = InVWidth/VWidth;
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if (DemandedElts[InIdx/Ratio])
+          InputDemandedElts.set(InIdx);
+    }
+
+    // Simplify the input based on the demanded elements of the source vector.
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) {
+      I->setOperand(0, TmpV);
+      MadeChange = true;
+    }
+
+    UndefElts = UndefElts2;
+    if (VWidth > InVWidth) {
+      llvm_unreachable("Unimp");
+      // If there are more elements in the result than there are in the source,
+      // then an output element is undef if the corresponding input element is
+      // undef.
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+        if (UndefElts2[OutIdx/Ratio])
+          UndefElts.set(OutIdx);
+    } else if (VWidth < InVWidth) {
+      llvm_unreachable("Unimp");
+      // If there are more elements in the source than there are in the result,
+      // then a result element is undef if all of the corresponding input
+      // elements are undef.
+      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if (!UndefElts2[InIdx])          // Not undef?
+          UndefElts.clear(InIdx/Ratio);    // Clear undef bit.
+    }
+    break;
+  }
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    // Simplify both operands based on the demanded elements.  (div/rem are
+    // not handled here: they demand all elements, because turning an
+    // undemanded element into undef could introduce a divide by zero.)
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+                                      UndefElts, Depth+1);
+    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+    // Output elements are undefined if both are undefined.  Consider things
+    // like undef&0.  The result is known zero, not undef.
+    UndefElts &= UndefElts2;
+    break;
+
+  case Instruction::Call: {
+    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+    if (!II) break;
+    switch (II->getIntrinsicID()) {
+    default: break;
+
+    // Binary vector operations that work column-wise.  A dest element is a
+    // function of the corresponding input elements from the two inputs.
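+    // e.g. x86_sse_sub_ss writes only lane 0 (with the difference of the two
+    // lane-0 inputs); the remaining lanes pass through from its first vector
+    // operand, so per-lane demanded reasoning is sound for these intrinsics.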
+    case Intrinsic::x86_sse_sub_ss:
+    case Intrinsic::x86_sse_mul_ss:
+    case Intrinsic::x86_sse_min_ss:
+    case Intrinsic::x86_sse_max_ss:
+    case Intrinsic::x86_sse2_sub_sd:
+    case Intrinsic::x86_sse2_mul_sd:
+    case Intrinsic::x86_sse2_min_sd:
+    case Intrinsic::x86_sse2_max_sd:
+      TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+                                        UndefElts, Depth+1);
+      if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
+      TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+                                        UndefElts2, Depth+1);
+      if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+
+      // If only the low elt is demanded and this is a scalarizable intrinsic,
+      // scalarize it now.
+      if (DemandedElts == 1) {
+        switch (II->getIntrinsicID()) {
+        default: break;
+        case Intrinsic::x86_sse_sub_ss:
+        case Intrinsic::x86_sse_mul_ss:
+        case Intrinsic::x86_sse2_sub_sd:
+        case Intrinsic::x86_sse2_mul_sd:
+          // TODO: Lower MIN/MAX/ABS/etc
+          Value *LHS = II->getOperand(1);
+          Value *RHS = II->getOperand(2);
+          // Extract the element as scalars.
+          LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
+            ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+          RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+            ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+          switch (II->getIntrinsicID()) {
+          default: llvm_unreachable("Case stmts out of sync!");
+          case Intrinsic::x86_sse_sub_ss:
+          case Intrinsic::x86_sse2_sub_sd:
+            TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
+                                                        II->getName()), *II);
+            break;
+          case Intrinsic::x86_sse_mul_ss:
+          case Intrinsic::x86_sse2_mul_sd:
+            TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
+                                                        II->getName()), *II);
+            break;
+          }
+
+          Instruction *New =
+            InsertElementInst::Create(
+              UndefValue::get(II->getType()), TmpV,
+              ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+              II->getName());
+          InsertNewInstBefore(New, *II);
+          return New;
+        }
+      }
+
+      // Output elements are undefined if both are undefined.  Consider things
+      // like undef&0.  The result is known zero, not undef.
+      UndefElts &= UndefElts2;
+      break;
+    }
+    break;
+  }
+  }
+  return MadeChange ? I : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 000000000000..f11f55788806
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,560 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+using namespace llvm;
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
+static bool CheapToScalarize(Value *V, bool isConstant) {
+  if (isa<ConstantAggregateZero>(V))
+    return true;
+  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+    if (isConstant) return true;
+    // If all elts are the same, we can extract.
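+    // e.g. every lane of <4 x i32> <i32 7, i32 7, i32 7, i32 7> extracts to
+    // i32 7, so scalarizing such a splat constant costs nothing.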
+    Constant *Op0 = C->getOperand(0);
+    for (unsigned i = 1; i < C->getNumOperands(); ++i)
+      if (C->getOperand(i) != Op0)
+        return false;
+    return true;
+  }
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  // Insert element gets simplified to the inserted element or is deleted if
+  // this is a constant idx extract element and it's a constant idx insertelt.
+  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+      isa<ConstantInt>(I->getOperand(2)))
+    return true;
+  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+    return true;
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+    if (BO->hasOneUse() &&
+        (CheapToScalarize(BO->getOperand(0), isConstant) ||
+         CheapToScalarize(BO->getOperand(1), isConstant)))
+      return true;
+  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+    if (CI->hasOneUse() &&
+        (CheapToScalarize(CI->getOperand(0), isConstant) ||
+         CheapToScalarize(CI->getOperand(1), isConstant)))
+      return true;
+
+  return false;
+}
+
+/// Read and decode a shufflevector mask.
+///
+/// It turns undef elements into values that are larger than the number of
+/// elements in the input.
+static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+  unsigned NElts = SVI->getType()->getNumElements();
+  if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+    return std::vector<unsigned>(NElts, 0);
+  if (isa<UndefValue>(SVI->getOperand(2)))
+    return std::vector<unsigned>(NElts, 2*NElts);
+
+  std::vector<unsigned> Result;
+  const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+    if (isa<UndefValue>(*i))
+      Result.push_back(NElts*2);  // undef -> out-of-range value (NElts*2)
+    else
+      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+  return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+  assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
+  const VectorType *PTy = cast<VectorType>(V->getType());
+  unsigned Width = PTy->getNumElements();
+  if (EltNo >= Width)  // Out of range access.
+    return UndefValue::get(PTy->getElementType());
+
+  if (isa<UndefValue>(V))
+    return UndefValue::get(PTy->getElementType());
+  if (isa<ConstantAggregateZero>(V))
+    return Constant::getNullValue(PTy->getElementType());
+  if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
+    return CP->getOperand(EltNo);
+
+  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert to a variable element, we don't know what it is.
+    if (!isa<ConstantInt>(III->getOperand(2)))
+      return 0;
+    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+    // If this is an insert to the element we are looking for, return the
+    // inserted value.
+    if (EltNo == IIElt)
+      return III->getOperand(1);
+
+    // Otherwise, the insertelement doesn't modify the value, recurse on its
+    // vector input.
+    return FindScalarElement(III->getOperand(0), EltNo);
+  }
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+    unsigned LHSWidth =
+      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+    unsigned InEl = getShuffleMask(SVI)[EltNo];
+    if (InEl < LHSWidth)
+      return FindScalarElement(SVI->getOperand(0), InEl);
+    else if (InEl < LHSWidth*2)
+      return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+    else
+      return UndefValue::get(PTy->getElementType());
+  }
+
+  // Otherwise, we don't know.
+  return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+  // If vector val is undef, replace extract with scalar undef.
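+  // e.g. extractelement <4 x i32> undef, i32 1 --> i32 undef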
+  if (isa<UndefValue>(EI.getOperand(0)))
+    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+  // If vector val is constant 0, replace extract with scalar 0.
+  if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+  if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+    // If vector val is constant with all elements the same, replace EI with
+    // that element.  When the elements are not identical, we cannot replace yet
+    // (we do that below, but only when the index is constant).
+    Constant *op0 = C->getOperand(0);
+    for (unsigned i = 1; i != C->getNumOperands(); ++i)
+      if (C->getOperand(i) != op0) {
+        op0 = 0;
+        break;
+      }
+    if (op0)
+      return ReplaceInstUsesWith(EI, op0);
+  }
+
+  // If extracting a specified index from the vector, see if we can recursively
+  // find a previously computed scalar that was inserted into the vector.
+  if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+    unsigned IndexVal = IdxC->getZExtValue();
+    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+    // If this is extracting an invalid index, turn this into undef, to avoid
+    // crashing the code below.
+    if (IndexVal >= VectorWidth)
+      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+    // This instruction only demands the single element from the input vector.
+    // If the input vector has a single use, simplify it based on this use
+    // property.
+    if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+      APInt UndefElts(VectorWidth, 0);
+      APInt DemandedMask(VectorWidth, 1 << IndexVal);
+      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+                                                DemandedMask, UndefElts)) {
+        EI.setOperand(0, V);
+        return &EI;
+      }
+    }
+
+    if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+      return ReplaceInstUsesWith(EI, Elt);
+
+    // If this extractelement is directly using a bitcast from a vector of
+    // the same number of elements, see if we can find the source element from
+    // it.  In this case, we will end up needing to bitcast the scalars.
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+      if (const VectorType *VT =
+              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VT->getNumElements() == VectorWidth)
+          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+            return new BitCastInst(Elt, EI.getType());
+    }
+  }
+
+  if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+    // Push extractelement into predecessor operation if legal and
+    // profitable to do so.
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+      if (I->hasOneUse() &&
+          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+        Value *newEI0 =
+          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+                                        EI.getName()+".lhs");
+        Value *newEI1 =
+          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+                                        EI.getName()+".rhs");
+        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+      }
+    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+      // Extracting the inserted element?
+      if (IE->getOperand(2) == EI.getOperand(1))
+        return ReplaceInstUsesWith(EI, IE->getOperand(1));
+      // If the inserted and extracted elements are constants, they must not
+      // be the same value, extract from the pre-inserted value instead.
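+      // e.g. extractelement (insertelement <4 x i32> %v, i32 %s, i32 0), i32 1
+      // reads a lane the insert cannot touch, so it can read directly from %v.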
+      if (isa<ConstantInt>(IE->getOperand(2)) &&
+          isa<ConstantInt>(EI.getOperand(1))) {
+        Worklist.AddValue(EI.getOperand(0));
+        EI.setOperand(0, IE->getOperand(0));
+        return &EI;
+      }
+    } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+      // If this is extracting an element from a shufflevector, figure out where
+      // it came from and extract from the appropriate input element instead.
+      if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+        unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+        Value *Src;
+        unsigned LHSWidth =
+          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+        if (SrcIdx < LHSWidth)
+          Src = SVI->getOperand(0);
+        else if (SrcIdx < LHSWidth*2) {
+          SrcIdx -= LHSWidth;
+          Src = SVI->getOperand(1);
+        } else {
+          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+        }
+        return ExtractElementInst::Create(Src,
+                 ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+                                  SrcIdx, false));
+      }
+    }
+    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
+  }
+  return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+                                         std::vector<Constant*> &Mask) {
+  assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+         "Invalid CollectSingleShuffleElements");
+  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+  if (isa<UndefValue>(V)) {
+    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+    return true;
+  }
+
+  if (V == LHS) {
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+    return true;
+  }
+
+  if (V == RHS) {
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                      i+NumElts));
+    return true;
+  }
+
+  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert of an extract from some other vector, include it.
+    Value *VecOp    = IEI->getOperand(0);
+    Value *ScalarOp = IEI->getOperand(1);
+    Value *IdxOp    = IEI->getOperand(2);
+
+    if (!isa<ConstantInt>(IdxOp))
+      return false;
+    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
+      // Okay, we can handle this if the vector we are inserting into is
+      // transitively ok.
+      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+        // If so, update the mask to reflect the inserted undef.
+        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+        return true;
+      }
+    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+      if (isa<ConstantInt>(EI->getOperand(1)) &&
+          EI->getOperand(0)->getType() == V->getType()) {
+        unsigned ExtractedIdx =
+          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+        // This must be extracting from either LHS or RHS.
+        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+          // Okay, we can handle this if the vector we are inserting into is
+          // transitively ok.
+          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+            // If so, update the mask to reflect the inserted value.
+            if (EI->getOperand(0) == LHS) {
+              Mask[InsertedIdx % NumElts] =
+                ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                 ExtractedIdx);
+            } else {
+              assert(EI->getOperand(0) == RHS);
+              Mask[InsertedIdx % NumElts] =
+                ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                 ExtractedIdx+NumElts);
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+  // TODO: Handle shufflevector here!
+
+  return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+                                     Value *&RHS) {
+  assert(isa<VectorType>(V->getType()) &&
+         (RHS == 0 || V->getType() == RHS->getType()) &&
+         "Invalid shuffle!");
+  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+  if (isa<UndefValue>(V)) {
+    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+    return V;
+  } else if (isa<ConstantAggregateZero>(V)) {
+    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+    return V;
+  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert of an extract from some other vector, include it.
+    Value *VecOp    = IEI->getOperand(0);
+    Value *ScalarOp = IEI->getOperand(1);
+    Value *IdxOp    = IEI->getOperand(2);
+
+    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+          EI->getOperand(0)->getType() == V->getType()) {
+        unsigned ExtractedIdx =
+          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+        // Either the extracted from or inserted into vector must be RHSVec,
+        // otherwise we'd end up with a shuffle of three inputs.
+        if (EI->getOperand(0) == RHS || RHS == 0) {
+          RHS = EI->getOperand(0);
+          Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+          Mask[InsertedIdx % NumElts] =
+            ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                             NumElts+ExtractedIdx);
+          return V;
+        }
+
+        if (VecOp == RHS) {
+          Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+          // Everything but the extracted element is replaced with the RHS.
+          for (unsigned i = 0; i != NumElts; ++i) {
+            if (i != InsertedIdx)
+              Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                         NumElts+i);
+          }
+          return V;
+        }
+
+        // If this insertelement is a chain that comes from exactly these two
+        // vectors, return the vector and the effective shuffle.
+        if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+          return EI->getOperand(0);
+      }
+    }
+  }
+  // TODO: Handle shufflevector here!
+
+  // Otherwise, can't do anything fancy.  Return an identity vector.
+  for (unsigned i = 0; i != NumElts; ++i)
+    Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+  return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+  Value *VecOp    = IE.getOperand(0);
+  Value *ScalarOp = IE.getOperand(1);
+  Value *IdxOp    = IE.getOperand(2);
+
+  // Inserting an undef or into an undefined place, remove this.
+  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+    ReplaceInstUsesWith(IE, VecOp);
+
+  // If the inserted element was extracted from some other vector, and if the
+  // indexes are constant, try to turn this into a shufflevector operation.
+  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+    if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+        EI->getOperand(0)->getType() == IE.getType()) {
+      unsigned NumVectorElts = IE.getType()->getNumElements();
+      unsigned ExtractedIdx =
+        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+      unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+      if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+        return ReplaceInstUsesWith(IE, VecOp);
+
+      if (InsertedIdx >= NumVectorElts)  // Out of range insert.
+        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+      // If we are extracting a value from a vector, then inserting it right
+      // back into the same place, just use the input vector.
+      if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+        return ReplaceInstUsesWith(IE, VecOp);
+
+      // If this insertelement isn't used by some other insertelement, turn it
+      // (and any insertelements it points to), into one big shuffle.
+      if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+        std::vector<Constant*> Mask;
+        Value *RHS = 0;
+        Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+        if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+        // We now have a shuffle of LHS, RHS, Mask.
+        return new ShuffleVectorInst(LHS, RHS,
+                                     ConstantVector::get(Mask));
+      }
+    }
+  }
+
+  unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+  APInt UndefElts(VWidth, 0);
+  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+  if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
+    return &IE;
+
+  return 0;
+}
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+  Value *LHS = SVI.getOperand(0);
+  Value *RHS = SVI.getOperand(1);
+  std::vector<unsigned> Mask = getShuffleMask(&SVI);
+
+  bool MadeChange = false;
+
+  // Undefined shuffle mask -> undefined value.
+  if (isa<UndefValue>(SVI.getOperand(2)))
+    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+  if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+    return 0;
+
+  APInt UndefElts(VWidth, 0);
+  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+  if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+    LHS = SVI.getOperand(0);
+    RHS = SVI.getOperand(1);
+    MadeChange = true;
+  }
+
+  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
+  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+  if (LHS == RHS || isa<UndefValue>(LHS)) {
+    if (isa<UndefValue>(LHS) && LHS == RHS) {
+      // shuffle(undef,undef,mask) -> undef.
+      return ReplaceInstUsesWith(SVI, LHS);
+    }
+
+    // Remap any references to RHS to use LHS.
+    std::vector<Constant*> Elts;
+    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+      if (Mask[i] >= 2*e)
+        Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+      else {
+        if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
+            (Mask[i] <  e && isa<UndefValue>(LHS))) {
+          Mask[i] = 2*e;     // Turn into undef.
+          Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+        } else {
+          Mask[i] = Mask[i] % e;  // Force to LHS.
+          Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+                                          Mask[i]));
+        }
+      }
+    }
+    SVI.setOperand(0, SVI.getOperand(1));
+    SVI.setOperand(1, UndefValue::get(RHS->getType()));
+    SVI.setOperand(2, ConstantVector::get(Elts));
+    LHS = SVI.getOperand(0);
+    RHS = SVI.getOperand(1);
+    MadeChange = true;
+  }
+
+  // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
+  bool isLHSID = true, isRHSID = true;
+
+  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] >= e*2) continue;  // Ignore undef values.
+    // Is this an identity shuffle of the LHS value?
+    isLHSID &= (Mask[i] == i);
+
+    // Is this an identity shuffle of the RHS value?
+    isRHSID &= (Mask[i]-e == i);
+  }
+
+  // Eliminate identity shuffles.
+  if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+  if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+  // If the LHS is a shufflevector itself, see if we can combine it with this
+  // one without producing an unusual shuffle.  Here we are really conservative:
+  // we are absolutely afraid of producing a shuffle mask not in the input
+  // program, because the code gen may not be smart enough to turn a merged
+  // shuffle into two specific shuffles: it may produce worse code.  As such,
+  // we only merge two shuffles if the result is one of the two input shuffle
+  // masks.  In this case, merging the shuffles just removes one instruction,
+  // which we know is safe.  This is good for things like turning:
+  //   (splat(splat)) -> splat.
+  if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+    if (isa<UndefValue>(RHS)) {
+      std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
+
+      if (LHSMask.size() == Mask.size()) {
+        std::vector<unsigned> NewMask;
+        for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+          if (Mask[i] >= e)
+            NewMask.push_back(2*e);
+          else
+            NewMask.push_back(LHSMask[Mask[i]]);
+
+        // If the result mask is equal to the src shuffle or this
+        // shuffle mask, do the replacement.
+        if (NewMask == LHSMask || NewMask == Mask) {
+          unsigned LHSInNElts =
+            cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+            getNumElements();
+          std::vector<Constant*> Elts;
+          for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+            if (NewMask[i] >= LHSInNElts*2) {
+              Elts.push_back(UndefValue::get(
+                  Type::getInt32Ty(SVI.getContext())));
+            } else {
+              Elts.push_back(ConstantInt::get(
+                  Type::getInt32Ty(SVI.getContext()),
+                  NewMask[i]));
+            }
+          }
+          return new ShuffleVectorInst(LHSSVI->getOperand(0),
+                                       LHSSVI->getOperand(1),
+                                       ConstantVector::get(Elts));
+        }
+      }
+    }
+  }
+
+  return MadeChange ? &SVI : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 000000000000..9d88621cee1d
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,105 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class VISIBILITY_HIDDEN InstCombineWorklist {
+  SmallVector<Instruction*, 256> Worklist;
+  DenseMap<Instruction*, unsigned> WorklistMap;
+
+  void operator=(const InstCombineWorklist &RHS);   // DO NOT IMPLEMENT
+  InstCombineWorklist(const InstCombineWorklist&);  // DO NOT IMPLEMENT
+public:
+  InstCombineWorklist() {}
+
+  bool isEmpty() const { return Worklist.empty(); }
+
+  /// Add - Add the specified instruction to the worklist if it isn't already
+  /// in it.
+  void Add(Instruction *I) {
+    if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+      DEBUG(errs() << "IC: ADD: " << *I << '\n');
+      Worklist.push_back(I);
+    }
+  }
+
+  void AddValue(Value *V) {
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      Add(I);
+  }
+
+  /// AddInitialGroup - Add the specified batch of stuff in reverse order,
+  /// which should only be done when the worklist is empty and when the group
+  /// has no duplicates.
+  void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+    assert(Worklist.empty() && "Worklist must be empty to add initial group");
+    Worklist.reserve(NumEntries+16);
+    DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+    for (; NumEntries; --NumEntries) {
+      Instruction *I = List[NumEntries-1];
+      WorklistMap.insert(std::make_pair(I, Worklist.size()));
+      Worklist.push_back(I);
+    }
+  }
+
+  // Remove - remove I from the worklist if it exists.
+  void Remove(Instruction *I) {
+    DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+    if (It == WorklistMap.end()) return; // Not in worklist.
+
+    // Don't bother moving everything down, just null out the slot.
+    Worklist[It->second] = 0;
+
+    WorklistMap.erase(It);
+  }
+
+  Instruction *RemoveOne() {
+    Instruction *I = Worklist.back();
+    Worklist.pop_back();
+    WorklistMap.erase(I);
+    return I;
+  }
+
+  /// AddUsersToWorkList - When an instruction is simplified, add all users of
+  /// the instruction to the work lists because they might get more simplified
+  /// now.
+  ///
+  void AddUsersToWorkList(Instruction &I) {
+    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+         UI != UE; ++UI)
+      Add(cast<Instruction>(*UI));
+  }
+
+
+  /// Zap - check that the worklist is empty and nuke the backing store for
+  /// the map if it is large.
+  void Zap() {
+    assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+    // Do an explicit clear, this shrinks the map if needed.
+    WorklistMap.clear();
+  }
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 000000000000..93b196126b20
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,1274 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions.  This pass does not modify the CFG.  This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+//    %Y = add i32 %X, 1
+//    %Z = add i32 %Y, 1
+// into:
+//    %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+//    1. If a binary operator has a constant operand, it is moved to the RHS
+//    2. Bitwise operators with constant operands are always grouped so that
+//       shifts are performed first, then or's, then and's, then xor's.
+//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+//    4. All cmp instructions on boolean values are replaced with logical ops
+//    5. add X, X is represented as (X*2) => (X << 1)
+//    6. Multiplies with a power-of-two constant argument are transformed into
+//       shifts.
+//   ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <memory>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+
+
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
+void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreservedID(LCSSAID);
+  AU.setPreservesCFG();
+}
+
+
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'.  We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
+  assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+
+  // If we don't have TD, we don't know if the source/dest are legal.
+  if (!TD) return false;
+
+  unsigned FromWidth = From->getPrimitiveSizeInBits();
+  unsigned ToWidth = To->getPrimitiveSizeInBits();
+  bool FromLegal = TD->isLegalInteger(FromWidth);
+  bool ToLegal = TD->isLegalInteger(ToWidth);
+
+  // If 'From' is a legal integer type but the result would be an illegal
+  // type, don't do the transformation.
+  if (FromLegal && !ToLegal)
+    return false;
+
+  // Otherwise, if both are illegal, do not increase the size of the result. We
+  // do allow things like i160 -> i64, but not i64 -> i160.
+  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+    return false;
+
+  return true;
+}
+
+
+// SimplifyCommutative - This performs a few simplifications for commutative
+// operators:
+//
+//  1. Order operands such that they are listed from right (least complex) to
+//     left (most complex).  This puts constants before unary operators before
+//     binary operators.
+//
+//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
+//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//
+
+
+// SimplifyCommutative - This performs a few simplifications for commutative
+// operators:
+//
+//  1. Order operands such that they are listed from right (least complex) to
+//     left (most complex). This puts constants before unary operators before
+//     binary operators.
+//
+//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
+//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//
+bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+  bool Changed = false;
+  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
+    Changed = !I.swapOperands();
+
+  if (!I.isAssociative()) return Changed;
+
+  Instruction::BinaryOps Opcode = I.getOpcode();
+  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
+    if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
+      if (isa<Constant>(I.getOperand(1))) {
+        Constant *Folded = ConstantExpr::get(I.getOpcode(),
+                                             cast<Constant>(I.getOperand(1)),
+                                             cast<Constant>(Op->getOperand(1)));
+        I.setOperand(0, Op->getOperand(0));
+        I.setOperand(1, Folded);
+        return true;
+      }
+
+      if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
+        if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
+            Op->hasOneUse() && Op1->hasOneUse()) {
+          Constant *C1 = cast<Constant>(Op->getOperand(1));
+          Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+          // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
+          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
+                                                    Op1->getOperand(0),
+                                                    Op1->getName(), &I);
+          Worklist.Add(New);
+          I.setOperand(0, New);
+          I.setOperand(1, Folded);
+          return true;
+        }
+    }
+  return Changed;
+}
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+  if (BinaryOperator::isNeg(V))
+    return BinaryOperator::getNegArgument(V);
+
+  // Constants can be considered to be negated values if they can be folded.
+  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+    return ConstantExpr::getNeg(C);
+
+  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+    if (C->getType()->getElementType()->isInteger())
+      return ConstantExpr::getNeg(C);
+
+  return 0;
+}
+
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+Value *InstCombiner::dyn_castFNegVal(Value *V) const {
+  if (BinaryOperator::isFNeg(V))
+    return BinaryOperator::getFNegArgument(V);
+
+  // Constants can be considered to be negated values if they can be folded.
+  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+    return ConstantExpr::getFNeg(C);
+
+  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+    if (C->getType()->getElementType()->isFloatingPoint())
+      return ConstantExpr::getFNeg(C);
+
+  return 0;
+}
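Rule 2 of SimplifyCommutative is plain constant reassociation. A worked illustration on concrete integers (values are hypothetical):

#include <cstdio>

// (V + 3) + 4 reassociates to V + (3 + 4); the two constants fold to 7 at
// compile time, which is what SimplifyCommutative arranges at the IR level.
int foldedAdd(int V) {
  // before: ((V + 3) + 4)
  // after:  V + 7
  return V + (3 + 4);
}

int main() { std::printf("%d\n", foldedAdd(10)); }  // prints 17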
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+                                             InstCombiner *IC) {
+  if (CastInst *CI = dyn_cast<CastInst>(&I))
+    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+
+  // Figure out if the constant is the left or the right argument.
+  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+  if (Constant *SOC = dyn_cast<Constant>(SO)) {
+    if (ConstIsRHS)
+      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+  }
+
+  Value *Op0 = SO, *Op1 = ConstOperand;
+  if (!ConstIsRHS)
+    std::swap(Op0, Op1);
+
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+                                    SO->getName()+".op");
+  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+                                   SO->getName()+".cmp");
+  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
+                                   SO->getName()+".cmp");
+  llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+  // Don't modify shared select instructions
+  if (!SI->hasOneUse()) return 0;
+  Value *TV = SI->getOperand(1);
+  Value *FV = SI->getOperand(2);
+
+  if (isa<Constant>(TV) || isa<Constant>(FV)) {
+    // Bool selects with constant operands can be folded to logical ops.
+    if (SI->getType()->isInteger(1)) return 0;
+
+    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+                              SelectFalseVal);
+  }
+  return 0;
+}
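What FoldOpIntoSelect buys, on a concrete example (hypothetical values): applying the operation to each select arm lets the constants fold away.

// before, at the IR level:
//   %sel = select i1 %c, i32 3, i32 5
//   %r   = add i32 %sel, 1
// after:
//   %r   = select i1 %c, i32 4, i32 6
int foldOpIntoSelect(bool c) {
  // the add has been folded into both arms of the select
  return c ? 4 : 6;
}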
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+                                         bool AllowAggressive) {
+  AllowAggressive = false;
+  PHINode *PN = cast<PHINode>(I.getOperand(0));
+  unsigned NumPHIValues = PN->getNumIncomingValues();
+  if (NumPHIValues == 0 ||
+      // We normally only transform phis with a single use, unless we're trying
+      // hard to make jump threading happen.
+      (!PN->hasOneUse() && !AllowAggressive))
+    return 0;
+
+
+  // Check to see if all of the operands of the PHI are simple constants
+  // (constantint/constantfp/undef). If there is one non-constant value,
+  // remember the BB it is in. If there is more than one or if *it* is a PHI,
+  // bail out. We don't do arbitrary constant expressions here because moving
+  // their computation can be expensive without a cost model.
+  BasicBlock *NonConstBB = 0;
+  for (unsigned i = 0; i != NumPHIValues; ++i)
+    if (!isa<Constant>(PN->getIncomingValue(i)) ||
+        isa<ConstantExpr>(PN->getIncomingValue(i))) {
+      if (NonConstBB) return 0;  // More than one non-const value.
+      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
+      NonConstBB = PN->getIncomingBlock(i);
+
+      // If the incoming non-constant value is in I's block, we have an
+      // infinite loop.
+      if (NonConstBB == I.getParent())
+        return 0;
+    }
+
+  // If there is exactly one non-constant value, we can insert a copy of the
+  // operation in that block. However, if this is a critical edge, we would be
+  // inserting the computation on some other paths (e.g. inside a loop). Only
+  // do this if the pred block is unconditionally branching into the phi block.
+  if (NonConstBB != 0 && !AllowAggressive) {
+    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+    if (!BI || !BI->isUnconditional()) return 0;
+  }
+
+  // Okay, we can do the transformation: create the new PHI node.
+  PHINode *NewPN = PHINode::Create(I.getType(), "");
+  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
+  InsertNewInstBefore(NewPN, *PN);
+  NewPN->takeName(PN);
+
+  // Next, add all of the operands to the PHI.
+  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+    // We only currently try to fold the condition of a select when it is a
+    // phi, not the true/false values.
+    Value *TrueV = SI->getTrueValue();
+    Value *FalseV = SI->getFalseValue();
+    BasicBlock *PhiTransBB = PN->getParent();
+    for (unsigned i = 0; i != NumPHIValues; ++i) {
+      BasicBlock *ThisBB = PN->getIncomingBlock(i);
+      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+      Value *InV = 0;
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+      } else {
+        assert(PN->getIncomingBlock(i) == NonConstBB);
+        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
+                                 FalseVInPred,
+                                 "phitmp", NonConstBB->getTerminator());
+        Worklist.Add(cast<Instruction>(InV));
+      }
+      NewPN->addIncoming(InV, ThisBB);
+    }
+  } else if (I.getNumOperands() == 2) {
+    Constant *C = cast<Constant>(I.getOperand(1));
+    for (unsigned i = 0; i != NumPHIValues; ++i) {
+      Value *InV = 0;
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+        else
+          InV = ConstantExpr::get(I.getOpcode(), InC, C);
+      } else {
+        assert(PN->getIncomingBlock(i) == NonConstBB);
+        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+          InV = BinaryOperator::Create(BO->getOpcode(),
+                                       PN->getIncomingValue(i), C, "phitmp",
+                                       NonConstBB->getTerminator());
+        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+          InV = CmpInst::Create(CI->getOpcode(),
+                                CI->getPredicate(),
+                                PN->getIncomingValue(i), C, "phitmp",
+                                NonConstBB->getTerminator());
+        else
+          llvm_unreachable("Unknown binop!");
+
+        Worklist.Add(cast<Instruction>(InV));
+      }
+      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+    }
+  } else {
+    CastInst *CI = cast<CastInst>(&I);
+    const Type *RetTy = CI->getType();
+    for (unsigned i = 0; i != NumPHIValues; ++i) {
+      Value *InV;
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+      } else {
+        assert(PN->getIncomingBlock(i) == NonConstBB);
+        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
+                               I.getType(), "phitmp",
+                               NonConstBB->getTerminator());
+        Worklist.Add(cast<Instruction>(InV));
+      }
+      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+    }
+  }
+  return ReplaceInstUsesWith(I, NewPN);
+}
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+                                          SmallVectorImpl<Value*> &NewIndices) {
+  if (!TD) return 0;
+  if (!Ty->isSized()) return 0;
+
+  // Start with the index over the outer type. Note that the type size
+  // might be zero (even if the offset isn't zero) if the indexed type
+  // is something like [0 x {int, int}]
+  const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+  int64_t FirstIdx = 0;
+  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+    FirstIdx = Offset/TySize;
+    Offset -= FirstIdx*TySize;
+
+    // Handle hosts where % returns negative instead of values [0..TySize).
+    if (Offset < 0) {
+      --FirstIdx;
+      Offset += TySize;
+      assert(Offset >= 0);
+    }
+    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+  }
+
+  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+  // Index into the types. If we fail, set OrigBase to null.
+  while (Offset) {
+    // Indexing into tail padding between struct/array elements.
+    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+      return 0;
+
+    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+      const StructLayout *SL = TD->getStructLayout(STy);
+      assert(Offset < (int64_t)SL->getSizeInBytes() &&
+             "Offset must stay within the indexed type");
+
+      unsigned Elt = SL->getElementContainingOffset(Offset);
+      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+                                            Elt));
+
+      Offset -= SL->getElementOffset(Elt);
+      Ty = STy->getElementType(Elt);
+    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+      assert(EltSize && "Cannot index into a zero-sized array");
+      NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
+      Offset %= EltSize;
+      Ty = AT->getElementType();
+    } else {
+      // Otherwise, we can't index into the middle of this atomic type, bail.
+      return 0;
+    }
+  }
+
+  return Ty;
+}
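The arithmetic in FindElementAtOffset, worked through for a hypothetical type [4 x {i32, i32}] (8-byte elements) and byte offset 20:

#include <cstdint>
#include <cstdio>

// outer index = 20 / 8 = 2, remainder 4; inside {i32, i32}, byte 4 is field 1.
// The resulting index list {2, 1} corresponds to: gep %p, i64 2, i32 1.
int main() {
  int64_t Offset = 20, EltSize = 8;
  int64_t FirstIdx = Offset / EltSize;   // 2
  Offset -= FirstIdx * EltSize;          // 4 -> second i32 of the pair
  std::printf("indices: %lld, %lld\n",
              (long long)FirstIdx, (long long)(Offset / 4));
  return 0;
}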
+
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+  if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+    return ReplaceInstUsesWith(GEP, V);
+
+  Value *PtrOp = GEP.getOperand(0);
+
+  if (isa<UndefValue>(GEP.getOperand(0)))
+    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
+
+  // Eliminate unneeded casts for indices.
+  if (TD) {
+    bool MadeChange = false;
+    unsigned PtrSize = TD->getPointerSizeInBits();
+
+    gep_type_iterator GTI = gep_type_begin(GEP);
+    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+         I != E; ++I, ++GTI) {
+      if (!isa<SequentialType>(*GTI)) continue;
+
+      // If we are using a wider index than needed for this platform, shrink it
+      // to what we need. If narrower, sign-extend it to what we need. This
+      // explicit cast can make subsequent optimizations more obvious.
+      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
+      if (OpBits == PtrSize)
+        continue;
+
+      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
+      MadeChange = true;
+    }
+    if (MadeChange) return &GEP;
+  }
+
+  // Combine Indices - If the source pointer to this getelementptr instruction
+  // is a getelementptr instruction, combine the indices of the two
+  // getelementptr instructions into a single instruction.
+  //
+  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+    // Note that if our source is a gep chain itself that we wait for that
+    // chain to be resolved before we perform this transformation. This
+    // avoids us creating a TON of code in some cases.
+    //
+    if (GetElementPtrInst *SrcGEP =
+          dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+      if (SrcGEP->getNumOperands() == 2)
+        return 0;   // Wait until our source is folded to completion.
+
+    SmallVector<Value*, 8> Indices;
+
+    // Find out whether the last index in the source GEP is a sequential idx.
+    bool EndsWithSequential = false;
+    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+         I != E; ++I)
+      EndsWithSequential = !isa<StructType>(*I);
+
+    // Can we combine the two pointer arithmetics offsets?
+    if (EndsWithSequential) {
+      // Replace: gep (gep %P, long B), long A, ...
+      // With:    T = long A+B; gep %P, T, ...
+      //
+      Value *Sum;
+      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+      Value *GO1 = GEP.getOperand(1);
+      if (SO1 == Constant::getNullValue(SO1->getType())) {
+        Sum = GO1;
+      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+        Sum = SO1;
+      } else {
+        // If they aren't the same type, then the input hasn't been processed
+        // by the loop above yet (which canonicalizes sequential index types to
+        // intptr_t). Just avoid transforming this until the input has been
+        // normalized.
+        if (SO1->getType() != GO1->getType())
+          return 0;
+        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+      }
+
+      // Update the GEP in place if possible.
+      if (Src->getNumOperands() == 2) {
+        GEP.setOperand(0, Src->getOperand(0));
+        GEP.setOperand(1, Sum);
+        return &GEP;
+      }
+      Indices.append(Src->op_begin()+1, Src->op_end()-1);
+      Indices.push_back(Sum);
+      Indices.append(GEP.op_begin()+2, GEP.op_end());
+    } else if (isa<Constant>(*GEP.idx_begin()) &&
+               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+               Src->getNumOperands() != 1) {
+      // Otherwise we can do the fold if the first index of the GEP is a zero
+      Indices.append(Src->op_begin()+1, Src->op_end());
+      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+    }
+
+    if (!Indices.empty())
+      return (GEP.isInBounds() && Src->isInBounds()) ?
+        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+                                          Indices.end(), GEP.getName()) :
+        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+                                  Indices.end(), GEP.getName());
+  }
+
+  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+  Value *StrippedPtr = PtrOp->stripPointerCasts();
+  if (StrippedPtr != PtrOp) {
+    const PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
+
+    bool HasZeroPointerIndex = false;
+    if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+      HasZeroPointerIndex = C->isZero();
+
+    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+    // into     : GEP [10 x i8]* X, i32 0, ...
+    //
+    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+    //           into     : GEP i8* X, ...
+    //
+    // This occurs when the program declares an array extern like "int X[];"
+    if (HasZeroPointerIndex) {
+      const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+      if (const ArrayType *CATy =
+          dyn_cast<ArrayType>(CPTy->getElementType())) {
+        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+        if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+          // -> GEP i8* X, ...
+          SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+          GetElementPtrInst *Res =
+            GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
+                                      Idx.end(), GEP.getName());
+          Res->setIsInBounds(GEP.isInBounds());
+          return Res;
+        }
+
+        if (const ArrayType *XATy =
+              dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+          if (CATy->getElementType() == XATy->getElementType()) {
+            // -> GEP [10 x i8]* X, i32 0, ...
+            // At this point, we know that the cast source type is a pointer
+            // to an array of the same type as the destination pointer
+            // array. Because the array type is never stepped over (there
+            // is a leading zero) we can fold the cast into this GEP.
+            GEP.setOperand(0, StrippedPtr);
+            return &GEP;
+          }
+        }
+      }
+    } else if (GEP.getNumOperands() == 2) {
+      // Transform things like:
+      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+      const Type *SrcElTy = StrippedPtrTy->getElementType();
+      const Type *ResElTy = cast<PointerType>(PtrOp->getType())->getElementType();
+      if (TD && isa<ArrayType>(SrcElTy) &&
+          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+          TD->getTypeAllocSize(ResElTy)) {
+        Value *Idx[2];
+        Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+        Idx[1] = GEP.getOperand(1);
+        Value *NewGEP = GEP.isInBounds() ?
+          Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+          Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+        // V and GEP are both pointer types --> BitCast
+        return new BitCastInst(NewGEP, GEP.getType());
+      }
+
+      // Transform things like:
+      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+      //   (where tmp = 8*tmp2) into:
+      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+      if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) {
+        uint64_t ArrayEltSize =
+            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+        // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
+        // allow either a mul, shift, or constant here.
+        Value *NewIdx = 0;
+        ConstantInt *Scale = 0;
+        if (ArrayEltSize == 1) {
+          NewIdx = GEP.getOperand(1);
+          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
+        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+          NewIdx = ConstantInt::get(CI->getType(), 1);
+          Scale = CI;
+        } else if (Instruction *Inst = dyn_cast<Instruction>(GEP.getOperand(1))){
+          if (Inst->getOpcode() == Instruction::Shl &&
+              isa<ConstantInt>(Inst->getOperand(1))) {
+            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+                                     1ULL << ShAmtVal);
+            NewIdx = Inst->getOperand(0);
+          } else if (Inst->getOpcode() == Instruction::Mul &&
+                     isa<ConstantInt>(Inst->getOperand(1))) {
+            Scale = cast<ConstantInt>(Inst->getOperand(1));
+            NewIdx = Inst->getOperand(0);
+          }
+        }
+
+        // If the index will be to exactly the right offset with the scale taken
+        // out, perform the transformation. Note, we don't know whether Scale is
+        // signed or not. We'll use unsigned version of division/modulo
+        // operation after making sure Scale doesn't have the sign bit set.
+        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+            Scale->getZExtValue() % ArrayEltSize == 0) {
+          Scale = ConstantInt::get(Scale->getType(),
+                                   Scale->getZExtValue() / ArrayEltSize);
+          if (Scale->getZExtValue() != 1) {
+            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+                                                       false /*ZExt*/);
+            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
+          }
+
+          // Insert the new GEP instruction.
+          Value *Idx[2];
+          Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+          Idx[1] = NewIdx;
+          Value *NewGEP = GEP.isInBounds() ?
+            Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()):
+            Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+          // The NewGEP must be pointer typed, so must the old one -> BitCast
+          return new BitCastInst(NewGEP, GEP.getType());
+        }
+      }
+    }
+  }
+
+  /// See if we can simplify:
+  ///   X = bitcast A* to B*
+  ///   Y = gep X, <...constant indices...>
+  /// into a gep of the original struct. This is important for SROA and alias
+  /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+    if (TD &&
+        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+      // Determine how much the GEP moves the pointer. We are guaranteed to get
+      // a constant back from EmitGEPOffset.
+      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
+      int64_t Offset = OffsetV->getSExtValue();
+
+      // If this GEP instruction doesn't move the pointer, just replace the GEP
+      // with a bitcast of the real input to the dest type.
+      if (Offset == 0) {
+        // If the bitcast is of an allocation, and the allocation will be
+        // converted to match the type of the cast, don't touch this.
+        if (isa<AllocaInst>(BCI->getOperand(0)) ||
+            isMalloc(BCI->getOperand(0))) {
+          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+          if (Instruction *I = visitBitCast(*BCI)) {
+            if (I != BCI) {
+              I->takeName(BCI);
+              BCI->getParent()->getInstList().insert(BCI, I);
+              ReplaceInstUsesWith(*BCI, I);
+            }
+            return &GEP;
+          }
+        }
+        return new BitCastInst(BCI->getOperand(0), GEP.getType());
+      }
+
+      // Otherwise, if the offset is non-zero, we need to find out if there is a
+      // field at Offset in 'A's type. If so, we can pull the cast through the
+      // GEP.
+      SmallVector<Value*, 8> NewIndices;
+      const Type *InTy =
+        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+        Value *NGEP = GEP.isInBounds() ?
+          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+                                     NewIndices.end()) :
+          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+                             NewIndices.end());
+
+        if (NGEP->getType() == GEP.getType())
+          return ReplaceInstUsesWith(GEP, NGEP);
+        NGEP->takeName(&GEP);
+        return new BitCastInst(NGEP, GEP.getType());
+      }
+    }
+  }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitFree(Instruction &FI) {
+  Value *Op = FI.getOperand(1);
+
+  // free undef -> unreachable.
+  if (isa<UndefValue>(Op)) {
+    // Insert a new store to null because we cannot modify the CFG here.
+    new StoreInst(ConstantInt::getTrue(FI.getContext()),
+           UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
+    return EraseInstFromFunction(FI);
+  }
+
+  // If we have 'free null' delete the instruction. This can happen in stl code
+  // when lots of inlining happens.
+  if (isa<ConstantPointerNull>(Op))
+    return EraseInstFromFunction(FI);
+
+  // If we have a malloc call whose only use is a free call, delete both.
+  if (isMalloc(Op)) {
+    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+      if (Op->hasOneUse() && CI->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        EraseInstFromFunction(*CI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    } else {
+      // Op is a call to malloc
+      if (Op->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    }
+  }
+
+  return 0;
+}
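What visitFree exploits, in C terms: a malloc whose only use is the matching free is a no-op pair, and free(NULL) is defined to do nothing, so both forms can be deleted from the IR. A trivial illustration:

#include <cstdlib>

int main() {
  void *p = std::malloc(16);  // only use of p is the free below...
  std::free(p);               // ...so instcombine deletes both calls
  std::free(nullptr);         // free(null) is likewise deleted
  return 0;
}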
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+  // Change br (not X), label True, label False to: br X, label False, True
+  Value *X = 0;
+  BasicBlock *TrueDest;
+  BasicBlock *FalseDest;
+  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+      !isa<Constant>(X)) {
+    // Swap Destinations and condition...
+    BI.setCondition(X);
+    BI.setSuccessor(0, FalseDest);
+    BI.setSuccessor(1, TrueDest);
+    return &BI;
+  }
+
+  // Canonicalize fcmp_one -> fcmp_oeq
+  FCmpInst::Predicate FPred; Value *Y;
+  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+                      TrueDest, FalseDest)) &&
+      BI.getCondition()->hasOneUse())
+    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+        FPred == FCmpInst::FCMP_OGE) {
+      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+      // Swap Destinations and condition.
+      BI.setSuccessor(0, FalseDest);
+      BI.setSuccessor(1, TrueDest);
+      Worklist.Add(Cond);
+      return &BI;
+    }
+
+  // Canonicalize icmp_ne -> icmp_eq
+  ICmpInst::Predicate IPred;
+  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+                      TrueDest, FalseDest)) &&
+      BI.getCondition()->hasOneUse())
+    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
+        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+        IPred == ICmpInst::ICMP_SGE) {
+      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+      // Swap Destinations and condition.
+      BI.setSuccessor(0, FalseDest);
+      BI.setSuccessor(1, TrueDest);
+      Worklist.Add(Cond);
+      return &BI;
+    }
+
+  return 0;
+}
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+  Value *Cond = SI.getCondition();
+  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+    if (I->getOpcode() == Instruction::Add)
+      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+          SI.setOperand(i,
+                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+                                        AddRHS));
+        SI.setOperand(0, I->getOperand(0));
+        Worklist.Add(I);
+        return &SI;
+      }
+  }
+  return 0;
+}
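The branch canonicalization above, reduced to C control flow (names illustrative): a branch on (x != y) is rewritten as a branch on (x == y) with the two targets swapped, so later passes only ever see the eq/oeq forms.

int branchOnNe(int x, int y) {
  // before: if (x != y) return 1; else return 2;
  if (x == y) return 2;   // inverted predicate...
  return 1;               // ...with swapped destinations
}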
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+  Value *Agg = EV.getAggregateOperand();
+
+  if (!EV.hasIndices())
+    return ReplaceInstUsesWith(EV, Agg);
+
+  if (Constant *C = dyn_cast<Constant>(Agg)) {
+    if (isa<UndefValue>(C))
+      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+    if (isa<ConstantAggregateZero>(C))
+      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+      // Extract the element indexed by the first index out of the constant
+      Value *V = C->getOperand(*EV.idx_begin());
+      if (EV.getNumIndices() > 1)
+        // Extract the remaining indices out of the constant indexed by the
+        // first index
+        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+      else
+        return ReplaceInstUsesWith(EV, V);
+    }
+    return 0; // Can't handle other constants
+  }
+  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+    // We're extracting from an insertvalue instruction, compare the indices
+    const unsigned *exti, *exte, *insi, *inse;
+    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+         exte = EV.idx_end(), inse = IV->idx_end();
+         exti != exte && insi != inse;
+         ++exti, ++insi) {
+      if (*insi != *exti)
+        // The insert and extract both reference distinctly different elements.
+        // This means the extract is not influenced by the insert, and we can
+        // replace the aggregate operand of the extract with the aggregate
+        // operand of the insert. i.e., replace
+        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+        // %E = extractvalue { i32, { i32 } } %I, 0
+        // with
+        // %E = extractvalue { i32, { i32 } } %A, 0
+        return ExtractValueInst::Create(IV->getAggregateOperand(),
+                                        EV.idx_begin(), EV.idx_end());
+    }
+    if (exti == exte && insi == inse)
+      // Both iterators are at the end: Index lists are identical. Replace
+      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+      // %C = extractvalue { i32, { i32 } } %B, 1, 0
+      // with "i32 42"
+      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+    if (exti == exte) {
+      // The extract list is a prefix of the insert list. i.e. replace
+      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+      // %E = extractvalue { i32, { i32 } } %I, 1
+      // with
+      // %X = extractvalue { i32, { i32 } } %A, 1
+      // %E = insertvalue { i32 } %X, i32 42, 0
+      // by switching the order of the insert and extract (though the
+      // insertvalue should be left in, since it may have other uses).
+      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+                                                 EV.idx_begin(), EV.idx_end());
+      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+                                     insi, inse);
+    }
+    if (insi == inse)
+      // The insert list is a prefix of the extract list
+      // We can simply remove the common indices from the extract and make it
+      // operate on the inserted value instead of the insertvalue result.
+      // i.e., replace
+      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+      // %E = extractvalue { i32, { i32 } } %I, 1, 0
+      // with
+      // %E extractvalue { i32 } { i32 42 }, 0
+      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+                                      exti, exte);
+  }
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+    // We're extracting from an intrinsic, see if we're the only user, which
+    // allows us to simplify multiple result intrinsics to simpler things that
+    // just get one value.
+    if (II->hasOneUse()) {
+      // Check if we're grabbing the overflow bit or the result of a 'with
+      // overflow' intrinsic. If it's the latter we can remove the intrinsic
+      // and replace it with a traditional binary instruction.
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::uadd_with_overflow:
+      case Intrinsic::sadd_with_overflow:
+        if (*EV.idx_begin() == 0) {  // Normal result.
+          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          II->replaceAllUsesWith(UndefValue::get(II->getType()));
+          EraseInstFromFunction(*II);
+          return BinaryOperator::CreateAdd(LHS, RHS);
+        }
+        break;
+      case Intrinsic::usub_with_overflow:
+      case Intrinsic::ssub_with_overflow:
+        if (*EV.idx_begin() == 0) {  // Normal result.
+          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          II->replaceAllUsesWith(UndefValue::get(II->getType()));
+          EraseInstFromFunction(*II);
+          return BinaryOperator::CreateSub(LHS, RHS);
+        }
+        break;
+      case Intrinsic::umul_with_overflow:
+      case Intrinsic::smul_with_overflow:
+        if (*EV.idx_begin() == 0) {  // Normal result.
+          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          II->replaceAllUsesWith(UndefValue::get(II->getType()));
+          EraseInstFromFunction(*II);
+          return BinaryOperator::CreateMul(LHS, RHS);
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+  // Can't simplify extracts from other values. Note that nested extracts are
+  // already simplified implicitly by the above (extract (extract (insert))
+  // will be translated into extract (insert (extract)) first and then just
+  // the value inserted, if appropriate).
+  return 0;
+}
+
+
+
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+  assert(I->hasOneUse() && "Invariants didn't hold!");
+
+  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+  if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+    return false;
+
+  // Do not sink alloca instructions out of the entry block.
+  if (isa<AllocaInst>(I) && I->getParent() ==
+        &DestBlock->getParent()->getEntryBlock())
+    return false;
+
+  // We can only sink load instructions if there is nothing between the load and
+  // the end of block that could change the value.
+  if (I->mayReadFromMemory()) {
+    for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+         Scan != E; ++Scan)
+      if (Scan->mayWriteToMemory())
+        return false;
+  }
+
+  BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+  I->moveBefore(InsertPos);
+  ++NumSunkInst;
+  return true;
+}
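The CFG shape TryToSinkInstruction looks for, reduced to C control flow (shapes and names here are illustrative only): a value computed in one block whose sole use is in that block's unique successor can be recomputed there instead.

int sinkExample(bool cond, const int *p) {
  // before: int v = *p;  if (cond) return 0;  return v + 1;
  // after the sink (legal because nothing between the load and the end of
  // its block writes memory, and the user's block has a single predecessor):
  if (cond) return 0;
  int v = *p;        // load moved down into its only user's block
  return v + 1;
}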
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+                                       SmallPtrSet<BasicBlock*, 64> &Visited,
+                                       InstCombiner &IC,
+                                       const TargetData *TD) {
+  bool MadeIRChange = false;
+  SmallVector<BasicBlock*, 256> Worklist;
+  Worklist.push_back(BB);
+
+  std::vector<Instruction*> InstrsForInstCombineWorklist;
+  InstrsForInstCombineWorklist.reserve(128);
+
+  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+
+  do {
+    BB = Worklist.pop_back_val();
+
+    // We have now visited this block! If we've already been here, ignore it.
+    if (!Visited.insert(BB)) continue;
+
+    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+      Instruction *Inst = BBI++;
+
+      // DCE instruction if trivially dead.
+      if (isInstructionTriviallyDead(Inst)) {
+        ++NumDeadInst;
+        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+        Inst->eraseFromParent();
+        continue;
+      }
+
+      // ConstantProp instruction if trivially constant.
+      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+        if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+                       << *Inst << '\n');
+          Inst->replaceAllUsesWith(C);
+          ++NumConstProp;
+          Inst->eraseFromParent();
+          continue;
+        }
+
+      if (TD) {
+        // See if we can constant fold its operands.
+        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+             i != e; ++i) {
+          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+          if (CE == 0) continue;
+
+          // If we already folded this constant, don't try again.
+          if (!FoldedConstants.insert(CE))
+            continue;
+
+          Constant *NewC = ConstantFoldConstantExpression(CE, TD);
+          if (NewC && NewC != CE) {
+            *i = NewC;
+            MadeIRChange = true;
+          }
+        }
+      }
+
+      InstrsForInstCombineWorklist.push_back(Inst);
+    }
+
+    // Recursively visit successors. If this is a branch or switch on a
+    // constant, only visit the reachable successor.
+    TerminatorInst *TI = BB->getTerminator();
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+        bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+        Worklist.push_back(ReachableBB);
+        continue;
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+        // See if this is an explicit destination.
+        for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+          if (SI->getCaseValue(i) == Cond) {
+            BasicBlock *ReachableBB = SI->getSuccessor(i);
+            Worklist.push_back(ReachableBB);
+            continue;
+          }
+
+        // Otherwise it is the default destination.
+        Worklist.push_back(SI->getSuccessor(0));
+        continue;
+      }
+    }
+
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      Worklist.push_back(TI->getSuccessor(i));
+  } while (!Worklist.empty());
+
+  // Once we've found all of the instructions to add to instcombine's worklist,
+  // add them in reverse order. This way instcombine will visit from the top
+  // of the function down. This jives well with the way that it adds all uses
+  // of instructions to the worklist after doing a transformation, thus avoiding
+  // some N^2 behavior in pathological cases.
+  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+                              InstrsForInstCombineWorklist.size());
+
+  return MadeIRChange;
+}
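The traversal skeleton of AddReachableCodeToWorklist, as a minimal standalone sketch (a toy Block type stands in for BasicBlock): depth-first over the CFG, visiting each block once, and following only the taken edge when a conditional branch's condition is a known constant.

#include <set>
#include <vector>

struct Block {
  std::vector<Block*> Succs;
  int ConstCond = -1;   // -1: unknown; 0/1: branch condition is constant
};

void walkReachable(Block *Entry, std::set<Block*> &Visited) {
  std::vector<Block*> Worklist{Entry};
  while (!Worklist.empty()) {
    Block *B = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(B).second) continue;       // already seen
    if (B->ConstCond >= 0 && B->Succs.size() == 2) {
      // Successor 0 is the "true" edge; only the live edge is visited.
      Worklist.push_back(B->Succs[B->ConstCond ? 0 : 1]);
      continue;
    }
    for (Block *S : B->Succs) Worklist.push_back(S);
  }
}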
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+  MadeIRChange = false;
+
+  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+               << F.getNameStr() << "\n");
+
+  {
+    // Do a depth-first traversal of the function, populate the worklist with
+    // the reachable instructions. Ignore blocks that are not reachable. Keep
+    // track of which blocks we visit.
+    SmallPtrSet<BasicBlock*, 64> Visited;
+    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+    // Do a quick scan over the function. If we find any blocks that are
+    // unreachable, remove any instructions inside of them. This prevents
+    // the instcombine code from having to deal with some bad special cases.
+    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+      if (!Visited.count(BB)) {
+        Instruction *Term = BB->getTerminator();
+        while (Term != BB->begin()) {   // Remove instrs bottom-up
+          BasicBlock::iterator I = Term; --I;
+
+          DEBUG(errs() << "IC: DCE: " << *I << '\n');
+          // A debug intrinsic shouldn't force another iteration if we weren't
+          // going to do one without it.
+          if (!isa<DbgInfoIntrinsic>(I)) {
+            ++NumDeadInst;
+            MadeIRChange = true;
+          }
+
+          // If I is not void type then replaceAllUsesWith undef.
+          // This allows ValueHandlers and custom metadata to adjust itself.
+          if (!I->getType()->isVoidTy())
+            I->replaceAllUsesWith(UndefValue::get(I->getType()));
+          I->eraseFromParent();
+        }
+      }
+  }
+
+  while (!Worklist.isEmpty()) {
+    Instruction *I = Worklist.RemoveOne();
+    if (I == 0) continue;  // skip null values.
+
+    // Check to see if we can DCE the instruction.
+    if (isInstructionTriviallyDead(I)) {
+      DEBUG(errs() << "IC: DCE: " << *I << '\n');
+      EraseInstFromFunction(*I);
+      ++NumDeadInst;
+      MadeIRChange = true;
+      continue;
+    }
+
+    // Instruction isn't dead, see if we can constant propagate it.
+    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+      if (Constant *C = ConstantFoldInstruction(I, TD)) {
+        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+        // Add operands to the worklist.
+        ReplaceInstUsesWith(*I, C);
+        ++NumConstProp;
+        EraseInstFromFunction(*I);
+        MadeIRChange = true;
+        continue;
+      }
+
+    // See if we can trivially sink this instruction to a successor basic block.
+    if (I->hasOneUse()) {
+      BasicBlock *BB = I->getParent();
+      Instruction *UserInst = cast<Instruction>(I->use_back());
+      BasicBlock *UserParent;
+
+      // Get the block the use occurs in.
+      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+      else
+        UserParent = UserInst->getParent();
+
+      if (UserParent != BB) {
+        bool UserIsSuccessor = false;
+        // See if the user is one of our successors.
+        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+          if (*SI == UserParent) {
+            UserIsSuccessor = true;
+            break;
+          }
+
+        // If the user is one of our immediate successors, and if that successor
+        // only has us as a predecessor (we'd have to split the critical edge
+        // otherwise), we can keep going.
+        if (UserIsSuccessor && UserParent->getSinglePredecessor())
+          // Okay, the CFG is simple enough, try to sink this instruction.
+          MadeIRChange |= TryToSinkInstruction(I, UserParent);
+      }
+    }
+
+    // Now that we have an instruction, try combining it to simplify it.
+    Builder->SetInsertPoint(I->getParent(), I);
+
+#ifndef NDEBUG
+    std::string OrigI;
+#endif
+    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+    if (Instruction *Result = visit(*I)) {
+      ++NumCombined;
+      // Should we replace the old instruction with a new one?
+      if (Result != I) {
+        DEBUG(errs() << "IC: Old = " << *I << '\n'
+                     << "    New = " << *Result << '\n');
+
+        // Everything uses the new instruction now.
+        I->replaceAllUsesWith(Result);
+
+        // Push the new instruction and any users onto the worklist.
+        Worklist.Add(Result);
+        Worklist.AddUsersToWorkList(*Result);
+
+        // Move the name to the new instruction first.
+        Result->takeName(I);
+
+        // Insert the new instruction into the basic block...
+        BasicBlock *InstParent = I->getParent();
+        BasicBlock::iterator InsertPos = I;
+
+        if (!isa<PHINode>(Result))        // If combining a PHI, don't insert
+          while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
+            ++InsertPos;
+
+        InstParent->getInstList().insert(InsertPos, Result);
+
+        EraseInstFromFunction(*I);
+      } else {
+#ifndef NDEBUG
+        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+                     << "    New = " << *I << '\n');
+#endif
+
+        // If the instruction was modified, it's possible that it is now dead.
+        // if so, remove it.
+        if (isInstructionTriviallyDead(I)) {
+          EraseInstFromFunction(*I);
+        } else {
+          Worklist.Add(I);
+          Worklist.AddUsersToWorkList(*I);
+        }
+      }
+      MadeIRChange = true;
+    }
+  }
+
+  Worklist.Zap();
+  return MadeIRChange;
+}
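The driver's shape: whole-function iterations repeat until one makes no change. A minimal sketch with a hypothetical stand-in for DoOneIteration:

#include <cstdio>

// Pretend the first two iterations change the IR; the third is a fixpoint.
static bool doOneIteration(unsigned Iteration) { return Iteration < 2; }

int main() {
  bool EverMadeChange = false;
  unsigned Iteration = 0;
  while (doOneIteration(Iteration++))
    EverMadeChange = true;
  std::printf("iterations: %u, changed: %d\n", Iteration, EverMadeChange);
}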
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+  TD = getAnalysisIfAvailable<TargetData>();
+
+
+  /// Builder - This is an IRBuilder that automatically inserts new
+  /// instructions into the worklist when they are created.
+  IRBuilder<true, TargetFolder, InstCombineIRInserter>
+    TheBuilder(F.getContext(), TargetFolder(TD),
+               InstCombineIRInserter(Worklist));
+  Builder = &TheBuilder;
+
+  bool EverMadeChange = false;
+
+  // Iterate while there is work to do.
+  unsigned Iteration = 0;
+  while (DoOneIteration(F, Iteration++))
+    EverMadeChange = true;
+
+  Builder = 0;
+  return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+  return new InstCombiner();
+}
diff --git a/lib/Transforms/InstCombine/Makefile b/lib/Transforms/InstCombine/Makefile
new file mode 100644
index 000000000000..0c488e78b6d9
--- /dev/null
+++ b/lib/Transforms/InstCombine/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/InstCombine/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInstCombine
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp
deleted file mode 100644
index 211a6d628bee..000000000000
--- a/lib/Transforms/Instrumentation/BlockProfiling.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- BlockProfiling.cpp - Insert counters for block profiling ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for basic block or
-// function profiling. This is the most basic form of profiling, which can tell
-// which blocks are hot, but cannot reliably detect hot paths through the CFG.
-// Block profiling counts the number of times each basic block executes, and
-// function profiling counts the number of times each function is called.
-//
-// Note that this implementation is very naive. Control equivalent regions of
-// the CFG should not require duplicate counters, but we do put duplicate
-// counters in.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "RSProfiling.h"
-#include "ProfilingUtils.h"
-using namespace llvm;
-
-namespace {
-  class FunctionProfiler : public RSProfilers_std {
-  public:
-    static char ID;
-    bool runOnModule(Module &M);
-  };
-}
-
-char FunctionProfiler::ID = 0;
-
-static RegisterPass<FunctionProfiler>
-X("insert-function-profiling",
-  "Insert instrumentation for function profiling");
-static RegisterAnalysisGroup<RSProfilers> XG(X);
-
-ModulePass *llvm::createFunctionProfilerPass() {
-  return new FunctionProfiler();
-}
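Both deleted profilers below are built on one primitive: a global counter array with one slot per instrumented function or block, bumped on entry. In plain C++ terms (the real pass emits load/add/store IR against a GlobalVariable; names here are illustrative):

#include <cstdio>

static unsigned Counters[2];                    // one slot per function

static void bumpCounter(unsigned i) { ++Counters[i]; }

void foo() { bumpCounter(0); /* ...original body... */ }
void bar() { bumpCounter(1); /* ...original body... */ }

int main() {
  foo(); bar(); bar();
  std::printf("foo=%u bar=%u\n", Counters[0], Counters[1]);  // foo=1 bar=2
}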
-
-bool FunctionProfiler::runOnModule(Module &M) {
-  Function *Main = M.getFunction("main");
-  if (Main == 0) {
-    errs() << "WARNING: cannot insert function profiling into a module"
-           << " with no main function!\n";
-    return false;  // No main, no instrumentation!
-  }
-
-  unsigned NumFunctions = 0;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-    if (!I->isDeclaration())
-      ++NumFunctions;
-
-  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()),
-                                   NumFunctions);
-  GlobalVariable *Counters =
-    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
-                       Constant::getNullValue(ATy), "FuncProfCounters");
-
-  // Instrument all of the functions...
-  unsigned i = 0;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-    if (!I->isDeclaration())
-      // Insert counter at the start of the function
-      IncrementCounterInBlock(&I->getEntryBlock(), i++, Counters);
-
-  // Add the initialization call to main.
-  InsertProfilingInitCall(Main, "llvm_start_func_profiling", Counters);
-  return true;
-}
-
-
-namespace {
-  class BlockProfiler : public RSProfilers_std {
-    bool runOnModule(Module &M);
-  public:
-    static char ID;
-  };
-}
-
-char BlockProfiler::ID = 0;
-static RegisterPass<BlockProfiler>
-Y("insert-block-profiling", "Insert instrumentation for block profiling");
-static RegisterAnalysisGroup<RSProfilers> YG(Y);
-
-ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }
-
-bool BlockProfiler::runOnModule(Module &M) {
-  Function *Main = M.getFunction("main");
-  if (Main == 0) {
-    errs() << "WARNING: cannot insert block profiling into a module"
-           << " with no main function!\n";
-    return false;  // No main, no instrumentation!
-  }
-
-  unsigned NumBlocks = 0;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-    if (!I->isDeclaration())
-      NumBlocks += I->size();
-
-  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks);
-  GlobalVariable *Counters =
-    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
-                       Constant::getNullValue(ATy), "BlockProfCounters");
-
-  // Instrument all of the blocks...
-  unsigned i = 0;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    if (I->isDeclaration()) continue;
-    for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)
-      // Insert counter at the start of the block
-      IncrementCounterInBlock(BB, i++, Counters);
-  }
-
-  // Add the initialization call to main.
-  InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters);
-  return true;
-}
-
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 494928e43814..128bf489787c 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,7 +1,5 @@
 add_llvm_library(LLVMInstrumentation
-  BlockProfiling.cpp
   EdgeProfiling.cpp
   OptimalEdgeProfiling.cpp
   ProfilingUtils.cpp
-  RSProfiling.cpp
   )
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 0a46fe5e8586..94b0671c5b0e 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -61,7 +61,7 @@ ModulePass *llvm::createOptimalEdgeProfilerPass() {
 inline static void printEdgeCounter(ProfileInfo::Edge e,
                                     BasicBlock* b,
                                     unsigned i) {
-  DEBUG(errs() << "--Edge Counter for " << (e) << " in " \
+  DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
                << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
 }
 
@@ -120,7 +120,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
   unsigned i = 0;
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
     if (F->isDeclaration()) continue;
-    DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+    DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
 
     // Calculate a Maximum Spanning Tree with the edge weights determined by
     // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 1679bea08c19..3214c8c45950 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
     AI = MainFn->arg_begin();
     // If the program looked at argc, have it look at the return value of the
     // init call instead.
-    if (AI->getType() != Type::getInt32Ty(Context)) {
+    if (!AI->getType()->isInteger(32)) {
       Instruction::CastOps opcode;
       if (!AI->use_empty()) {
         opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
deleted file mode 100644
index c08efc1e4bdd..000000000000
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ /dev/null
@@ -1,662 +0,0 @@
-//===- RSProfiling.cpp - Various profiling using random sampling ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// These passes implement a random sampling based profiling. Different methods
-// of choosing when to sample are supported, as well as different types of
-// profiling. This is done as two passes. The first is a sequence of profiling
-// passes which insert profiling into the program, and remember what they
-// inserted.
-//
-// The second stage duplicates all instructions in a function, ignoring the
-// profiling code, then connects the two versions together at the entry and at
-// backedges. At each connection point a choice is made as to whether to jump
-// to the profiled code (take a sample) or execute the unprofiled code.
-//
-// It is highly recommended that after this pass one runs mem2reg and adce
-// (instcombine load-vn gdce dse also are good to run afterwards)
-//
-// This design is intended to make the profiling passes independent of the RS
-// framework, but any profiling pass that implements the RSProfiling interface
-// is compatible with the rs framework (and thus can be sampled)
-//
-// TODO: obviously the block and function profiling are almost identical to the
-// existing ones, so they can be unified (esp since these passes are valid
-// without the rs framework).
-// TODO: Fix choice code so that frequency is not hard coded
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Pass.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "RSProfiling.h"
-#include <set>
-#include <map>
-#include <queue>
-using namespace llvm;
-
-namespace {
-  enum RandomMeth {
-    GBV, GBVO, HOSTCC
-  };
-}
-
-static cl::opt<RandomMeth> RandomMethod("profile-randomness",
-    cl::desc("How to randomly choose to profile:"),
-    cl::values(
-               clEnumValN(GBV, "global", "global counter"),
-               clEnumValN(GBVO, "ra_global",
-                          "register allocated global counter"),
-               clEnumValN(HOSTCC, "rdcc", "cycle counter"),
-               clEnumValEnd));
-
-namespace {
-  /// NullProfilerRS - The basic profiler that does nothing. It is the default
-  /// profiler and thus terminates RSProfiler chains. It is useful for
-  /// measuring framework overhead
-  class NullProfilerRS : public RSProfilers {
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    bool isProfiling(Value* v) {
-      return false;
-    }
-    bool runOnModule(Module &M) {
-      return false;
-    }
-    void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-    }
-  };
-}
-
-static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
-static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
-                                       "Measure profiling framework overhead");
-static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
-
-namespace {
-  /// Chooser - Something that chooses when to make a sample of the profiled code
-  class Chooser {
-  public:
-    /// ProcessChoicePoint - is called for each basic block inserted to choose
-    /// between normal and sample code
-    virtual void ProcessChoicePoint(BasicBlock*) = 0;
-    /// PrepFunction - is called once per function before other work is done.
-    /// This gives the opportunity to insert new allocas and such.
-    virtual void PrepFunction(Function*) = 0;
-    virtual ~Chooser() {}
-  };
-
-  //Things that implement sampling policies
-  //A global value that is read-mod-stored to choose when to sample.
-  //A sample is taken when the global counter hits 0
-  class GlobalRandomCounter : public Chooser {
-    GlobalVariable* Counter;
-    Value* ResetValue;
-    const IntegerType* T;
-  public:
-    GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
-    virtual ~GlobalRandomCounter();
-    virtual void PrepFunction(Function* F);
-    virtual void ProcessChoicePoint(BasicBlock* bb);
-  };
-
-  //Same as GRC, but allow register allocation of the global counter
-  class GlobalRandomCounterOpt : public Chooser {
-    GlobalVariable* Counter;
-    Value* ResetValue;
-    AllocaInst* AI;
-    const IntegerType* T;
-  public:
-    GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
-    virtual ~GlobalRandomCounterOpt();
-    virtual void PrepFunction(Function* F);
-    virtual void ProcessChoicePoint(BasicBlock* bb);
-  };
-
-  //Use the cycle counter intrinsic as a source of pseudo randomness when
-  //deciding when to sample.
-  class CycleCounter : public Chooser {
-    uint64_t rm;
-    Constant *F;
-  public:
-    CycleCounter(Module& m, uint64_t resetmask);
-    virtual ~CycleCounter();
-    virtual void PrepFunction(Function* F);
-    virtual void ProcessChoicePoint(BasicBlock* bb);
-  };
-
-  /// ProfilerRS - Insert the random sampling framework
-  struct ProfilerRS : public FunctionPass {
-    static char ID; // Pass identification, replacement for typeid
-    ProfilerRS() : FunctionPass(&ID) {}
-
-    std::map<Value*, Value*> TransCache;
-    std::set<BasicBlock*> ChoicePoints;
-    Chooser* c;
-
-    //Translate and duplicate values for the new profile free version of stuff
-    Value* Translate(Value* v);
-    //Duplicate an entire function (with out profiling)
-    void Duplicate(Function& F, RSProfilers& LI);
-    //Called once for each backedge, handle the insertion of choice points and
-    //the interconection of the two versions of the code
-    void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
-    bool runOnFunction(Function& F);
-    bool doInitialization(Module &M);
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-  };
-}
-
-static RegisterPass<ProfilerRS>
-X("insert-rs-profiling-framework",
-  "Insert random sampling instrumentation framework");
-
-char RSProfilers::ID = 0;
-char NullProfilerRS::ID = 0;
-char ProfilerRS::ID = 0;
-
-//Local utilities
-static void ReplacePhiPred(BasicBlock* btarget,
-                           BasicBlock* bold, BasicBlock* bnew);
-
-static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
-
-template<class T>
-static void recBackEdge(BasicBlock* bb, T& BackEdges,
-                        std::map<BasicBlock*, int>& color,
-                        std::map<BasicBlock*, int>& depth,
-                        std::map<BasicBlock*, int>& finish,
-                        int& time);
-
-//find the back edges and where they go to
-template<class T>
-static void getBackEdges(Function& F, T& BackEdges);
-
-
-///////////////////////////////////////
-// Methods of choosing when to profile
-///////////////////////////////////////
-
-GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
-                                         uint64_t resetval) : T(t) {
-  ConstantInt* Init = ConstantInt::get(T, resetval);
-  ResetValue = Init;
-  Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
-                               Init, "RandomSteeringCounter");
-}
-
-GlobalRandomCounter::~GlobalRandomCounter() {}
-
-void GlobalRandomCounter::PrepFunction(Function* F) {}
-
-void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
-  BranchInst* t = cast<BranchInst>(bb->getTerminator());
-
-  //decrement counter
-  LoadInst* l = new LoadInst(Counter, "counter", t);
-
-  ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
-                             ConstantInt::get(T, 0),
-                             "countercc");
-
-  Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
-                                        "counternew", t);
-  new StoreInst(nv, Counter, t);
-  t->setCondition(s);
-
-  //reset counter
-  BasicBlock* oldnext = t->getSuccessor(0);
-  BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
-                                              "reset", oldnext->getParent(),
-                                              oldnext);
-  TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
-  t->setSuccessor(0, resetblock);
-  new StoreInst(ResetValue, Counter, t2);
-  ReplacePhiPred(oldnext, bb, resetblock);
-}
-
-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
-                                               uint64_t resetval)
-  : AI(0), T(t) {
-  ConstantInt* Init = ConstantInt::get(T, resetval);
-  ResetValue = Init;
-  Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
-                               Init, "RandomSteeringCounter");
-}
-
-GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
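The decision GlobalRandomCounter emits at each choice point, in plain C++ terms (the reset value 4 here is an illustrative assumption; the real pass hard-codes a frequency, per the TODO above): decrement a counter, and when it hits zero take the profiled path and reset.

#include <cstdio>

static const unsigned ResetValue = 4;
static unsigned Counter = ResetValue;

bool chooseSample() {
  bool Sample = (Counter == 0);
  if (Sample) Counter = ResetValue;     // the "reset" block
  else --Counter;
  return Sample;                        // true -> branch to profiled copy
}

int main() {
  for (int i = 0; i < 10; ++i)
    std::printf("%d", chooseSample());  // samples once every 5 decisions
  std::printf("\n");
}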
bb.begin(); - AI = new AllocaInst(T, 0, "localcounter", InsertPt); - LoadInst* l = new LoadInst(Counter, "counterload", InsertPt); - new StoreInst(l, AI, InsertPt); - - //modify all functions and return values to restore the local variable to/from - //the global variable - for(Function::iterator fib = F->begin(), fie = F->end(); - fib != fie; ++fib) - for(BasicBlock::iterator bib = fib->begin(), bie = fib->end(); - bib != bie; ++bib) - if (isa(bib)) { - LoadInst* l = new LoadInst(AI, "counter", bib); - new StoreInst(l, Counter, bib); - l = new LoadInst(Counter, "counter", ++bib); - new StoreInst(l, AI, bib--); - } else if (isa(bib)) { - LoadInst* l = new LoadInst(AI, "counter", bib); - new StoreInst(l, Counter, bib); - - BasicBlock* bb = cast(bib)->getNormalDest(); - BasicBlock::iterator i = bb->getFirstNonPHI(); - l = new LoadInst(Counter, "counter", i); - - bb = cast(bib)->getUnwindDest(); - i = bb->getFirstNonPHI(); - l = new LoadInst(Counter, "counter", i); - new StoreInst(l, AI, i); - } else if (isa(&*bib) || isa(&*bib)) { - LoadInst* l = new LoadInst(AI, "counter", bib); - new StoreInst(l, Counter, bib); - } -} - -void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { - BranchInst* t = cast(bb->getTerminator()); - - //decrement counter - LoadInst* l = new LoadInst(AI, "counter", t); - - ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, - ConstantInt::get(T, 0), - "countercc"); - - Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), - "counternew", t); - new StoreInst(nv, AI, t); - t->setCondition(s); - - //reset counter - BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), - "reset", oldnext->getParent(), - oldnext); - TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); - t->setSuccessor(0, resetblock); - new StoreInst(ResetValue, AI, t2); - ReplacePhiPred(oldnext, bb, resetblock); -} - - -CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) { - F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter); -} - -CycleCounter::~CycleCounter() {} - -void CycleCounter::PrepFunction(Function* F) {} - -void CycleCounter::ProcessChoicePoint(BasicBlock* bb) { - BranchInst* t = cast(bb->getTerminator()); - - CallInst* c = CallInst::Create(F, "rdcc", t); - BinaryOperator* b = - BinaryOperator::CreateAnd(c, - ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm), - "mrdcc", t); - - ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b, - ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0), - "mrdccc"); - - t->setCondition(s); -} - -/////////////////////////////////////// -// Profiling: -/////////////////////////////////////// -bool RSProfilers_std::isProfiling(Value* v) { - if (profcode.find(v) != profcode.end()) - return true; - //else - RSProfilers& LI = getAnalysis(); - return LI.isProfiling(v); -} - -void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray) { - // Insert the increment after any alloca or PHI instructions... - BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); - while (isa(InsertPos)) - ++InsertPos; - - // Create the getelementptr constant expression - std::vector Indices(2); - Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext())); - Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum); - Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray, - &Indices[0], 2); - - // Load, increment and store the value back. 
-
-///////////////////////////////////////
-// Profiling:
-///////////////////////////////////////
-bool RSProfilers_std::isProfiling(Value* v) {
-  if (profcode.find(v) != profcode.end())
-    return true;
-  //else
-  RSProfilers& LI = getAnalysis<RSProfilers>();
-  return LI.isProfiling(v);
-}
-
-void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB,
-                                              unsigned CounterNum,
-                                              GlobalValue *CounterArray) {
-  // Insert the increment after any alloca or PHI instructions...
-  BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
-  while (isa<AllocaInst>(InsertPos))
-    ++InsertPos;
-
-  // Create the getelementptr constant expression
-  std::vector<Constant*> Indices(2);
-  Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext()));
-  Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum);
-  Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
-                                                        &Indices[0], 2);
-
-  // Load, increment and store the value back.
-  Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
-  profcode.insert(OldVal);
-  Value *NewVal = BinaryOperator::CreateAdd(OldVal,
-                 ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1),
-                                            "NewCounter", InsertPos);
-  profcode.insert(NewVal);
-  profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
-}
-
-void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
-  //grab any outstanding profiler, or get the null one
-  AU.addRequired<RSProfilers>();
-}
-
-///////////////////////////////////////
-// RS Framework
-///////////////////////////////////////
-
-Value* ProfilerRS::Translate(Value* v) {
-  if(TransCache[v])
-    return TransCache[v];
-
-  if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
-    if (bb == &bb->getParent()->getEntryBlock())
-      TransCache[bb] = bb; //don't translate entry block
-    else
-      TransCache[bb] = BasicBlock::Create(v->getContext(),
-                                          "dup_" + bb->getName(),
-                                          bb->getParent(), NULL);
-    return TransCache[bb];
-  } else if (Instruction* i = dyn_cast<Instruction>(v)) {
-    //we have already translated this
-    //do not translate entry block allocas
-    if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
-      TransCache[i] = i;
-      return i;
-    } else {
-      //translate this
-      Instruction* i2 = i->clone();
-      if (i->hasName())
-        i2->setName("dup_" + i->getName());
-      TransCache[i] = i2;
-      //NumNewInst++;
-      for (unsigned x = 0; x < i2->getNumOperands(); ++x)
-        i2->setOperand(x, Translate(i2->getOperand(x)));
-      return i2;
-    }
-  } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
-    TransCache[v] = v;
-    return v;
-  }
-  llvm_unreachable("Value not handled");
-  return 0;
-}
-
-void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
-{
-  //perform a breadth first search, building up a duplicate of the code
-  std::queue<BasicBlock*> worklist;
-  std::set<BasicBlock*> seen;
-
-  //This loop ensures proper BB order, to help performance
-  for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
-    worklist.push(fib);
-  while (!worklist.empty()) {
-    Translate(worklist.front());
-    worklist.pop();
-  }
-
-  //remember than reg2mem created a new entry block we don't want to duplicate
-  worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
-  seen.insert(&F.getEntryBlock());
-
-  while (!worklist.empty()) {
-    BasicBlock* bb = worklist.front();
-    worklist.pop();
-    if(seen.find(bb) == seen.end()) {
-      BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
-      BasicBlock::InstListType& instlist = bbtarget->getInstList();
-      for (BasicBlock::iterator iib = bb->begin(), iie = bb->end();
-           iib != iie; ++iib) {
-        //NumOldInst++;
-        if (!LI.isProfiling(&*iib)) {
-          Instruction* i = cast<Instruction>(Translate(iib));
-          instlist.insert(bbtarget->end(), i);
-        }
-      }
-      //updated search state;
-      seen.insert(bb);
-      TerminatorInst* ti = bb->getTerminator();
-      for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
-        BasicBlock* bbs = ti->getSuccessor(x);
-        if (seen.find(bbs) == seen.end()) {
-          worklist.push(bbs);
-        }
-      }
-    }
-  }
-}
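
Translate and Duplicate above implement a memoized clone-with-remap: every original value maps to exactly one duplicate, and operand lists are rewritten through the same cache, so shared operands and cyclic references resolve to the already-inserted cache entry instead of recursing forever. The same pattern in miniature, on a toy node graph rather than LLVM IR (all names hypothetical):

    #include <map>
    #include <vector>

    struct Node {
      std::vector<Node*> operands;
    };

    // Inserting the clone into the cache *before* remapping operands is
    // what lets self-referential structures (e.g. a loop PHI feeding
    // itself) terminate.
    Node* Translate(Node* v, std::map<Node*, Node*>& cache) {
      std::map<Node*, Node*>::iterator it = cache.find(v);
      if (it != cache.end())
        return it->second;
      Node* copy = new Node(*v);   // shallow copy, like Instruction::clone()
      cache[v] = copy;
      for (unsigned i = 0; i < copy->operands.size(); ++i)
        copy->operands[i] = Translate(copy->operands[i], cache);
      return copy;
    }
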
-
-void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
-  //given a backedge from B -> A, and translations A' and B',
-  //a: insert C and C'
-  //b: add branches in C to A and A' and in C' to A and A'
-  //c: mod terminators@B, replace A with C
-  //d: mod terminators@B', replace A' with C'
-  //e: mod phis@A for pred B to be pred C
-  //   if multiple entries, simplify to one
-  //f: mod phis@A' for pred B' to be pred C'
-  //   if multiple entries, simplify to one
-  //g: for all phis@A with pred C using x
-  //   add in edge from C' using x'
-  //   add in edge from C using x in A'
-
-  //a:
-  Function::iterator BBN = src; ++BBN;
-  BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN);
-  //ChoicePoints.insert(bbC);
-  BBN = cast<BasicBlock>(Translate(src));
-  BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN);
-  ChoicePoints.insert(bbCp);
-
-  //b:
-  BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);
-  BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)),
-                     ConstantInt::get(Type::getInt1Ty(src->getContext()), true),
-                     bbCp);
-  //c:
-  {
-    TerminatorInst* iB = src->getTerminator();
-    for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
-      if (iB->getSuccessor(x) == dst)
-        iB->setSuccessor(x, bbC);
-  }
-  //d:
-  {
-    TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
-    for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
-      if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
-        iBp->setSuccessor(x, bbCp);
-  }
-  //e:
-  ReplacePhiPred(dst, src, bbC);
-  //src could be a switch, in which case we are replacing several edges with one
-  //thus collapse those edges int the Phi
-  CollapsePhi(dst, bbC);
-  //f:
-  ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
-                 cast<BasicBlock>(Translate(src)), bbCp);
-  CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
-  //g:
-  for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
-      ++ib)
-    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
-      for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
-        if(bbC == phi->getIncomingBlock(x)) {
-          phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
-          cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x),
-                                                     bbC);
-        }
-      phi->removeIncomingValue(bbC);
-    }
-}
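
Steps a and b above wire each "choice" block with a conditional branch whose condition is initially the constant true; a Chooser's ProcessChoicePoint later replaces that condition through setCondition(), which is what keeps the sampling policy pluggable. A sketch of that wiring against the LLVM API of this era (the helper name is hypothetical):

    #include "llvm/BasicBlock.h"
    #include "llvm/Constants.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Type.h"
    using namespace llvm;

    // Create a choice point: branch to the instrumented target when the
    // (placeholder) condition is true, to its duplicate otherwise.
    BasicBlock *MakeChoicePoint(BasicBlock *InstrumentedDst,
                                BasicBlock *DuplicateDst, Function &F) {
      BasicBlock *BB = BasicBlock::Create(F.getContext(), "choice", &F);
      BranchInst::Create(InstrumentedDst, DuplicateDst,
                         ConstantInt::get(Type::getInt1Ty(F.getContext()),
                                          true),
                         BB);
      return BB;
    }
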
-
-bool ProfilerRS::runOnFunction(Function& F) {
-  if (!F.isDeclaration()) {
-    std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
-    RSProfilers& LI = getAnalysis<RSProfilers>();
-
-    getBackEdges(F, BackEdges);
-    Duplicate(F, LI);
-    //assume that stuff worked.  now connect the duplicated basic blocks
-    //with the originals in such a way as to preserve ssa.  yuk!
-    for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator
-           ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
-      ProcessBackEdge(ib->first, ib->second, F);
-
-    //oh, and add the edge from the reg2mem created entry node to the
-    //duplicated second node
-    TerminatorInst* T = F.getEntryBlock().getTerminator();
-    ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),
-                                              cast<BasicBlock>(
-                                                Translate(T->getSuccessor(0))),
-                     ConstantInt::get(Type::getInt1Ty(F.getContext()), true)));
-
-    //do whatever is needed now that the function is duplicated
-    c->PrepFunction(&F);
-
-    //add entry node to choice points
-    ChoicePoints.insert(&F.getEntryBlock());
-
-    for (std::set<BasicBlock*>::iterator
-           ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
-      c->ProcessChoicePoint(*ii);
-
-    ChoicePoints.clear();
-    TransCache.clear();
-
-    return true;
-  }
-  return false;
-}
-
-bool ProfilerRS::doInitialization(Module &M) {
-  switch (RandomMethod) {
-  case GBV:
-    c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()),
-                                (1 << 14) - 1);
-    break;
-  case GBVO:
-    c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()),
-                                   (1 << 14) - 1);
-    break;
-  case HOSTCC:
-    c = new CycleCounter(M, (1 << 14) - 1);
-    break;
-  };
-  return true;
-}
-
-void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<RSProfilers>();
-  AU.addRequiredID(DemoteRegisterToMemoryID);
-}
-
-///////////////////////////////////////
-// Utilities:
-///////////////////////////////////////
-static void ReplacePhiPred(BasicBlock* btarget,
-                           BasicBlock* bold, BasicBlock* bnew) {
-  for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
-      ib != ie; ++ib)
-    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
-      for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
-        if(bold == phi->getIncomingBlock(x))
-          phi->setIncomingBlock(x, bnew);
-    }
-}
-
-static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
-  for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
-      ib != ie; ++ib)
-    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
-      std::map<BasicBlock*, Value*> counter;
-      for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
-        if (counter[phi->getIncomingBlock(i)]) {
-          assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
-          phi->removeIncomingValue(i, false);
-        } else {
-          counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
-          ++i;
-        }
-      }
-    }
-}
-
-template<class T>
-static void recBackEdge(BasicBlock* bb, T& BackEdges,
-                        std::map<BasicBlock*, int>& color,
-                        std::map<BasicBlock*, int>& depth,
-                        std::map<BasicBlock*, int>& finish,
-                        int& time)
-{
-  color[bb] = 1;
-  ++time;
-  depth[bb] = time;
-  TerminatorInst* t= bb->getTerminator();
-  for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
-    BasicBlock* bbnew = t->getSuccessor(i);
-    if (color[bbnew] == 0)
-      recBackEdge(bbnew, BackEdges, color, depth, finish, time);
-    else if (color[bbnew] == 1) {
-      BackEdges.insert(std::make_pair(bb, bbnew));
-      //NumBackEdges++;
-    }
-  }
-  color[bb] = 2;
-  ++time;
-  finish[bb] = time;
-}
-
-
-
-//find the back edges and where they go to
-template<class T>
-static void getBackEdges(Function& F, T& BackEdges) {
-  std::map<BasicBlock*, int> color;
-  std::map<BasicBlock*, int> depth;
-  std::map<BasicBlock*, int> finish;
-  int time = 0;
-  recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
-  DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n");
-}
-
-
-//Creation functions
-ModulePass* llvm::createNullProfilerRSPass() {
-  return new NullProfilerRS();
-}
-
-FunctionPass* llvm::createRSProfilingPass() {
-  return new ProfilerRS();
-}
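
recBackEdge above is a textbook three-color DFS: an edge u -> v is a back edge exactly when v is gray, i.e. still on the current search path. Since the recursion can get deep on large CFGs, the same classification also works iteratively with an explicit stack; a generic sketch (assuming every node has a, possibly empty, successor entry):

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    template <class Node>
    void FindBackEdges(Node Entry,
                       const std::map<Node, std::vector<Node> > &Succ,
                       std::set<std::pair<Node, Node> > &BackEdges) {
      std::map<Node, int> Color;  // 0 = white, 1 = gray (on path), 2 = black
      std::vector<std::pair<Node, unsigned> > Stack;
      Color[Entry] = 1;
      Stack.push_back(std::make_pair(Entry, 0u));
      while (!Stack.empty()) {
        Node U = Stack.back().first;
        unsigned &I = Stack.back().second;
        const std::vector<Node> &S = Succ.find(U)->second;
        if (I < S.size()) {
          Node V = S[I++];
          if (Color[V] == 0) {          // tree edge: descend
            Color[V] = 1;
            Stack.push_back(std::make_pair(V, 0u));
          } else if (Color[V] == 1) {   // gray target: U -> V is a back edge
            BackEdges.insert(std::make_pair(U, V));
          }
        } else {
          Color[U] = 2;                 // finished
          Stack.pop_back();
        }
      }
    }
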
diff --git a/lib/Transforms/Instrumentation/RSProfiling.h b/lib/Transforms/Instrumentation/RSProfiling.h
deleted file mode 100644
index 8bbe7c7b28fe..000000000000
--- a/lib/Transforms/Instrumentation/RSProfiling.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- RSProfiling.h - Various profiling using random sampling ----------===//
-//
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// See notes in RSProfiling.cpp
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/RSProfiling.h"
-#include <set>
-
-namespace llvm {
-  /// RSProfilers_std - a simple support class for profilers that handles most
-  /// of the work of chaining and tracking inserted code.
-  struct RSProfilers_std : public RSProfilers {
-    static char ID;
-    std::set<Value*> profcode;
-    // Lookup up values in profcode
-    virtual bool isProfiling(Value* v);
-    // handles required chaining
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    // places counter updates in basic blocks and recordes added instructions in
-    // profcode
-    void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
-                                 GlobalValue *CounterArray);
-  };
-}
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index 025d02ad3073..ea4a1158acc7 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
 
 include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index e58fa636047d..cf5e8c07a52f 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -451,7 +451,7 @@ bool ABCD::runOnFunction(Function &F) {
   modified = false;
   createSSI(F);
   executeABCD(F);
-  DEBUG(inequality_graph.printGraph(errs(), F));
+  DEBUG(inequality_graph.printGraph(dbgs(), F));
   removePhis();
 
   inequality_graph.clear();
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 37f383fb512a..5a4984158ea0 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -62,8 +62,7 @@ bool ADCE::runOnFunction(Function& F) {
   // Propagate liveness backwards to operands.
   while (!worklist.empty()) {
-    Instruction* curr = worklist.back();
-    worklist.pop_back();
+    Instruction* curr = worklist.pop_back_val();
 
     for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
          OI != OE; ++OI)
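
The ADCE hunk above is a small idiom cleanup rather than a behavior change: SmallVector::pop_back_val() returns the element it removes, fusing the back()/pop_back() pair the old code spelled out. Standard containers have no such member, so over a std::vector the same worklist loop still needs both steps (illustrative sketch):

    #include <vector>

    // Drain a worklist in LIFO order; Visit may push more work.
    template <class T, class Fn>
    void DrainWorklist(std::vector<T> &Worklist, Fn Visit) {
      while (!Worklist.empty()) {
        T Curr = Worklist.back();   // what pop_back_val() returns
        Worklist.pop_back();
        Visit(Curr);
      }
    }
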
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 5a92399f67cc..683c1c2fd708 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -9,7 +9,6 @@ add_llvm_library(LLVMScalarOpts
   GEPSplitter.cpp
   GVN.cpp
   IndVarSimplify.cpp
-  InstructionCombining.cpp
   JumpThreading.cpp
   LICM.cpp
   LoopDeletion.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 372616c86993..9c1b440bd5c1 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -237,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
   BasicBlock *DestBB = BI->getSuccessor(0);
 
-  DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
 
   // If the destination block has a single pred, then this is a trivial edge,
   // just collapse it.
@@ -251,7 +251,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
       if (isEntry && BB != &BB->getParent()->getEntryBlock())
         BB->moveBefore(&BB->getParent()->getEntryBlock());
 
-      DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
+      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
       return;
     }
   }
@@ -294,7 +294,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   }
 
   BB->eraseFromParent();
-  DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
+  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
 
@@ -591,7 +591,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
 
   // If all the instructions matched are already in this BB, don't do anything.
   if (!AnyNonLocal) {
-    DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
     return false;
   }
 
@@ -606,12 +606,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // computation.
Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { - DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); } else { - DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 1cfde8fc0c77..320afa19d5fb 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,9 +52,9 @@ namespace { bool runOnBasicBlock(BasicBlock &BB); bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep); bool handleEndBlock(BasicBlock &BB); - bool RemoveUndeadPointers(Value* Ptr, uint64_t killPointerSize, - BasicBlock::iterator& BBI, - SmallPtrSet& deadPointers); + bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize, + BasicBlock::iterator &BBI, + SmallPtrSet &deadPointers); void DeleteDeadInstruction(Instruction *I, SmallPtrSet *deadPointers = 0); @@ -70,6 +70,8 @@ namespace { AU.addPreserved(); AU.addPreserved(); } + + unsigned getPointerSize(Value *V) const; }; } @@ -173,7 +175,7 @@ static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2, } bool DSE::runOnBasicBlock(BasicBlock &BB) { - MemoryDependenceAnalysis& MD = getAnalysis(); + MemoryDependenceAnalysis &MD = getAnalysis(); TD = getAnalysisIfAvailable(); bool MadeChange = false; @@ -355,7 +357,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - Value* killPointer = 0; + Value *killPointer = 0; uint64_t killPointerSize = ~0UL; // If we encounter a use of the pointer, it is no longer considered dead @@ -371,14 +373,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } killPointer = L->getPointerOperand(); - } else if (VAArgInst* V = dyn_cast(BBI)) { + } else if (VAArgInst *V = dyn_cast(BBI)) { killPointer = V->getOperand(0); } else if (isa(BBI) && isa(cast(BBI)->getLength())) { killPointer = cast(BBI)->getSource(); killPointerSize = cast( cast(BBI)->getLength())->getZExtValue(); - } else if (AllocaInst* A = dyn_cast(BBI)) { + } else if (AllocaInst *A = dyn_cast(BBI)) { deadPointers.erase(A); // Dead alloca's can be DCE'd when we reach them @@ -412,23 +414,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { deadPointers.clear(); return MadeChange; } - - // Get size information for the alloca - unsigned pointerSize = ~0U; - if (TD) { - if (AllocaInst* A = dyn_cast(*I)) { - if (ConstantInt* C = dyn_cast(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD->getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast( - cast(*I)->getType()); - pointerSize = TD->getTypeAllocSize(PT->getElementType()); - } - } - + // See if the call site touches it - AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, pointerSize); + AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, + getPointerSize(*I)); if (A == AliasAnalysis::ModRef) modRef++; @@ -469,11 +458,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { /// RemoveUndeadPointers - check for uses of a pointer that make it /// undead when scanning for dead stores to alloca's. 
-bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, +bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize, BasicBlock::iterator &BBI, - SmallPtrSet& deadPointers) { + SmallPtrSet &deadPointers) { AliasAnalysis &AA = getAnalysis(); - + // If the kill pointer can be easily reduced to an alloca, // don't bother doing extraneous AA queries. if (deadPointers.count(killPointer)) { @@ -488,32 +477,19 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, bool MadeChange = false; SmallVector undead; - + for (SmallPtrSet::iterator I = deadPointers.begin(), - E = deadPointers.end(); I != E; ++I) { - // Get size information for the alloca. - unsigned pointerSize = ~0U; - if (TD) { - if (AllocaInst* A = dyn_cast(*I)) { - if (ConstantInt* C = dyn_cast(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD->getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast(cast(*I)->getType()); - pointerSize = TD->getTypeAllocSize(PT->getElementType()); - } - } - + E = deadPointers.end(); I != E; ++I) { // See if this pointer could alias it - AliasAnalysis::AliasResult A = AA.alias(*I, pointerSize, + AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I), killPointer, killPointerSize); // If it must-alias and a store, we can delete it if (isa(BBI) && A == AliasAnalysis::MustAlias) { - StoreInst* S = cast(BBI); + StoreInst *S = cast(BBI); // Remove it! - BBI++; + ++BBI; DeleteDeadInstruction(S, &deadPointers); NumFastStores++; MadeChange = true; @@ -547,9 +523,8 @@ void DSE::DeleteDeadInstruction(Instruction *I, // Before we touch this instruction, remove it from memdep! MemoryDependenceAnalysis &MDA = getAnalysis(); - while (!NowDeadInsts.empty()) { - Instruction *DeadInst = NowDeadInsts.back(); - NowDeadInsts.pop_back(); + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); ++NumFastOther; @@ -573,5 +548,20 @@ void DSE::DeleteDeadInstruction(Instruction *I, DeadInst->eraseFromParent(); if (ValueSet) ValueSet->erase(DeadInst); - } + } while (!NowDeadInsts.empty()); +} + +unsigned DSE::getPointerSize(Value *V) const { + if (TD) { + if (AllocaInst *A = dyn_cast(V)) { + // Get size information for the alloca + if (ConstantInt *C = dyn_cast(A->getArraySize())) + return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); + } else { + assert(isa(V) && "Expected AllocaInst or Argument!"); + const PointerType *PT = cast(V->getType()); + return TD->getTypeAllocSize(PT->getElementType()); + } + } + return ~0U; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 612b41587e8e..ac0d850360d6 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -829,7 +829,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, SmallVector BBWorklist; BBWorklist.push_back(BB); - while (!BBWorklist.empty()) { + do { BasicBlock *Entry = BBWorklist.pop_back_val(); // Note that this sets blocks to 0 (unavailable) if they happen to not // already be in FullyAvailableBlocks. This is safe. @@ -841,7 +841,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) BBWorklist.push_back(*I); - } + } while (!BBWorklist.empty()); return false; } @@ -1022,7 +1022,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, // FIXME: Study to see if/when this happens. 
if (LoadOffset == StoreOffset) { #if 0 - errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" + dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" << "Store Ptr = " << *WritePtr << "\n" << "Store Offs = " << StoreOffset << "\n" @@ -1053,7 +1053,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, } if (isAAFailure) { #if 0 - errs() << "STORE LOAD DEP WITH COMMON BASE:\n" + dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" << "Base = " << *StoreBase << "\n" << "Store Ptr = " << *WritePtr << "\n" << "Store Offs = " << StoreOffset << "\n" @@ -1362,7 +1362,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, SmallVector Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); - //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " + //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " // << Deps.size() << *LI << '\n'); // If we had to process more than one hundred blocks to find the @@ -1375,9 +1375,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // clobber in the current block. Reject this early. if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { DEBUG( - errs() << "GVN: non-local load "; - WriteAsOperand(errs(), LI); - errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; + dbgs() << "GVN: non-local load "; + WriteAsOperand(dbgs(), LI); + dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; ); return false; } @@ -1500,7 +1500,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // load, then it is fully redundant and we can use PHI insertion to compute // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { - DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); + DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, @@ -1614,7 +1614,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // We don't currently handle critical edges :( if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" << UnavailablePred->getName() << "': " << *LI << '\n'); return false; } @@ -1646,7 +1646,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // we fail PRE. if (LoadPtr == 0) { assert(NewInsts.empty() && "Shouldn't insert insts on failure"); - DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " + DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getOperand(0) << "\n"); return false; } @@ -1679,9 +1679,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. - DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); + DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); DEBUG(if (!NewInsts.empty()) - errs() << "INSERTED " << NewInsts.size() << " INSTS: " + dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " << *NewInsts.back() << '\n'); Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, @@ -1752,7 +1752,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { } if (AvailVal) { - DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' + DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' << *AvailVal << '\n' << *L << "\n\n\n"); // Replace the load! 
@@ -1766,10 +1766,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { DEBUG( // fast print dep, using operator<< on instruction would be too slow - errs() << "GVN: load "; - WriteAsOperand(errs(), L); + dbgs() << "GVN: load "; + WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); - errs() << " is clobbered by " << *I << '\n'; + dbgs() << " is clobbered by " << *I << '\n'; ); return false; } @@ -1793,7 +1793,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { if (StoredVal == 0) return false; - DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal << '\n' << *L << "\n\n\n"); } else @@ -1822,7 +1822,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { if (AvailableVal == 0) return false; - DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal << "\n" << *L << "\n\n\n"); } else @@ -1990,7 +1990,7 @@ bool GVN::runOnFunction(Function& F) { unsigned Iteration = 0; while (ShouldContinue) { - DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); + DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); Changed |= ShouldContinue; ++Iteration; @@ -2038,7 +2038,7 @@ bool GVN::processBlock(BasicBlock *BB) { for (SmallVector::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { - DEBUG(errs() << "GVN removed: " << **I << '\n'); + DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); @@ -2196,7 +2196,7 @@ bool GVN::performPRE(Function &F) { MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); - DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 3aa4fd32348a..ce1307c8df3b 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -182,7 +182,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, else Opcode = ICmpInst::ICMP_EQ; - DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n" + DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" << " LHS:" << *CmpIndVar << '\n' << " op:\t" << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" @@ -273,7 +273,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' + DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); @@ -401,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumInserted; Changed = true; - DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n'); + DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert // the old canonical-looking variable after it so that the IR remains @@ -438,7 +438,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { IU->AddUsersIfInteresting(cast(NewICmp->getOperand(0))); // Clean up dead instructions. 
-  DeleteDeadPHIs(L->getHeader());
+  Changed |= DeleteDeadPHIs(L->getHeader());
 
   // Check a post-condition.
   assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!");
 
   return Changed;
@@ -506,7 +506,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         NewVal->takeName(Op);
         User->replaceUsesOfWith(Op, NewVal);
         UI->setOperandValToReplace(NewVal);
-        DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+        DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
                      << "   into = " << *NewVal << "\n");
         ++NumRemoved;
         Changed = true;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
deleted file mode 100644
index 516d72ea8996..000000000000
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ /dev/null
@@ -1,13736 +0,0 @@
-//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// InstructionCombining - Combine instructions to form fewer, simple
-// instructions.  This pass does not modify the CFG.  This pass is where
-// algebraic simplification happens.
-//
-// This pass combines things like:
-//    %Y = add i32 %X, 1
-//    %Z = add i32 %Y, 1
-// into:
-//    %Z = add i32 %X, 2
-//
-// This is a simple worklist driven algorithm.
-//
-// This pass guarantees that the following canonicalizations are performed on
-// the program:
-//    1. If a binary operator has a constant operand, it is moved to the RHS
-//    2. Bitwise operators with constant operands are always grouped so that
-//       shifts are performed first, then or's, then and's, then xor's.
-//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
-//    4. All cmp instructions on boolean values are replaced with logical ops
-//    5. add X, X is represented as (X*2) => (X << 1)
-//    6. Multiplies with a power-of-two constant argument are transformed into
-//       shifts.
-//   ... etc.
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "instcombine" -#include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Operator.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/ConstantRange.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/TargetFolder.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include -#include -using namespace llvm; -using namespace llvm::PatternMatch; - -STATISTIC(NumCombined , "Number of insts combined"); -STATISTIC(NumConstProp, "Number of constant folds"); -STATISTIC(NumDeadInst , "Number of dead inst eliminated"); -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); -STATISTIC(NumSunkInst , "Number of instructions sunk"); - -/// SelectPatternFlavor - We can match a variety of different patterns for -/// select operations. -enum SelectPatternFlavor { - SPF_UNKNOWN = 0, - SPF_SMIN, SPF_UMIN, - SPF_SMAX, SPF_UMAX - //SPF_ABS - TODO. -}; - -namespace { - /// InstCombineWorklist - This is the worklist management logic for - /// InstCombine. - class InstCombineWorklist { - SmallVector Worklist; - DenseMap WorklistMap; - - void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT - InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT - public: - InstCombineWorklist() {} - - bool isEmpty() const { return Worklist.empty(); } - - /// Add - Add the specified instruction to the worklist if it isn't already - /// in it. - void Add(Instruction *I) { - if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { - DEBUG(errs() << "IC: ADD: " << *I << '\n'); - Worklist.push_back(I); - } - } - - void AddValue(Value *V) { - if (Instruction *I = dyn_cast(V)) - Add(I); - } - - /// AddInitialGroup - Add the specified batch of stuff in reverse order. - /// which should only be done when the worklist is empty and when the group - /// has no duplicates. - void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { - assert(Worklist.empty() && "Worklist must be empty to add initial group"); - Worklist.reserve(NumEntries+16); - DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); - for (; NumEntries; --NumEntries) { - Instruction *I = List[NumEntries-1]; - WorklistMap.insert(std::make_pair(I, Worklist.size())); - Worklist.push_back(I); - } - } - - // Remove - remove I from the worklist if it exists. - void Remove(Instruction *I) { - DenseMap::iterator It = WorklistMap.find(I); - if (It == WorklistMap.end()) return; // Not in worklist. - - // Don't bother moving everything down, just null out the slot. 
- Worklist[It->second] = 0; - - WorklistMap.erase(It); - } - - Instruction *RemoveOne() { - Instruction *I = Worklist.back(); - Worklist.pop_back(); - WorklistMap.erase(I); - return I; - } - - /// AddUsersToWorkList - When an instruction is simplified, add all users of - /// the instruction to the work lists because they might get more simplified - /// now. - /// - void AddUsersToWorkList(Instruction &I) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); - UI != UE; ++UI) - Add(cast(*UI)); - } - - - /// Zap - check that the worklist is empty and nuke the backing store for - /// the map if it is large. - void Zap() { - assert(WorklistMap.empty() && "Worklist empty, but map not?"); - - // Do an explicit clear, this shrinks the map if needed. - WorklistMap.clear(); - } - }; -} // end anonymous namespace. - - -namespace { - /// InstCombineIRInserter - This is an IRBuilder insertion helper that works - /// just like the normal insertion helper, but also adds any new instructions - /// to the instcombine worklist. - class InstCombineIRInserter : public IRBuilderDefaultInserter { - InstCombineWorklist &Worklist; - public: - InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - - void InsertHelper(Instruction *I, const Twine &Name, - BasicBlock *BB, BasicBlock::iterator InsertPt) const { - IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); - Worklist.Add(I); - } - }; -} // end anonymous namespace - - -namespace { - class InstCombiner : public FunctionPass, - public InstVisitor { - TargetData *TD; - bool MustPreserveLCSSA; - bool MadeIRChange; - public: - /// Worklist - All of the instructions that need to be simplified. - InstCombineWorklist Worklist; - - /// Builder - This is an IRBuilder that automatically inserts new - /// instructions into the worklist when they are created. - typedef IRBuilder BuilderTy; - BuilderTy *Builder; - - static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} - - LLVMContext *Context; - LLVMContext *getContext() const { return Context; } - - public: - virtual bool runOnFunction(Function &F); - - bool DoOneIteration(Function &F, unsigned ItNum); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); - AU.setPreservesCFG(); - } - - TargetData *getTargetData() const { return TD; } - - // Visitation implementation - Implement instruction combining for different - // instruction types. 
The semantics are as follows: - // Return Value: - // null - No change was made - // I - Change was made, I is still valid, I may be dead though - // otherwise - Change was made, replace I with returned instruction - // - Instruction *visitAdd(BinaryOperator &I); - Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); - Instruction *visitSub(BinaryOperator &I); - Instruction *visitFSub(BinaryOperator &I); - Instruction *visitMul(BinaryOperator &I); - Instruction *visitFMul(BinaryOperator &I); - Instruction *visitURem(BinaryOperator &I); - Instruction *visitSRem(BinaryOperator &I); - Instruction *visitFRem(BinaryOperator &I); - bool SimplifyDivRemOfSelect(BinaryOperator &I); - Instruction *commonRemTransforms(BinaryOperator &I); - Instruction *commonIRemTransforms(BinaryOperator &I); - Instruction *commonDivTransforms(BinaryOperator &I); - Instruction *commonIDivTransforms(BinaryOperator &I); - Instruction *visitUDiv(BinaryOperator &I); - Instruction *visitSDiv(BinaryOperator &I); - Instruction *visitFDiv(BinaryOperator &I); - Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); - Instruction *visitAnd(BinaryOperator &I); - Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C); - Instruction *visitOr (BinaryOperator &I); - Instruction *visitXor(BinaryOperator &I); - Instruction *visitShl(BinaryOperator &I); - Instruction *visitAShr(BinaryOperator &I); - Instruction *visitLShr(BinaryOperator &I); - Instruction *commonShiftTransforms(BinaryOperator &I); - Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, - Constant *RHSC); - Instruction *visitFCmpInst(FCmpInst &I); - Instruction *visitICmpInst(ICmpInst &I); - Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); - Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHS, - ConstantInt *RHS); - Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS); - Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, Value *TheAdd); - Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, Instruction &I); - Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, - BinaryOperator &I); - Instruction *commonCastTransforms(CastInst &CI); - Instruction *commonIntCastTransforms(CastInst &CI); - Instruction *commonPointerCastTransforms(CastInst &CI); - Instruction *visitTrunc(TruncInst &CI); - Instruction *visitZExt(ZExtInst &CI); - Instruction *visitSExt(SExtInst &CI); - Instruction *visitFPTrunc(FPTruncInst &CI); - Instruction *visitFPExt(CastInst &CI); - Instruction *visitFPToUI(FPToUIInst &FI); - Instruction *visitFPToSI(FPToSIInst &FI); - Instruction *visitUIToFP(CastInst &CI); - Instruction *visitSIToFP(CastInst &CI); - Instruction *visitPtrToInt(PtrToIntInst &CI); - Instruction *visitIntToPtr(IntToPtrInst &CI); - Instruction *visitBitCast(BitCastInst &CI); - Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI); - Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); - Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, - Value *A, Value *B, Instruction &Outer, - SelectPatternFlavor SPF2, Value 
*C); - Instruction *visitSelectInst(SelectInst &SI); - Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); - Instruction *visitCallInst(CallInst &CI); - Instruction *visitInvokeInst(InvokeInst &II); - - Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); - Instruction *visitPHINode(PHINode &PN); - Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); - Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitFree(Instruction &FI); - Instruction *visitLoadInst(LoadInst &LI); - Instruction *visitStoreInst(StoreInst &SI); - Instruction *visitBranchInst(BranchInst &BI); - Instruction *visitSwitchInst(SwitchInst &SI); - Instruction *visitInsertElementInst(InsertElementInst &IE); - Instruction *visitExtractElementInst(ExtractElementInst &EI); - Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); - Instruction *visitExtractValueInst(ExtractValueInst &EV); - - // visitInstruction - Specify what to return for unhandled instructions... - Instruction *visitInstruction(Instruction &I) { return 0; } - - private: - Instruction *visitCallSite(CallSite CS); - bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); - Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, - bool DoXform = true); - bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - DbgDeclareInst *hasOneUsePlusDeclare(Value *V); - - - public: - // InsertNewInstBefore - insert an instruction New before instruction Old - // in the program. Add the new instruction to the worklist. - // - Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { - assert(New && New->getParent() == 0 && - "New instruction already inserted into a basic block!"); - BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst - Worklist.Add(New); - return New; - } - - // ReplaceInstUsesWith - This method is to be used when an instruction is - // found to be dead, replacable with another preexisting expression. Here - // we add all uses of I to the worklist, replace all uses of I with the new - // value, then return I, so that the inst combiner will know that I was - // modified. - // - Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. - - // If we are replacing the instruction with itself, this must be in a - // segment of unreachable code, so just clobber the instruction. - if (&I == V) - V = UndefValue::get(I.getType()); - - I.replaceAllUsesWith(V); - return &I; - } - - // EraseInstFromFunction - When dealing with an instruction that has side - // effects or produces a void value, we can't rely on DCE to delete the - // instruction. Instead, visit methods should return the value returned by - // this function. - Instruction *EraseInstFromFunction(Instruction &I) { - DEBUG(errs() << "IC: ERASE " << I << '\n'); - - assert(I.use_empty() && "Cannot erase instruction that is used!"); - // Make sure that we reprocess all operands now that we reduced their - // use counts. 
- if (I.getNumOperands() < 8) { - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast(*i)) - Worklist.Add(Op); - } - Worklist.Remove(&I); - I.eraseFromParent(); - MadeIRChange = true; - return 0; // Don't do anything with FI - } - - void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0) const { - return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); - } - - bool MaskedValueIsZero(Value *V, const APInt &Mask, - unsigned Depth = 0) const { - return llvm::MaskedValueIsZero(V, Mask, TD, Depth); - } - unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { - return llvm::ComputeNumSignBits(Op, TD, Depth); - } - - private: - - /// SimplifyCommutative - This performs a few simplifications for - /// commutative operators. - bool SimplifyCommutative(BinaryOperator &I); - - /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value - /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth=0); - - /// SimplifyDemandedInstructionBits - Inst is an integer instruction that - /// SimplifyDemandedBits knows about. See if the instruction has any - /// properties that allow us to simplify its operands. - bool SimplifyDemandedInstructionBits(Instruction &Inst); - - Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, unsigned Depth = 0); - - // FoldOpIntoPhi - Given a binary operator, cast instruction, or select - // which has a PHI node as operand #0, see if we can fold the instruction - // into the PHI (which is only possible if all operands to the PHI are - // constants). - // - // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms - // that would normally be unprofitable because they strongly encourage jump - // threading. - Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); - - // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" - // operator and they all are only used by the PHI, PHI together their - // inputs, and do the operation once, to the result of the PHI. 
- Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); - Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); - Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); - Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - - - Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, - ConstantInt *AndRHS, BinaryOperator &TheAnd); - - Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, - bool isSub, Instruction &I); - Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, Instruction &IB); - Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); - Instruction *MatchBSwap(BinaryOperator &I); - bool SimplifyStoreAtEndOfBlock(StoreInst &SI); - Instruction *SimplifyMemTransfer(MemIntrinsic *MI); - Instruction *SimplifyMemSet(MemSetInst *MI); - - - Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - - bool CanEvaluateInDifferentType(Value *V, const Type *Ty, - unsigned CastOpc, int &NumCastsRemoved); - unsigned GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign = 0); - - }; -} // end anonymous namespace - -char InstCombiner::ID = 0; -static RegisterPass -X("instcombine", "Combine redundant instructions"); - -// getComplexity: Assign a complexity or rank value to LLVM Values... -// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst -static unsigned getComplexity(Value *V) { - if (isa(V)) { - if (BinaryOperator::isNeg(V) || - BinaryOperator::isFNeg(V) || - BinaryOperator::isNot(V)) - return 3; - return 4; - } - if (isa(V)) return 3; - return isa(V) ? (isa(V) ? 0 : 1) : 2; -} - -// isOnlyUse - Return true if this instruction will be deleted if we stop using -// it. -static bool isOnlyUse(Value *V) { - return V->hasOneUse() || isa(V); -} - -// getPromotedType - Return the specified type promoted as it would be to pass -// though a va_arg area... -static const Type *getPromotedType(const Type *Ty) { - if (const IntegerType* ITy = dyn_cast(Ty)) { - if (ITy->getBitWidth() < 32) - return Type::getInt32Ty(Ty->getContext()); - } - return Ty; -} - -/// ShouldChangeType - Return true if it is desirable to convert a computation -/// from 'From' to 'To'. We don't want to convert from a legal to an illegal -/// type for example, or from a smaller to a larger illegal type. -static bool ShouldChangeType(const Type *From, const Type *To, - const TargetData *TD) { - assert(isa(From) && isa(To)); - - // If we don't have TD, we don't know if the source/dest are legal. - if (!TD) return false; - - unsigned FromWidth = From->getPrimitiveSizeInBits(); - unsigned ToWidth = To->getPrimitiveSizeInBits(); - bool FromLegal = TD->isLegalInteger(FromWidth); - bool ToLegal = TD->isLegalInteger(ToWidth); - - // If this is a legal integer from type, and the result would be an illegal - // type, don't do the transformation. - if (FromLegal && !ToLegal) - return false; - - // Otherwise, if both are illegal, do not increase the size of the result. We - // do allow things like i160 -> i64, but not i64 -> i160. - if (!FromLegal && !ToLegal && ToWidth > FromWidth) - return false; - - return true; -} - -/// getBitCastOperand - If the specified operand is a CastInst, a constant -/// expression bitcast, or a GetElementPtrInst with all zero indices, return the -/// operand value, otherwise return null. 
-static Value *getBitCastOperand(Value *V) { - if (Operator *O = dyn_cast(V)) { - if (O->getOpcode() == Instruction::BitCast) - return O->getOperand(0); - if (GEPOperator *GEP = dyn_cast(V)) - if (GEP->hasAllZeroIndices()) - return GEP->getPointerOperand(); - } - return 0; -} - -/// This function is a wrapper around CastInst::isEliminableCastPair. It -/// simply extracts arguments and returns what that function returns. -static Instruction::CastOps -isEliminableCastPair( - const CastInst *CI, ///< The first cast instruction - unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction - TargetData *TD ///< The target data for pointer size -) { - - const Type *SrcTy = CI->getOperand(0)->getType(); // A from above - const Type *MidTy = CI->getType(); // B from above - - // Get the opcodes of the two Cast instructions - Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); - Instruction::CastOps secondOp = Instruction::CastOps(opcode); - - unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, - TD ? TD->getIntPtrType(CI->getContext()) : 0); - - // We don't want to form an inttoptr or ptrtoint that converts to an integer - // type that differs from the pointer size. - if ((Res == Instruction::IntToPtr && - (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || - (Res == Instruction::PtrToInt && - (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) - Res = 0; - - return Instruction::CastOps(Res); -} - -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, - const Type *Ty, TargetData *TD) { - if (V->getType() == Ty || isa(V)) return false; - - // If this is another cast that can be eliminated, it isn't codegen either. - if (const CastInst *CI = dyn_cast(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) - return false; - return true; -} - -// SimplifyCommutative - This performs a few simplifications for commutative -// operators: -// -// 1. Order operands such that they are listed from right (least complex) to -// left (most complex). This puts constants before unary operators before -// binary operators. -// -// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) -// 3. 
Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) -// -bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { - bool Changed = false; - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) - Changed = !I.swapOperands(); - - if (!I.isAssociative()) return Changed; - Instruction::BinaryOps Opcode = I.getOpcode(); - if (BinaryOperator *Op = dyn_cast(I.getOperand(0))) - if (Op->getOpcode() == Opcode && isa(Op->getOperand(1))) { - if (isa(I.getOperand(1))) { - Constant *Folded = ConstantExpr::get(I.getOpcode(), - cast(I.getOperand(1)), - cast(Op->getOperand(1))); - I.setOperand(0, Op->getOperand(0)); - I.setOperand(1, Folded); - return true; - } else if (BinaryOperator *Op1=dyn_cast(I.getOperand(1))) - if (Op1->getOpcode() == Opcode && isa(Op1->getOperand(1)) && - isOnlyUse(Op) && isOnlyUse(Op1)) { - Constant *C1 = cast(Op->getOperand(1)); - Constant *C2 = cast(Op1->getOperand(1)); - - // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), - Op1->getOperand(0), - Op1->getName(), &I); - Worklist.Add(New); - I.setOperand(0, New); - I.setOperand(1, Folded); - return true; - } - } - return Changed; -} - -// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction -// if the LHS is a constant zero (which is the 'negate' form). -// -static inline Value *dyn_castNegVal(Value *V) { - if (BinaryOperator::isNeg(V)) - return BinaryOperator::getNegArgument(V); - - // Constants can be considered to be negated values if they can be folded. - if (ConstantInt *C = dyn_cast(V)) - return ConstantExpr::getNeg(C); - - if (ConstantVector *C = dyn_cast(V)) - if (C->getType()->getElementType()->isInteger()) - return ConstantExpr::getNeg(C); - - return 0; -} - -// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the -// instruction if the LHS is a constant negative zero (which is the 'negate' -// form). -// -static inline Value *dyn_castFNegVal(Value *V) { - if (BinaryOperator::isFNeg(V)) - return BinaryOperator::getFNegArgument(V); - - // Constants can be considered to be negated values if they can be folded. - if (ConstantFP *C = dyn_cast(V)) - return ConstantExpr::getFNeg(C); - - if (ConstantVector *C = dyn_cast(V)) - if (C->getType()->getElementType()->isFloatingPoint()) - return ConstantExpr::getFNeg(C); - - return 0; -} - -/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, -/// returning the kind and providing the out parameter results if we -/// successfully match. -static SelectPatternFlavor -MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { - SelectInst *SI = dyn_cast(V); - if (SI == 0) return SPF_UNKNOWN; - - ICmpInst *ICI = dyn_cast(SI->getCondition()); - if (ICI == 0) return SPF_UNKNOWN; - - LHS = ICI->getOperand(0); - RHS = ICI->getOperand(1); - - // (icmp X, Y) ? X : Y - if (SI->getTrueValue() == ICI->getOperand(0) && - SI->getFalseValue() == ICI->getOperand(1)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMAX; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMAX; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMIN; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMIN; - } - } - - // (icmp X, Y) ? 
Y : X - if (SI->getTrueValue() == ICI->getOperand(1) && - SI->getFalseValue() == ICI->getOperand(0)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMIN; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMIN; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMAX; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMAX; - } - } - - // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - - return SPF_UNKNOWN; -} - -/// isFreeToInvert - Return true if the specified value is free to invert (apply -/// ~ to). This happens in cases where the ~ can be eliminated. -static inline bool isFreeToInvert(Value *V) { - // ~(~(X)) -> X. - if (BinaryOperator::isNot(V)) - return true; - - // Constants can be considered to be not'ed values. - if (isa(V)) - return true; - - // Compares can be inverted if they have a single use. - if (CmpInst *CI = dyn_cast(V)) - return CI->hasOneUse(); - - return false; -} - -static inline Value *dyn_castNotVal(Value *V) { - // If this is not(not(x)) don't return that this is a not: we want the two - // not's to be folded first. - if (BinaryOperator::isNot(V)) { - Value *Operand = BinaryOperator::getNotArgument(V); - if (!isFreeToInvert(Operand)) - return Operand; - } - - // Constants can be considered to be not'ed values... - if (ConstantInt *C = dyn_cast(V)) - return ConstantInt::get(C->getType(), ~C->getValue()); - return 0; -} - - - -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. -// -static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { - if (V->hasOneUse() && V->getType()->isInteger()) - if (Instruction *I = dyn_cast(V)) { - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast(I->getOperand(1)))) { - // The multiplier is really 1 << CST. - uint32_t BitWidth = cast(V->getType())->getBitWidth(); - uint32_t CSTVal = CST->getLimitedValue(BitWidth); - CST = ConstantInt::get(V->getType()->getContext(), - APInt(BitWidth, 1).shl(CSTVal)); - return I->getOperand(0); - } - } - return 0; -} - -/// AddOne - Add one to a ConstantInt -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, - ConstantInt::get(C->getType(), 1)); -} -/// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, - ConstantInt::get(C->getType(), 1)); -} -/// MultiplyOverflows - True if the multiply can not be expressed in an int -/// this size. -static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { - uint32_t W = C1->getBitWidth(); - APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); - if (sign) { - LHSExt.sext(W * 2); - RHSExt.sext(W * 2); - } else { - LHSExt.zext(W * 2); - RHSExt.zext(W * 2); - } - - APInt MulExt = LHSExt * RHSExt; - - if (!sign) - return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); - - APInt Min = APInt::getSignedMinValue(W).sext(W * 2); - APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); - return MulExt.slt(Min) || MulExt.sgt(Max); -} - - -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer. 
If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. -static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { - assert(I && "No instruction?"); - assert(OpNo < I->getNumOperands() && "Operand index too large"); - - // If the operand is not a constant integer, nothing to do. - ConstantInt *OpC = dyn_cast(I->getOperand(OpNo)); - if (!OpC) return false; - - // If there are no bits set that aren't demanded, nothing to do. - Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); - if ((~Demanded & OpC->getValue()) == 0) - return false; - - // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= OpC->getValue(); - I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); - return true; -} - -// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a -// set of known zero and one bits, compute the maximum and minimum values that -// could have the specified known zero and known one bits, returning them in -// min/max. -static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, - const APInt& KnownOne, - APInt& Min, APInt& Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && - "KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); - - // The minimum value is when all unknown bits are zeros, EXCEPT for the sign - // bit if it is unknown. - Min = KnownOne; - Max = KnownOne|UnknownBits; - - if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.set(Min.getBitWidth()-1); - Max.clear(Max.getBitWidth()-1); - } -} - -// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and -// a set of known zero and one bits, compute the maximum and minimum values that -// could have the specified known zero and known one bits, returning them in -// min/max. -static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, - APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && - "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); - - // The minimum value is when the unknown bits are all zeros. - Min = KnownOne; - // The maximum value is when the unknown bits are all ones. - Max = KnownOne|UnknownBits; -} - -/// SimplifyDemandedInstructionBits - Inst is an integer instruction that -/// SimplifyDemandedBits knows about. See if the instruction has any -/// properties that allow us to simplify its operands. -bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, - KnownZero, KnownOne, 0); - if (V == 0) return false; - if (V == &Inst) return true; - ReplaceInstUsesWith(Inst, V); - return true; -} - -/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the -/// specified instruction operand if possible, updating it in place. It returns -/// true if it made any change and false otherwise. 
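A minimal standalone sketch of the unsigned min/max computation above, using 8-bit masks in place of APInt (the mask values are illustrative): unknown bits contribute 0 to the minimum and 1 to the maximum.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t knownZero = 0xF4, knownOne = 0x02;           // bits 0 and 3 unknown
      uint8_t unknown = (uint8_t)~(knownZero | knownOne);  // == 0x09
      uint8_t min = knownOne;                              // all unknowns zero
      uint8_t max = (uint8_t)(knownOne | unknown);         // all unknowns one
      assert(unknown == 0x09 && min == 0x02 && max == 0x0B);
    }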
-bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
-                                        APInt &KnownZero, APInt &KnownOne,
-                                        unsigned Depth) {
-  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
-                                          KnownZero, KnownOne, Depth);
-  if (NewVal == 0) return false;
-  U = NewVal;
-  return true;
-}
-
-
-/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
-/// value based on the demanded bits. When this function is called, it is known
-/// that only the bits set in DemandedMask of the result of V are ever used
-/// downstream. Consequently, depending on the mask and V, it may be possible
-/// to replace V with a constant or one of its operands. In such cases, this
-/// function does the replacement and returns the simplified value. In all
-/// other cases, it returns null after analyzing the expression, setting
-/// KnownOne to the bits of the expression that are known to be one and
-/// KnownZero to all the bits that are known to be zero. These are provided to
-/// potentially allow the caller (which might recursively be
-/// SimplifyDemandedBits itself) to simplify the expression. KnownOne and
-/// KnownZero always follow the invariant that KnownOne & KnownZero == 0. That
-/// is, a bit can't be both 1 and 0. Note that the bits in KnownOne and
-/// KnownZero may only be accurate for those bits set in DemandedMask. Note
-/// also that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all
-/// be the same.
-///
-/// This returns null if it did not change anything and it permits no
-/// simplification. This returns V itself if it did some simplification of V's
-/// operands based on the information about what bits are demanded. This returns
-/// some other non-null value if it found out that V is equal to another value
-/// in the context where the specified bits are demanded, but not for all users.
-Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
-                                             APInt &KnownZero, APInt &KnownOne,
-                                             unsigned Depth) {
-  assert(V != 0 && "Null pointer of Value???");
-  assert(Depth <= 6 && "Limit Search Depth");
-  uint32_t BitWidth = DemandedMask.getBitWidth();
-  const Type *VTy = V->getType();
-  assert((TD || !isa<PointerType>(VTy)) &&
-         "SimplifyDemandedBits needs to know bit widths!");
-  assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
-         (!VTy->isIntOrIntVector() ||
-          VTy->getScalarSizeInBits() == BitWidth) &&
-         KnownZero.getBitWidth() == BitWidth &&
-         KnownOne.getBitWidth() == BitWidth &&
-         "Value *V, DemandedMask, KnownZero and KnownOne "
-         "must have same BitWidth");
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    // We know all of the bits for a constant!
-    KnownOne = CI->getValue() & DemandedMask;
-    KnownZero = ~KnownOne & DemandedMask;
-    return 0;
-  }
-  if (isa<ConstantPointerNull>(V)) {
-    // We know all of the bits for a constant!
-    KnownOne.clear();
-    KnownZero = DemandedMask;
-    return 0;
-  }
-
-  KnownZero.clear();
-  KnownOne.clear();
-  if (DemandedMask == 0) {   // Not demanding any bits from V.
-    if (isa<UndefValue>(V))
-      return 0;
-    return UndefValue::get(VTy);
-  }
-
-  if (Depth == 6)        // Limit search depth.
-    return 0;
-
-  APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-  APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
-
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) {
-    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
-    return 0;        // Only analyze instructions.
- } - - // If there are multiple uses of this value and we aren't at the root, then - // we can't do any simplifications of the operands, because DemandedMask - // only reflects the bits demanded by *one* of the users. - if (Depth != 0 && !I->hasOneUse()) { - // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can - // do simplifications that apply to *just* the one user if we know that - // this instruction has a simpler value in that context. - if (I->getOpcode() == Instruction::And) { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and' in this - // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - } else if (I->getOpcode() == Instruction::Or) { - // We can simplify (X|Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1); - - // If all of the demanded bits are known zero on one side, return the - // other. These bits cannot contribute to the result of the 'or' in this - // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - } - - // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); - return 0; - } - - // If this is the root being simplified, allow it to have multiple uses, - // just set the DemandedMask to all bits so that we can try to simplify the - // operands. This allows visitTruncInst (for example) to simplify the - // operand of a trunc without duplicating all the logic below. - if (Depth == 0 && !V->hasOneUse()) - DemandedMask = APInt::getAllOnesValue(BitWidth); - - switch (I->getOpcode()) { - default: - ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - case Instruction::And: - // If either the LHS or the RHS are Zero, the result is zero. 
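As a concrete instance of the multi-use 'and' reasoning above, sketched with plain 8-bit masks rather than APInt (the mask values are illustrative): if the RHS is known one in every position where the LHS could be nonzero and a bit is demanded, the 'and' passes the LHS through unchanged.

    #include <cassert>
    #include <cstdint>

    int main() {
      // x & 0xF0 with only the high nibble demanded: the RHS is known one
      // in every demanded position, so the result is just x there.
      uint8_t demanded = 0xF0;
      uint8_t rhsKnownOne = 0xF0, lhsKnownZero = 0x00;
      // The condition tested by the code above:
      //   (Demanded & ~LHSKnownZero & RHSKnownOne) == (Demanded & ~LHSKnownZero)
      bool lhsSuffices =
          (uint8_t)(demanded & ~lhsKnownZero & rhsKnownOne) ==
          (uint8_t)(demanded & ~lhsKnownZero);
      assert(lhsSuffices);   // i.e. return I->getOperand(0)
    }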
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) - return I; - - // Output known-1 bits are only known if set in both the LHS & RHS. - RHSKnownOne &= LHSKnownOne; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - RHSKnownZero |= LHSKnownZero; - break; - case Instruction::Or: - // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - - // Output known-0 bits are only known if clear in both the LHS & RHS. - RHSKnownZero &= LHSKnownZero; - // Output known-1 are known to be set if set in either the LHS | RHS. - RHSKnownOne |= LHSKnownOne; - break; - case Instruction::Xor: { - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'xor'. 
- if ((DemandedMask & RHSKnownZero) == DemandedMask) - return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) - return I->getOperand(1); - - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); - - // If all of the demanded bits are known to be zero on one side or the - // other, turn this into an *inclusive* or. - // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { - Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), - I->getName()); - return InsertNewInstBefore(Or, *I); - } - - // If all of the demanded bits on one side are known, and all of the set - // bits on that side are also known to be set on the other side, turn this - // into an AND, as we know the bits will be cleared. - // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); - return InsertNewInstBefore(And, *I); - } - } - - // If the RHS is a constant, see if we can simplify it. - // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - - // If our LHS is an 'and' and if it has one use, and if any of the bits we - // are flipping are known to be set, then the xor is just resetting those - // bits to zero. We can just knock out bits from the 'and' and the 'xor', - // simplifying both of them. - if (Instruction *LHSInst = dyn_cast(I->getOperand(0))) - if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && - isa(I->getOperand(1)) && - isa(LHSInst->getOperand(1)) && - (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { - ConstantInt *AndRHS = cast(LHSInst->getOperand(1)); - ConstantInt *XorRHS = cast(I->getOperand(1)); - APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); - - Constant *AndC = - ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); - InsertNewInstBefore(NewAnd, *I); - - Constant *XorC = - ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); - return InsertNewInstBefore(NewXor, *I); - } - - - RHSKnownZero = KnownZeroOut; - RHSKnownOne = KnownOneOut; - break; - } - case Instruction::Select: - if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(I, 1, DemandedMask) || - ShrinkDemandedConstant(I, 2, DemandedMask)) - return I; - - // Only known if known in both the LHS and RHS. 
- RHSKnownOne &= LHSKnownOne; - RHSKnownZero &= LHSKnownZero; - break; - case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.zext(truncBf); - RHSKnownZero.zext(truncBf); - RHSKnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - DemandedMask.trunc(BitWidth); - RHSKnownZero.trunc(BitWidth); - RHSKnownOne.trunc(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - break; - } - case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) - return false; // vector->int or fp->int? - - if (const VectorType *DstVTy = dyn_cast(I->getType())) { - if (const VectorType *SrcVTy = - dyn_cast(I->getOperand(0)->getType())) { - if (DstVTy->getNumElements() != SrcVTy->getNumElements()) - // Don't touch a bitcast between vectors of different element counts. - return false; - } else - // Don't touch a scalar-to-vector bitcast. - return false; - } else if (isa(I->getOperand(0)->getType())) - // Don't touch a vector-to-scalar bitcast. - return false; - - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - break; - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - DemandedMask.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - DemandedMask.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - break; - } - case Instruction::SExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - APInt InputDemandedBits = DemandedMask & - APInt::getLowBitsSet(BitWidth, SrcBitWidth); - - APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. - if ((NewBits & DemandedMask) != 0) - InputDemandedBits.set(SrcBitWidth-1); - - InputDemandedBits.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - InputDemandedBits.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. - - // If the input sign bit is known zero, or if the NewBits are not demanded - // convert this into a zero extension. 
- if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { - // Convert to ZExt cast - CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); - return InsertNewInstBefore(NewCast, *I); - } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set - RHSKnownOne |= NewBits; - } - break; - } - case Instruction::Add: { - // Figure out what the input bits are. If the top bits of the and result - // are not demanded, then the add doesn't demand them from its input - // either. - unsigned NLZ = DemandedMask.countLeadingZeros(); - - // If there is a constant on the RHS, there are a variety of xformations - // we can do. - if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { - // If null, this should be simplified elsewhere. Some of the xforms here - // won't work if the RHS is zero. - if (RHS->isZero()) - break; - - // If the top bit of the output is demanded, demand everything from the - // input. Otherwise, we demand all the input bits except NLZ top bits. - APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); - - // Find information about known zero/one bits in the input. - if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - - // If the RHS of the add has bits set that can't affect the input, reduce - // the constant. - if (ShrinkDemandedConstant(I, 1, InDemandedBits)) - return I; - - // Avoid excess work. - if (LHSKnownZero == 0 && LHSKnownOne == 0) - break; - - // Turn it into OR if input bits are zero. - if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { - Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), - I->getName()); - return InsertNewInstBefore(Or, *I); - } - - // We can say something about the output known-zero and known-one bits, - // depending on potential carries from the input constant and the - // unknowns. For example if the LHS is known to have at most the 0x0F0F0 - // bits set and the RHS constant is 0x01001, then we know we have a known - // one mask of 0x00001 and a known zero mask of 0xE0F0E. - - // To compute this, we first compute the potential carry bits. These are - // the bits which may be modified. I'm not aware of a better way to do - // this scan. - const APInt &RHSVal = RHS->getValue(); - APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); - - // Now that we know which bits have carries, compute the known-1/0 sets. - - // Bits are known one if they are known zero in one operand and one in the - // other, and there is no input carry. - RHSKnownOne = ((LHSKnownZero & RHSVal) | - (LHSKnownOne & ~RHSVal)) & ~CarryBits; - - // Bits are known zero if they are known zero in both operands and there - // is no input carry. - RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; - } else { - // If the high-bits of this ADD are not demanded, then it does not demand - // the high bits of its LHS or RHS. - if (DemandedMask[BitWidth-1] == 0) { - // Right fill the mask of bits for this ADD to demand the most - // significant bit and all those below it. - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - } - } - break; - } - case Instruction::Sub: - // If the high-bits of this SUB are not demanded, then it does not demand - // the high bits of its LHS or RHS. 
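The carry-bit formula in the Add case above can be checked numerically. A standalone 8-bit sketch of the same computation, with values chosen to mirror the comment's example; this is not the in-tree code:

    #include <cassert>
    #include <cstdint>

    int main() {
      // 8-bit analogue: the LHS may only have 0xF0 bits set
      // (LHSKnownZero = 0x0F, LHSKnownOne = 0), the RHS constant is 0x01.
      uint8_t lhsKnownZero = 0x0F, lhsKnownOne = 0x00, rhs = 0x01;
      // CarryBits = ((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal))
      uint8_t carry = (uint8_t)(((uint8_t)(~lhsKnownZero + rhs)) ^
                                ((uint8_t)(~lhsKnownZero ^ rhs)));
      uint8_t knownOne  = (uint8_t)(((lhsKnownZero & rhs) |
                                     (lhsKnownOne & ~rhs)) & ~carry);
      uint8_t knownZero = (uint8_t)(lhsKnownZero & ~rhs & ~carry);
      assert(carry == 0x00);     // bit 0 of the sum can never receive a carry
      assert(knownOne == 0x01);  // the low bit of the sum is always set
      assert(knownZero == 0x0E); // bits 1..3 of the sum are always clear
    }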
- if (DemandedMask[BitWidth-1] == 0) { - // Right fill the mask of bits for this SUB to demand the most - // significant bit and all those below it. - uint32_t NLZ = DemandedMask.countLeadingZeros(); - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - } - // Otherwise just hand the sub off to ComputeMaskedBits to fill in - // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero <<= ShiftAmt; - RHSKnownOne <<= ShiftAmt; - // low bits known zero. - if (ShiftAmt) - RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Unsigned shift right. - APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); - if (ShiftAmt) { - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero |= HighBits; // high bits known zero. - } - } - break; - case Instruction::AShr: - // If this is an arithmetic shift right and only the low-bit is set, we can - // always convert this into a logical shr, even if the shift amount is - // variable. The low bit of the shift cannot be an input sign bit unless - // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) { - // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), I->getOperand(1), I->getName()); - return InsertNewInstBefore(NewVal, *I); - } - - // If the sign bit is the only bit demanded by this ashr, then there is no - // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignBit()) - return I->getOperand(0); - - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Signed shift right. - APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as - // demanded. - if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.set(BitWidth-1); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); - - // Handle the sign bits. 
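Why the ashr in the case above may become an lshr: the two shifts differ only in the copies of the sign bit they shift in, so if the demanded bits exclude those positions (or the sign bit is known zero), the results agree on every observed bit. A small standalone check with illustrative values:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t x = 0x74;           // sign bit clear
      uint8_t demanded = 0x1F;   // only the low 5 bits are observed
      uint8_t ashr = (uint8_t)(x >> 2);           // arithmetic shift
      uint8_t lshr = (uint8_t)((uint8_t)x >> 2);  // logical shift
      assert((ashr & demanded) == (lshr & demanded));
    }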
- APInt SignBit(APInt::getSignBit(BitWidth)); - // Adjust to where it is now in the mask. - SignBit = APIntOps::lshr(SignBit, ShiftAmt); - - // If the input sign bit is known to be zero, or if none of the top bits - // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA, I->getName()); - return InsertNewInstBefore(NewVal, *I); - } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. - RHSKnownOne |= HighBits; - } - } - break; - case Instruction::SRem: - if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { - APInt RA = Rem->getValue().abs(); - if (RA.isPowerOf2()) { - if (DemandedMask.ult(RA)) // srem won't affect demanded bits - return I->getOperand(0); - - APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - - if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) - LHSKnownZero |= ~LowBits; - - KnownZero |= LHSKnownZero & DemandedMask; - - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - } - } - break; - case Instruction::URem: { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, - KnownZero2, KnownOne2, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) - return I; - - unsigned Leaders = KnownZero2.countLeadingOnes(); - Leaders = std::max(Leaders, - KnownZero2.countLeadingOnes()); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; - break; - } - case Instruction::Call: - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: { - // If the only bits demanded come from one byte of the bswap result, - // just shift the input byte into position to eliminate the bswap. - unsigned NLZ = DemandedMask.countLeadingZeros(); - unsigned NTZ = DemandedMask.countTrailingZeros(); - - // Round NTZ down to the next byte. If we have 11 trailing zeros, then - // we need all the bits down to bit 8. Likewise, round NLZ. If we - // have 14 leading zeros, round to 8. - NLZ &= ~7; - NTZ &= ~7; - // If we need exactly one byte, we can do this transformation. - if (BitWidth-NLZ-NTZ == 8) { - unsigned ResultBit = NTZ; - unsigned InputBit = BitWidth-NTZ-8; - - // Replace this with either a left or right shift to get the byte into - // the right place. - Instruction *NewVal; - if (InputBit > ResultBit) - NewVal = BinaryOperator::CreateLShr(I->getOperand(1), - ConstantInt::get(I->getType(), InputBit-ResultBit)); - else - NewVal = BinaryOperator::CreateShl(I->getOperand(1), - ConstantInt::get(I->getType(), ResultBit-InputBit)); - NewVal->takeName(I); - return InsertNewInstBefore(NewVal, *I); - } - - // TODO: Could compute known zero/one bits based on the input. - break; - } - } - } - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - } - - // If the client is only demanding bits that we know, return the known - // constant. 
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne); - return false; -} - - -/// SimplifyDemandedVectorElts - The specified value produces a vector with -/// any number of elements. DemandedElts contains the set of elements that are -/// actually used by the caller. This method analyzes which elements of the -/// operand are undef and returns that information in UndefElts. -/// -/// If the information about demanded elements can be used to simplify the -/// operation, the operation is simplified, then the resultant value is -/// returned. This returns null if no change was made. -Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, - unsigned Depth) { - unsigned VWidth = cast(V->getType())->getNumElements(); - APInt EltMask(APInt::getAllOnesValue(VWidth)); - assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); - - if (isa(V)) { - // If the entire vector is undefined, just return this info. - UndefElts = EltMask; - return 0; - } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. - UndefElts = EltMask; - return UndefValue::get(V->getType()); - } - - UndefElts = 0; - if (ConstantVector *CP = dyn_cast(V)) { - const Type *EltTy = cast(V->getType())->getElementType(); - Constant *Undef = UndefValue::get(EltTy); - - std::vector Elts; - for (unsigned i = 0; i != VWidth; ++i) - if (!DemandedElts[i]) { // If not demanded, set to undef. - Elts.push_back(Undef); - UndefElts.set(i); - } else if (isa(CP->getOperand(i))) { // Already undef. - Elts.push_back(Undef); - UndefElts.set(i); - } else { // Otherwise, defined. - Elts.push_back(CP->getOperand(i)); - } - - // If we changed the constant, return it. - Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CP ? NewCP : 0; - } else if (isa(V)) { - // Simplify the CAZ to a ConstantVector where the non-demanded elements are - // set to undef. - - // Check if this is identity. If so, return 0 since we are not simplifying - // anything. - if (DemandedElts == ((1ULL << VWidth) -1)) - return 0; - - const Type *EltTy = cast(V->getType())->getElementType(); - Constant *Zero = Constant::getNullValue(EltTy); - Constant *Undef = UndefValue::get(EltTy); - std::vector Elts; - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elt = DemandedElts[i] ? Zero : Undef; - Elts.push_back(Elt); - } - UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); - } - - // Limit search depth. - if (Depth == 10) - return 0; - - // If multiple users are using the root value, procede with - // simplification conservatively assuming that all elements - // are needed. - if (!V->hasOneUse()) { - // Quit if we find multiple users of a non-root value though. - // They'll be handled when it's their turn to be visited by - // the main instcombine process. - if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. - return 0; - - // Conservatively assume that all elements are needed. - DemandedElts = EltMask; - } - - Instruction *I = dyn_cast(V); - if (!I) return 0; // Only analyze instructions. - - bool MadeChange = false; - APInt UndefElts2(VWidth, 0); - Value *TmpV; - switch (I->getOpcode()) { - default: break; - - case Instruction::InsertElement: { - // If this is a variable index, we don't know which element it overwrites. - // demand exactly the same input as we produce. 
- ConstantInt *Idx = dyn_cast(I->getOperand(2)); - if (Idx == 0) { - // Note that we can't propagate undef elt info, because we don't know - // which elt is getting updated. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - break; - } - - // If this is inserting an element that isn't demanded, remove this - // insertelement. - unsigned IdxNo = Idx->getZExtValue(); - if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { - Worklist.Add(I); - return I->getOperand(0); - } - - // Otherwise, the element inserted overwrites whatever was there, so the - // input demanded set is simpler than the output set. - APInt DemandedElts2 = DemandedElts; - DemandedElts2.clear(IdxNo); - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, - UndefElts, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - - // The inserted element is defined. - UndefElts.clear(IdxNo); - break; - } - case Instruction::ShuffleVector: { - ShuffleVectorInst *Shuffle = cast(I); - uint64_t LHSVWidth = - cast(Shuffle->getOperand(0)->getType())->getNumElements(); - APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0); - for (unsigned i = 0; i < VWidth; i++) { - if (DemandedElts[i]) { - unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal != -1u) { - assert(MaskVal < LHSVWidth * 2 && - "shufflevector mask index out of range!"); - if (MaskVal < LHSVWidth) - LeftDemanded.set(MaskVal); - else - RightDemanded.set(MaskVal - LHSVWidth); - } - } - } - - APInt UndefElts4(LHSVWidth, 0); - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, - UndefElts4, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - - APInt UndefElts3(LHSVWidth, 0); - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, - UndefElts3, Depth+1); - if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - - bool NewUndefElts = false; - for (unsigned i = 0; i < VWidth; i++) { - unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal == -1u) { - UndefElts.set(i); - } else if (MaskVal < LHSVWidth) { - if (UndefElts4[MaskVal]) { - NewUndefElts = true; - UndefElts.set(i); - } - } else { - if (UndefElts3[MaskVal - LHSVWidth]) { - NewUndefElts = true; - UndefElts.set(i); - } - } - } - - if (NewUndefElts) { - // Add additional discovered undefs. - std::vector Elts; - for (unsigned i = 0; i < VWidth; ++i) { - if (UndefElts[i]) - Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), - Shuffle->getMaskValue(i))); - } - I->setOperand(2, ConstantVector::get(Elts)); - MadeChange = true; - } - break; - } - case Instruction::BitCast: { - // Vector->vector casts only. - const VectorType *VTy = dyn_cast(I->getOperand(0)->getType()); - if (!VTy) break; - unsigned InVWidth = VTy->getNumElements(); - APInt InputDemandedElts(InVWidth, 0); - unsigned Ratio; - - if (VWidth == InVWidth) { - // If we are converting from <4 x i32> -> <4 x f32>, we demand the same - // elements as are demanded of us. - Ratio = 1; - InputDemandedElts = DemandedElts; - } else if (VWidth > InVWidth) { - // Untested so far. - break; - - // If there are more elements in the result than there are in the source, - // then an input element is live if any of the corresponding output - // elements are live. 
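A standalone sketch of the shuffle bookkeeping above, with plain arrays in place of APInt; the mask and demanded lanes are illustrative. Mask indices below LHSVWidth select from the left input, the rest from the right, and -1 produces an undef lane.

    #include <cassert>

    int main() {
      const unsigned LHSWidth = 4;
      // 4-wide result: lanes 0,1 from the left input, lane 5 (= right[1]),
      // and one undef lane.
      int mask[4] = {0, 1, 5, -1};
      bool demanded[4] = {true, false, true, true};

      bool leftDemanded[4] = {false}, rightDemanded[4] = {false};
      for (unsigned i = 0; i != 4; ++i) {
        if (!demanded[i] || mask[i] == -1) continue;   // dead or undef lane
        if ((unsigned)mask[i] < LHSWidth) leftDemanded[mask[i]] = true;
        else rightDemanded[mask[i] - LHSWidth] = true;
      }
      assert(leftDemanded[0] && !leftDemanded[1]); // result lane 1 not demanded
      assert(rightDemanded[1]);                    // mask value 5
    }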
- Ratio = VWidth/InVWidth; - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { - if (DemandedElts[OutIdx]) - InputDemandedElts.set(OutIdx/Ratio); - } - } else { - // Untested so far. - break; - - // If there are more elements in the source than there are in the result, - // then an input element is live if the corresponding output element is - // live. - Ratio = InVWidth/VWidth; - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (DemandedElts[InIdx/Ratio]) - InputDemandedElts.set(InIdx); - } - - // div/rem demand all inputs, because they don't want divide by zero. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, - UndefElts2, Depth+1); - if (TmpV) { - I->setOperand(0, TmpV); - MadeChange = true; - } - - UndefElts = UndefElts2; - if (VWidth > InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the result than there are in the source, - // then an output element is undef if the corresponding input element is - // undef. - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx/Ratio]) - UndefElts.set(OutIdx); - } else if (VWidth < InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the source than there are in the result, - // then a result element is undef if all of the corresponding input - // elements are undef. - UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clear(InIdx/Ratio); // Clear undef bit. - } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // div/rem demand all inputs, because they don't want divide by zero. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - break; - - case Instruction::Call: { - IntrinsicInst *II = dyn_cast(I); - if (!II) break; - switch (II->getIntrinsicID()) { - default: break; - - // Binary vector operations that work column-wise. A dest element is a - // function of the corresponding input elements from the two inputs. - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse_min_ss: - case Intrinsic::x86_sse_max_ss: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - case Intrinsic::x86_sse2_min_sd: - case Intrinsic::x86_sse2_max_sd: - TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, - UndefElts, Depth+1); - if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; } - - // If only the low elt is demanded and this is a scalarizable intrinsic, - // scalarize it now. 
- if (DemandedElts == 1) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - // TODO: Lower MIN/MAX/ABS/etc - Value *LHS = II->getOperand(1); - Value *RHS = II->getOperand(2); - // Extract the element as scalars. - LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, - ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); - RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, - ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); - - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Case stmts out of sync!"); - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, - II->getName()), *II); - break; - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS, - II->getName()), *II); - break; - } - - Instruction *New = - InsertElementInst::Create( - UndefValue::get(II->getType()), TmpV, - ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName()); - InsertNewInstBefore(New, *II); - return New; - } - } - - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - break; - } - break; - } - } - return MadeChange ? I : 0; -} - - -/// AssociativeOpt - Perform an optimization on an associative operator. This -/// function is designed to check a chain of associative operators for a -/// potential to apply a certain optimization. Since the optimization may be -/// applicable if the expression was reassociated, this checks the chain, then -/// reassociates the expression as necessary to expose the optimization -/// opportunity. This makes use of a special Functor, which must define -/// 'shouldApply' and 'apply' methods. -/// -template -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { - unsigned Opcode = Root.getOpcode(); - Value *LHS = Root.getOperand(0); - - // Quick check, see if the immediate LHS matches... - if (F.shouldApply(LHS)) - return F.apply(Root); - - // Otherwise, if the LHS is not of the same opcode as the root, return. - Instruction *LHSI = dyn_cast(LHS); - while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) { - // Should we apply this transform to the RHS? - bool ShouldApply = F.shouldApply(LHSI->getOperand(1)); - - // If not to the RHS, check to see if we should apply to the LHS... - if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) { - cast(LHSI)->swapOperands(); // Make the LHS the RHS - ShouldApply = true; - } - - // If the functor wants to apply the optimization to the RHS of LHSI, - // reassociate the expression from ((? op A) op B) to (? op (A op B)) - if (ShouldApply) { - // Now all of the instructions are in the current basic block, go ahead - // and perform the reassociation. - Instruction *TmpLHSI = cast(Root.getOperand(0)); - - // First move the selected RHS to the LHS of the root... - Root.setOperand(0, LHSI->getOperand(1)); - - // Make what used to be the LHS of the root be the user of the root... 
- Value *ExtraOperand = TmpLHSI->getOperand(1); - if (&Root == TmpLHSI) { - Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); - return 0; - } - Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI - TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root - BasicBlock::iterator ARI = &Root; ++ARI; - TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root - ARI = Root; - - // Now propagate the ExtraOperand down the chain of instructions until we - // get to LHSI. - while (TmpLHSI != LHSI) { - Instruction *NextLHSI = cast(TmpLHSI->getOperand(0)); - // Move the instruction to immediately before the chain we are - // constructing to avoid breaking dominance properties. - NextLHSI->moveBefore(ARI); - ARI = NextLHSI; - - Value *NextOp = NextLHSI->getOperand(1); - NextLHSI->setOperand(1, ExtraOperand); - TmpLHSI = NextLHSI; - ExtraOperand = NextOp; - } - - // Now that the instructions are reassociated, have the functor perform - // the transformation... - return F.apply(Root); - } - - LHSI = dyn_cast(LHSI->getOperand(0)); - } - return 0; -} - -namespace { - -// AddRHS - Implements: X + X --> X << 1 -struct AddRHS { - Value *RHS; - explicit AddRHS(Value *rhs) : RHS(rhs) {} - bool shouldApply(Value *LHS) const { return LHS == RHS; } - Instruction *apply(BinaryOperator &Add) const { - return BinaryOperator::CreateShl(Add.getOperand(0), - ConstantInt::get(Add.getType(), 1)); - } -}; - -// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2) -// iff C1&C2 == 0 -struct AddMaskingAnd { - Constant *C2; - explicit AddMaskingAnd(Constant *c) : C2(c) {} - bool shouldApply(Value *LHS) const { - ConstantInt *C1; - return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && - ConstantExpr::getAnd(C1, C2)->isNullValue(); - } - Instruction *apply(BinaryOperator &Add) const { - return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); - } -}; - -} - -static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, - InstCombiner *IC) { - if (CastInst *CI = dyn_cast(&I)) - return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); - - // Figure out if the constant is the left or the right argument. - bool ConstIsRHS = isa(I.getOperand(1)); - Constant *ConstOperand = cast(I.getOperand(ConstIsRHS)); - - if (Constant *SOC = dyn_cast(SO)) { - if (ConstIsRHS) - return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); - return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); - } - - Value *Op0 = SO, *Op1 = ConstOperand; - if (!ConstIsRHS) - std::swap(Op0, Op1); - - if (BinaryOperator *BO = dyn_cast(&I)) - return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName()+".op"); - if (ICmpInst *CI = dyn_cast(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, - SO->getName()+".cmp"); - if (FCmpInst *CI = dyn_cast(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, - SO->getName()+".cmp"); - llvm_unreachable("Unknown binary instruction type!"); -} - -// FoldOpIntoSelect - Given an instruction with a select as one operand and a -// constant as the other operand, try to fold the binary operator into the -// select arguments. This also works for Cast instructions, which obviously do -// not have a second operand. 
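The arm-wise evaluation performed by FoldOperationIntoSelectOperand above amounts to distributing the operation over both select arms, e.g. add (select %c, 10, 20), 4 becomes select %c, 14, 24. A minimal sketch with illustrative integer values:

    #include <cassert>

    // op(select(c, tv, fv), k) == select(c, op(tv, k), op(fv, k))
    static int foldedSelect(bool c, int tv, int fv, int k) {
      int trueArm  = tv + k;  // fold into the true value
      int falseArm = fv + k;  // fold into the false value
      return c ? trueArm : falseArm;
    }

    int main() {
      assert(foldedSelect(true, 10, 20, 4) == 14);
      assert(foldedSelect(false, 10, 20, 4) == 24);
    }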
-static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - InstCombiner *IC) { - // Don't modify shared select instructions - if (!SI->hasOneUse()) return 0; - Value *TV = SI->getOperand(1); - Value *FV = SI->getOperand(2); - - if (isa(TV) || isa(FV)) { - // Bool selects with constant operands can be folded to logical ops. - if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0; - - Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC); - Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); - - return SelectInst::Create(SI->getCondition(), SelectTrueVal, - SelectFalseVal); - } - return 0; -} - - -/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which -/// has a PHI node as operand #0, see if we can fold the instruction into the -/// PHI (which is only possible if all operands to the PHI are constants). -/// -/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms -/// that would normally be unprofitable because they strongly encourage jump -/// threading. -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, - bool AllowAggressive) { - AllowAggressive = false; - PHINode *PN = cast(I.getOperand(0)); - unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || - // We normally only transform phis with a single use, unless we're trying - // hard to make jump threading happen. - (!PN->hasOneUse() && !AllowAggressive)) - return 0; - - - // Check to see if all of the operands of the PHI are simple constants - // (constantint/constantfp/undef). If there is one non-constant value, - // remember the BB it is in. If there is more than one or if *it* is a PHI, - // bail out. We don't do arbitrary constant expressions here because moving - // their computation can be expensive without a cost model. - BasicBlock *NonConstBB = 0; - for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa(PN->getIncomingValue(i)) || - isa(PN->getIncomingValue(i))) { - if (NonConstBB) return 0; // More than one non-const value. - if (isa(PN->getIncomingValue(i))) return 0; // Itself a phi. - NonConstBB = PN->getIncomingBlock(i); - - // If the incoming non-constant value is in I's block, we have an infinite - // loop. - if (NonConstBB == I.getParent()) - return 0; - } - - // If there is exactly one non-constant value, we can insert a copy of the - // operation in that block. However, if this is a critical edge, we would be - // inserting the computation one some other paths (e.g. inside a loop). Only - // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0 && !AllowAggressive) { - BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional()) return 0; - } - - // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType(), ""); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); - InsertNewInstBefore(NewPN, *PN); - NewPN->takeName(PN); - - // Next, add all of the operands to the PHI. - if (SelectInst *SI = dyn_cast(&I)) { - // We only currently try to fold the condition of a select when it is a phi, - // not the true/false values. 
- Value *TrueV = SI->getTrueValue(); - Value *FalseV = SI->getFalseValue(); - BasicBlock *PhiTransBB = PN->getParent(); - for (unsigned i = 0; i != NumPHIValues; ++i) { - BasicBlock *ThisBB = PN->getIncomingBlock(i); - Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); - Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); - Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, - FalseVInPred, - "phitmp", NonConstBB->getTerminator()); - Worklist.Add(cast(InV)); - } - NewPN->addIncoming(InV, ThisBB); - } - } else if (I.getNumOperands() == 2) { - Constant *C = cast(I.getOperand(1)); - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - if (CmpInst *CI = dyn_cast(&I)) - InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); - else - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - if (BinaryOperator *BO = dyn_cast(&I)) - InV = BinaryOperator::Create(BO->getOpcode(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else if (CmpInst *CI = dyn_cast(&I)) - InV = CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else - llvm_unreachable("Unknown binop!"); - - Worklist.Add(cast(InV)); - } - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); - } - } else { - CastInst *CI = cast(&I); - const Type *RetTy = CI->getType(); - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), - I.getType(), "phitmp", - NonConstBB->getTerminator()); - Worklist.Add(cast(InV)); - } - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); - } - } - return ReplaceInstUsesWith(I, NewPN); -} - - -/// WillNotOverflowSignedAdd - Return true if we can prove that: -/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) -/// This basically requires proving that the add in the original type would not -/// overflow to change the sign bit or have a carry out. -bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { - // There are different heuristics we can use for this. Here are some simple - // ones. - - // Add has the property that adding any two 2's complement numbers can only - // have one carry bit which can change a sign. As such, if LHS and RHS each - // have at least two sign bits, we know that the addition of the two values - // will sign extend fine. - if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) - return true; - - - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. - - // TODO: Implement. 
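The two-sign-bit argument above can be verified exhaustively on a narrow type. A standalone sketch: numSignBits here is a simplified stand-in for ComputeNumSignBits, and arithmetic right shift of a negative value is assumed, as on mainstream compilers.

    #include <cassert>
    #include <cstdint>

    // Number of redundant copies of the sign bit in an 8-bit value (>= 1).
    static unsigned numSignBits(int8_t v) {
      unsigned n = 1;
      while (n < 8 && ((v >> (7 - n)) & 1) == ((v >> 7) & 1))
        ++n;
      return n;
    }

    int main() {
      for (int a = -128; a < 128; ++a)
        for (int b = -128; b < 128; ++b)
          if (numSignBits((int8_t)a) > 1 && numSignBits((int8_t)b) > 1) {
            int sum = a + b;                  // add in a wider type
            assert(sum >= -128 && sum < 128); // never overflows int8_t
          }
    }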
- - return false; -} - - -Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), TD)) - return ReplaceInstUsesWith(I, V); - - - if (Constant *RHSC = dyn_cast(RHS)) { - if (ConstantInt *CI = dyn_cast(RHSC)) { - // X + (signbit) --> X ^ signbit - const APInt& Val = CI->getValue(); - uint32_t BitWidth = Val.getBitWidth(); - if (Val == APInt::getSignBit(BitWidth)) - return BinaryOperator::CreateXor(LHS, RHS); - - // See if SimplifyDemandedBits can simplify this. This handles stuff like - // (X & 254)+1 -> (X&254)|1 - if (SimplifyDemandedInstructionBits(I)) - return &I; - - // zext(bool) + C -> bool ? C + 1 : C - if (ZExtInst *ZI = dyn_cast(LHS)) - if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - } - - if (isa(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - - ConstantInt *XorRHS = 0; - Value *XorLHS = 0; - if (isa(RHSC) && - match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); - const APInt& RHSVal = cast(RHSC)->getValue(); - - uint32_t Size = TySizeBits / 2; - APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); - APInt CFF80Val(-C0080Val); - do { - if (TySizeBits > Size) { - // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. - // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. - if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || - (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { - // This is a sign extend if the top bits are known zero. - if (!MaskedValueIsZero(XorLHS, - APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) - Size = 0; // Not a sign ext, but can't be any others either. - break; - } - } - Size >>= 1; - C0080Val = APIntOps::lshr(C0080Val, Size); - CFF80Val = APIntOps::ashr(CFF80Val, Size); - } while (Size >= 1); - - // FIXME: This shouldn't be necessary. When the backends can handle types - // with funny bit widths then this switch statement should be removed. It - // is just here to get the size of the "middle" type back up to something - // that the back ends can handle. 
- const Type *MiddleType = 0; - switch (Size) { - default: break; - case 32: MiddleType = Type::getInt32Ty(*Context); break; - case 16: MiddleType = Type::getInt16Ty(*Context); break; - case 8: MiddleType = Type::getInt8Ty(*Context); break; - } - if (MiddleType) { - Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); - return new SExtInst(NewTrunc, I.getType(), I.getName()); - } - } - } - - if (I.getType() == Type::getInt1Ty(*Context)) - return BinaryOperator::CreateXor(LHS, RHS); - - // X + X --> X << 1 - if (I.getType()->isInteger()) { - if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) - return Result; - - if (Instruction *RHSI = dyn_cast(RHS)) { - if (RHSI->getOpcode() == Instruction::Sub) - if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B - return ReplaceInstUsesWith(I, RHSI->getOperand(0)); - } - if (Instruction *LHSI = dyn_cast(LHS)) { - if (LHSI->getOpcode() == Instruction::Sub) - if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B - return ReplaceInstUsesWith(I, LHSI->getOperand(0)); - } - } - - // -A + B --> B - A - // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } - } - - return BinaryOperator::CreateSub(RHS, LHSV); - } - - // A + -B --> A - B - if (!isa(RHS)) - if (Value *V = dyn_castNegVal(RHS)) - return BinaryOperator::CreateSub(LHS, V); - - - ConstantInt *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - ConstantInt *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - - // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS) == RHS || - dyn_castNotVal(RHS) == LHS) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - - // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 - if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) - if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) - return R; - - // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast(I.getType())) { - APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); - APInt LHSKnownOne(IT->getBitWidth(), 0); - APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); - if (LHSKnownZero != 0) { - APInt RHSKnownOne(IT->getBitWidth(), 0); - APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); - - // No bits in common -> bitwise or. 
- if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) - return BinaryOperator::CreateOr(LHS, RHS); - } - } - - // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { - Value *W, *X, *Y, *Z; - if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && - match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { - if (W != Y) { - if (W == Z) { - std::swap(Y, Z); - } else if (Y == X) { - std::swap(W, X); - } else if (X == Z) { - std::swap(Y, Z); - std::swap(W, X); - } - } - - if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); - return BinaryOperator::CreateMul(W, NewAdd); - } - } - } - - if (ConstantInt *CRHS = dyn_cast(RHS)) { - Value *X = 0; - if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X - return BinaryOperator::CreateSub(SubOne(CRHS), X); - - // (X & FF00) + xx00 -> (X+xx00) & FF00 - if (LHS->hasOneUse() && - match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); - if (Anded == CRHS) { - // See if all bits from the first bit set in the Add RHS up are included - // in the mask. First, get the rightmost bit. - const APInt& AddRHSV = CRHS->getValue(); - - // Form a mask of all bits from the lowest bit added through the top. - APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); - - // See if the and mask includes all of these bits. - APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - - if (AddRHSHighBits == AddRHSHighBitsAnd) { - // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); - return BinaryOperator::CreateAnd(NewAdd, C2); - } - } - } - - // Try to fold constant add into select arguments. - if (SelectInst *SI = dyn_cast(LHS)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - } - - // add (select X 0 (sub n A)) A --> select X A n - { - SelectInst *SI = dyn_cast(LHS); - Value *A = RHS; - if (!SI) { - SI = dyn_cast(RHS); - A = LHS; - } - if (SI && SI->hasOneUse()) { - Value *TV = SI->getTrueValue(); - Value *FV = SI->getFalseValue(); - Value *N; - - // Can we fold the add into the argument of the select? - // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && - match(TV, m_Sub(m_Value(N), m_Specific(A)))) - // Fold the add into the true select value. - return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && - match(FV, m_Sub(m_Value(N), m_Specific(A)))) - // Fold the add into the false select value. - return SelectInst::Create(SI->getCondition(), A, N); - } - } - - // Check for (add (sext x), y), see if we can merge this into an - // integer add followed by a sext. - if (SExtInst *LHSConv = dyn_cast(LHS)) { - // (add (sext x), cst) --> (sext (add x, cst')) - if (ConstantInt *RHSC = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSExt(CI, I.getType()) == RHSC && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { - // Insert the new, smaller add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI, "addconv"); - return new SExtInst(NewAdd, I.getType()); - } - } - - // (add (sext x), (sext y)) --> (sext (add int x, y)) - if (SExtInst *RHSConv = dyn_cast(RHS)) { - // Only do this if x/y have the same type, if at last one of them has a - // single use (so we don't increase the number of sexts), and if the - // integer add will not overflow. 
-      if (LHSConv->getOperand(0)->getType() ==
-            RHSConv->getOperand(0)->getType() &&
-          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0))) {
-        // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
-                                              RHSConv->getOperand(0), "addconv");
-        return new SExtInst(NewAdd, I.getType());
-      }
-    }
-  }
-
-  return Changed ? &I : 0;
-}
-
-Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
-  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-
-  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
-    // X + -0.0 --> X (-0.0 is the additive identity for fadd).
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
-                              (I.getType())->getValueAPF()))
-        return ReplaceInstUsesWith(I, LHS);
-    }
-
-    if (isa<PHINode>(LHS))
-      if (Instruction *NV = FoldOpIntoPhi(I))
-        return NV;
-  }
-
-  // -A + B  -->  B - A
-  // -A + -B  -->  -(A + B)
-  if (Value *LHSV = dyn_castFNegVal(LHS))
-    return BinaryOperator::CreateFSub(RHS, LHSV);
-
-  // A + -B  -->  A - B
-  if (!isa<Constant>(RHS))
-    if (Value *V = dyn_castFNegVal(RHS))
-      return BinaryOperator::CreateFSub(LHS, V);
-
-  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
-    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
-      return ReplaceInstUsesWith(I, LHS);
-
-  // Check for (add double (sitofp x), y), see if we can merge this into an
-  // integer add followed by a promotion.
-  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
-    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
-    // ... if the constant fits in the integer value.  This is useful for things
-    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
-    // requires a constant pool load, and generally allows the add to be better
-    // instcombined.
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
-      Constant *CI =
-        ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
-      if (LHSConv->hasOneUse() &&
-          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
-        // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
-                                              CI, "addconv");
-        return new SIToFPInst(NewAdd, I.getType());
-      }
-    }
-
-    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
-    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
-      // Only do this if x/y have the same type, if at least one of them has a
-      // single use (so we don't increase the number of int->fp conversions),
-      // and if the integer add will not overflow.
-      if (LHSConv->getOperand(0)->getType() ==
-            RHSConv->getOperand(0)->getType() &&
-          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0))) {
-        // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
-                                              RHSConv->getOperand(0), "addconv");
-        return new SIToFPInst(NewAdd, I.getType());
-      }
-    }
-  }
-
-  return Changed ? &I : 0;
-}
-
-
-/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
-/// code necessary to compute the offset from the base pointer (without adding
-/// in the base pointer). Return the result as a signed integer of intptr size.
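The offset that EmitGEPOffset materializes below is ordinary address arithmetic: each array index contributes index times the element's allocation size, and each struct index contributes a constant field offset. A rough scalar model under an assumed layout (hypothetical C++ sketch; S and gepOffset are illustrative names, not part of this patch or the LLVM API):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct S { int32_t a; int64_t b; };  // assumed struct layout

    // Byte offset of &base[i].b, accumulated the way the GEP offset is:
    // variable index scaled by alloc size, plus the constant field offset.
    intptr_t gepOffset(intptr_t i) {
      return i * (intptr_t)sizeof(S) + (intptr_t)offsetof(S, b);
    }

    int main() {
      S arr[4];
      assert((char *)&arr[3].b - (char *)arr == gepOffset(3));
      return 0;
    }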
-static Value *EmitGEPOffset(User *GEP, InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); - Value *Result = Constant::getNullValue(IntPtrTy); - - // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; - ++i, ++GTI) { - Value *Op = *i; - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; - if (ConstantInt *OpC = dyn_cast(Op)) { - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - - Result = IC.Builder->CreateAdd(Result, - ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); - continue; - } - - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - Constant *OC = - ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale); - // Emit an add instruction. - Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); - continue; - } - // Convert to correct type. - if (Op->getType() != IntPtrTy) - Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); - if (Size != 1) { - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - // We'll let instcombine(mul) convert this to a shl if possible. - Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); - } - - // Emit an add instruction. - Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); - } - return Result; -} - - -/// EvaluateGEPOffsetExpression - Return a value that can be used to compare -/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we -/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can -/// be complex, and scales are involved. The above expression would also be -/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). -/// This later form is less amenable to optimization though, and we are allowed -/// to generate the first by knowing that pointer arithmetic doesn't overflow. -/// -/// If we can't emit an optimized form for this expression, this returns null. -/// -static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, - InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - - // Check to see if this gep only has a single variable index. If so, and if - // any constant indices are a multiple of its scale, then we can compute this - // in terms of the scale of the variable index. For example, if the GEP - // implies an offset of "12 + i*4", then we can codegen this as "3 + i", - // because the expression will cross zero at the same point. - unsigned i, e = GEP->getNumOperands(); - int64_t Offset = 0; - for (i = 1; i != e; ++i, ++GTI) { - if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { - // Compute the aggregate offset of constant indices. - if (CI->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*CI->getSExtValue(); - } - } else { - // Found our variable index. 
- break; - } - } - - // If there are no variable indices, we must have a constant offset, just - // evaluate it the general way. - if (i == e) return 0; - - Value *VariableIdx = GEP->getOperand(i); - // Determine the scale factor of the variable element. For example, this is - // 4 if the variable index is into an array of i32. - uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - - // Verify that there are no other variable indices. If so, emit the hard way. - for (++i, ++GTI; i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast(GEP->getOperand(i)); - if (!CI) return 0; - - // Compute the aggregate offset of constant indices. - if (CI->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*CI->getSExtValue(); - } - } - - // Okay, we know we have a single variable index, which must be a - // pointer/array/vector index. If there is no offset, life is simple, return - // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - if (Offset == 0) { - // Cast to intptrty in case a truncation occurs. If an extension is needed, - // we don't need to bother extending: the extension won't affect where the - // computation crosses zero. - if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, - TD.getIntPtrType(VariableIdx->getContext()), - VariableIdx->getName(), &I); - return VariableIdx; - } - - // Otherwise, there is an index. The computation we will do will be modulo - // the pointer size, so get it. - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - Offset &= PtrSizeMask; - VariableScale &= PtrSizeMask; - - // To do this transformation, any constant index must be a multiple of the - // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", - // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a - // multiple of the variable scale. - int64_t NewOffs = Offset / (int64_t)VariableScale; - if (Offset != NewOffs*(int64_t)VariableScale) - return 0; - - // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); - if (VariableIdx->getType() != IntPtrTy) - VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, - true /*SExt*/, - VariableIdx->getName(), &I); - Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); -} - - -/// Optimize pointer differences into the same array into a size. Consider: -/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer -/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. -/// -Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { - assert(TD && "Must have target data info for this"); - - // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize - // this. - bool Swapped; - GetElementPtrInst *GEP; - - if ((GEP = dyn_cast(LHS)) && - GEP->getOperand(0) == RHS) - Swapped = false; - else if ((GEP = dyn_cast(RHS)) && - GEP->getOperand(0) == LHS) - Swapped = true; - else - return 0; - - // TODO: Could also optimize &A[i] - &A[j] -> "i-j". - - // Emit the offset of the GEP and an intptr_t. 
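OptimizePointerDifference, begun above, reduces ptrtoint(&A[i]) - ptrtoint(&A[0]) to the GEP's byte offset alone, negated when the operands arrive swapped. The arithmetic it recovers, checked directly (standalone C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t A[16];
      // The ptrtoint difference is exactly the byte offset of the GEP:
      intptr_t diff = (intptr_t)&A[10] - (intptr_t)&A[0];
      assert(diff == 10 * (intptr_t)sizeof(int32_t));
      return 0;
    }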
- Value *Result = EmitGEPOffset(GEP, *this); - - // If we have p - gep(p, ...) then we have to negate the result. - if (Swapped) - Result = Builder->CreateNeg(Result, "diff.neg"); - - return Builder->CreateIntCast(Result, Ty, true); -} - - -Instruction *InstCombiner::visitSub(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. - if (Value *V = dyn_castNegVal(Op1)) { - BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); - Res->setHasNoSignedWrap(I.hasNoSignedWrap()); - Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return Res; - } - - if (isa(Op0)) - return ReplaceInstUsesWith(I, Op0); // undef - X -> undef - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType() == Type::getInt1Ty(*Context)) - return BinaryOperator::CreateXor(Op0, Op1); - - if (ConstantInt *C = dyn_cast(Op0)) { - // Replace (-1 - A) with (~A). - if (C->isAllOnesValue()) - return BinaryOperator::CreateNot(Op1); - - // C - ~X == X + (1+C) - Value *X = 0; - if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::CreateAdd(X, AddOne(C)); - - // -(X >>u 31) -> (X >>s 31) - // -(X >>s 31) -> (X >>u 31) - if (C->isZero()) { - if (BinaryOperator *SI = dyn_cast(Op1)) { - if (SI->getOpcode() == Instruction::LShr) { - if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert AShr. - return BinaryOperator::Create(Instruction::AShr, - SI->getOperand(0), CU, SI->getName()); - } - } - } else if (SI->getOpcode() == Instruction::AShr) { - if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert LShr. - return BinaryOperator::CreateLShr( - SI->getOperand(0), CU, SI->getName()); - } - } - } - } - } - - // Try to fold constant sub into select arguments. - if (SelectInst *SI = dyn_cast(Op1)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - // C - zext(bool) -> bool ? C - 1 : C - if (ZExtInst *ZI = dyn_cast(Op1)) - if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) - return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); - } - - if (BinaryOperator *Op1I = dyn_cast(Op1)) { - if (Op1I->getOpcode() == Instruction::Add) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0), - I.getName()); - else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { - if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) - // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub( - ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); - } - } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::Sub) { - // Swap the two operands of the subexpr... 
- Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level add instruction... - return BinaryOperator::CreateAdd(Op0, Op1); - } - - // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... - // - if (Op1I->getOpcode() == Instruction::And && - (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { - Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - - Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); - return BinaryOperator::CreateAnd(Op0, NewNot); - } - - // 0 - (X sdiv C) -> (X sdiv -C) - if (Op1I->getOpcode() == Instruction::SDiv) - if (ConstantInt *CSI = dyn_cast(Op0)) - if (CSI->isZero()) - if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) - return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - ConstantExpr::getNeg(DivRHS)); - - // X - X*C --> X * (1-C) - ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), - C2); - return BinaryOperator::CreateMul(Op0, CP1); - } - } - } - - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), - I.getName()); - } - } - - ConstantInt *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - - // Optimize pointer differences into the same array into a size. Consider: - // &A[10] - &A[0]: we should compile this to "10". - if (TD) { - if (PtrToIntInst *LHS = dyn_cast(Op0)) - if (PtrToIntInst *RHS = dyn_cast(Op1)) - if (Value *Res = OptimizePointerDifference(LHS->getOperand(0), - RHS->getOperand(0), - I.getType())) - return ReplaceInstUsesWith(I, Res); - - // trunc(p)-trunc(q) -> trunc(p-q) - if (TruncInst *LHST = dyn_cast(Op0)) - if (TruncInst *RHST = dyn_cast(Op1)) - if (PtrToIntInst *LHS = dyn_cast(LHST->getOperand(0))) - if (PtrToIntInst *RHS = dyn_cast(RHST->getOperand(0))) - if (Value *Res = OptimizePointerDifference(LHS->getOperand(0), - RHS->getOperand(0), - I.getType())) - return ReplaceInstUsesWith(I, Res); - } - - return 0; -} - -Instruction *InstCombiner::visitFSub(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFAdd(Op0, V); - - if (BinaryOperator *Op1I = dyn_cast(Op1)) { - if (Op1I->getOpcode() == Instruction::FAdd) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(0), - I.getName()); - } - } - - return 0; -} - -/// isSignBitCheck - Given an exploded icmp instruction, return true if the -/// comparison only checks the sign bit. 
If it only checks the sign bit, set -/// TrueIfSigned if the result of the comparison is true when the input value is -/// signed. -static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, - bool &TrueIfSigned) { - switch (pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS->isZero(); - case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 - TrueIfSigned = true; - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS->getValue().isSignBit(); - default: - return false; - } -} - -Instruction *InstCombiner::visitMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa(Op1)) // undef * X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // Simplify mul instructions with a constant RHS. - if (Constant *Op1C = dyn_cast(Op1)) { - if (ConstantInt *CI = dyn_cast(Op1C)) { - - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); - - if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 - if (CI->equalsInt(1)) // X * 1 == X - return ReplaceInstUsesWith(I, Op0); - if (CI->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); - - const APInt& Val = cast(CI)->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - return BinaryOperator::CreateShl(Op0, - ConstantInt::get(Op0->getType(), Val.logBase2())); - } - } else if (isa(Op1C->getType())) { - if (Op1C->isNullValue()) - return ReplaceInstUsesWith(I, Op1C); - - if (ConstantVector *Op1V = dyn_cast(Op1C)) { - if (Op1V->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); - - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantInt *CI = dyn_cast(Splat)) - if (CI->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - } - } - } - - if (BinaryOperator *Op0I = dyn_cast(Op0)) - if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa(Op0I->getOperand(1)) && isa(Op1C)) { - // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); - Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); - return BinaryOperator::CreateAdd(Add, C1C2); - - } - - // Try to fold constant mul into select arguments. 
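Every predicate/constant pair accepted by isSignBitCheck above asks the same question: is the sign bit of the input set? The equivalences can be spot-checked directly (illustrative C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t i = INT32_MIN; i <= INT32_MAX; i += 0x10000) {
        int32_t x = (int32_t)i;
        bool sign = ((uint32_t)x & 0x80000000u) != 0;
        assert((x < 0) == sign);                      // ICMP_SLT 0
        assert((x <= -1) == sign);                    // ICMP_SLE -1
        assert((x > -1) == !sign);                    // ICMP_SGT -1
        assert(((uint32_t)x > 0x7FFFFFFFu) == sign);  // ICMP_UGT INT_MAX
        assert(((uint32_t)x >= 0x80000000u) == sign); // ICMP_UGE signbit
      }
      return 0;
    }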
- if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castNegVal(Op1)) - return BinaryOperator::CreateMul(Op0v, Op1v); - - // (X / Y) * Y = X - (X % Y) - // (X / Y) * -Y = (X % Y) - X - { - Value *Op1C = Op1; - BinaryOperator *BO = dyn_cast(Op0); - if (!BO || - (BO->getOpcode() != Instruction::UDiv && - BO->getOpcode() != Instruction::SDiv)) { - Op1C = Op0; - BO = dyn_cast(Op1); - } - Value *Neg = dyn_castNegVal(Op1C); - if (BO && BO->hasOneUse() && - (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && - (BO->getOpcode() == Instruction::UDiv || - BO->getOpcode() == Instruction::SDiv)) { - Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - - // If the division is exact, X % Y is zero. - if (SDivOperator *SDiv = dyn_cast(BO)) - if (SDiv->isExact()) { - if (Op1BO == Op1C) - return ReplaceInstUsesWith(I, Op0BO); - return BinaryOperator::CreateNeg(Op0BO); - } - - Value *Rem; - if (BO->getOpcode() == Instruction::UDiv) - Rem = Builder->CreateURem(Op0BO, Op1BO); - else - Rem = Builder->CreateSRem(Op0BO, Op1BO); - Rem->takeName(BO); - - if (Op1BO == Op1C) - return BinaryOperator::CreateSub(Op0BO, Rem); - return BinaryOperator::CreateSub(Rem, Op0BO); - } - } - - /// i1 mul -> i1 and. - if (I.getType() == Type::getInt1Ty(*Context)) - return BinaryOperator::CreateAnd(Op0, Op1); - - // X*(1 << Y) --> X << Y - // (1 << Y)*X --> X << Y - { - Value *Y; - if (match(Op0, m_Shl(m_One(), m_Value(Y)))) - return BinaryOperator::CreateShl(Op1, Y); - if (match(Op1, m_Shl(m_One(), m_Value(Y)))) - return BinaryOperator::CreateShl(Op0, Y); - } - - // If one of the operands of the multiply is a cast from a boolean value, then - // we know the bool is either zero or one, so this is a 'masking' multiply. - // X * Y (where Y is 0 or 1) -> X & (0-Y) - if (!isa(I.getType())) { - // -2 is "-1 << 1" so it is all bits set except the low one. - APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - - Value *BoolCast = 0, *OtherOp = 0; - if (MaskedValueIsZero(Op0, Negative2)) - BoolCast = Op0, OtherOp = Op1; - else if (MaskedValueIsZero(Op1, Negative2)) - BoolCast = Op1, OtherOp = Op0; - - if (BoolCast) { - Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), - BoolCast, "tmp"); - return BinaryOperator::CreateAnd(V, OtherOp); - } - } - - return Changed ? &I : 0; -} - -Instruction *InstCombiner::visitFMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Simplify mul instructions with a constant RHS... - if (Constant *Op1C = dyn_cast(Op1)) { - if (ConstantFP *Op1F = dyn_cast(Op1C)) { - // "In IEEE floating point, x*1 is not equivalent to x for nans. However, - // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa(Op1C->getType())) { - if (ConstantVector *Op1V = dyn_cast(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantFP *F = dyn_cast(Splat)) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); - } - } - } - - // Try to fold constant mul into select arguments. 
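The (X / Y) * Y fold above relies on the truncating-division identity X == (X / Y) * Y + (X % Y), so the product can be rebuilt as X minus a remainder (or remainder minus X for a negated divisor). A quick exhaustive check for small unsigned operands (standalone C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 64; ++x)
        for (uint32_t y = 1; y < 64; ++y)
          assert((x / y) * y == x - (x % y));  // the udiv form of the fold
      return 0;
    }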
- if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFMul(Op0v, Op1v); - - return Changed ? &I : 0; -} - -/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select -/// instruction. -bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { - SelectInst *SI = cast(I.getOperand(1)); - - // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y - int NonNullOperand = -1; - if (Constant *ST = dyn_cast(SI->getOperand(1))) - if (ST->isNullValue()) - NonNullOperand = 2; - // div/rem X, (Cond ? Y : 0) -> div/rem X, Y - if (Constant *ST = dyn_cast(SI->getOperand(2))) - if (ST->isNullValue()) - NonNullOperand = 1; - - if (NonNullOperand == -1) - return false; - - Value *SelectCond = SI->getOperand(0); - - // Change the div/rem to use 'Y' instead of the select. - I.setOperand(1, SI->getOperand(NonNullOperand)); - - // Okay, we know we replace the operand of the div/rem with 'Y' with no - // problem. However, the select, or the condition of the select may have - // multiple uses. Based on our knowledge that the operand must be non-zero, - // propagate the known value for the select into other uses of it, and - // propagate a known value of the condition into its other users. - - // If the select and condition only have a single use, don't bother with this, - // early exit. - if (SI->use_empty() && SelectCond->hasOneUse()) - return true; - - // Scan the current block backward, looking for other uses of SI. - BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); - - while (BBI != BBFront) { - --BBI; - // If we found a call to a function, we can't assume it will return, so - // information from below it cannot be propagated above it. - if (isa(BBI) && !isa(BBI)) - break; - - // Replace uses of the select or its condition with the known values. - for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); - I != E; ++I) { - if (*I == SI) { - *I = SI->getOperand(NonNullOperand); - Worklist.Add(BBI); - } else if (*I == SelectCond) { - *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) : - ConstantInt::getFalse(*Context); - Worklist.Add(BBI); - } - } - - // If we past the instruction, quit looking for it. - if (&*BBI == SI) - SI = 0; - if (&*BBI == SelectCond) - SelectCond = 0; - - // If we ran out of things to eliminate, break out of the loop. - if (SelectCond == 0 && SI == 0) - break; - - } - return true; -} - - -/// This function implements the transforms on div instructions that work -/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is -/// used by the visitors to those instructions. -/// @brief Transforms common to all three div instructions -Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // undef / X -> 0 for integer. - // undef / X -> undef for FP (the undef could be a snan). - if (isa(Op0)) { - if (Op0->getType()->isFPOrFPVector()) - return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // X / undef -> undef - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); - - return 0; -} - -/// This function implements the transforms common to both integer division -/// instructions (udiv and sdiv). 
It is called by the visitors to those integer -/// division instructions. -/// @brief Common integer divide transforms -Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // (sdiv X, X) --> 1 (udiv X, X) --> 1 - if (Op0 == Op1) { - if (const VectorType *Ty = dyn_cast(I.getType())) { - Constant *CI = ConstantInt::get(Ty->getElementType(), 1); - std::vector Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); - } - - Constant *CI = ConstantInt::get(I.getType(), 1); - return ReplaceInstUsesWith(I, CI); - } - - if (Instruction *Common = commonDivTransforms(I)) - return Common; - - // Handle cases involving: [su]div X, (select Cond, Y, Z) - // This does not apply for fdiv. - if (isa(Op1) && SimplifyDivRemOfSelect(I)) - return &I; - - if (ConstantInt *RHS = dyn_cast(Op1)) { - // div X, 1 == X - if (RHS->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - - // (X / C1) / C2 -> X / (C1*C2) - if (Instruction *LHS = dyn_cast(Op0)) - if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) - if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { - if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - else - return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - ConstantExpr::getMul(RHS, LHSRHS)); - } - - if (!RHS->isZero()) { // avoid X udiv 0 - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - } - - // 0 / X == 0, we don't need to preserve faults! - if (ConstantInt *LHS = dyn_cast(Op0)) - if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // It can't be division by zero, hence it must be division by one. - if (I.getType() == Type::getInt1Ty(*Context)) - return ReplaceInstUsesWith(I, Op0); - - if (ConstantVector *Op1V = dyn_cast(Op1)) { - if (ConstantInt *X = cast_or_null(Op1V->getSplatValue())) - // div X, 1 == X - if (X->isOne()) - return ReplaceInstUsesWith(I, Op0); - } - - return 0; -} - -Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Handle the integer div common cases - if (Instruction *Common = commonIDivTransforms(I)) - return Common; - - if (ConstantInt *C = dyn_cast(Op1)) { - // X udiv C^2 -> X >> C - // Check to see if this is an unsigned division with an exact power of 2, - // if so, convert to a right shift. 
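The (X / C1) / C2 --> X / (C1*C2) rewrite above has two arms: fold the constants when their product fits the type, or replace the expression with zero when MultiplyOverflows says it does not, since the effective divisor then exceeds any value of the type. Both arms with concrete i8 constants (illustrative C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned i = 0; i < 256; ++i) {
        uint8_t v = (uint8_t)i;
        assert((uint8_t)(v / 4) / 8 == v / 32);  // 4 * 8 fits in i8
        assert((uint8_t)(v / 16) / 32 == 0);     // 16 * 32 overflows i8
      }
      return 0;
    }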
-    if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
-      return BinaryOperator::CreateLShr(Op0,
-          ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
-
-    // X udiv C, where C >= signbit
-    if (C->getValue().isNegative()) {
-      Value *IC = Builder->CreateICmpULT(Op0, C);
-      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
-                                ConstantInt::get(I.getType(), 1));
-    }
-  }
-
-  // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
-  if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
-    if (RHSI->getOpcode() == Instruction::Shl &&
-        isa<ConstantInt>(RHSI->getOperand(0))) {
-      const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
-      if (C1.isPowerOf2()) {
-        Value *N = RHSI->getOperand(1);
-        const Type *NTy = N->getType();
-        if (uint32_t C2 = C1.logBase2())
-          N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
-        return BinaryOperator::CreateLShr(Op0, N);
-      }
-    }
-  }
-
-  // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
-  // where C1&C2 are powers of two.
-  if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
-    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
-      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
-        const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
-        if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
-          // Compute the shift amounts.
-          uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
-          // Construct the "on true" case of the select.
-          Constant *TC = ConstantInt::get(Op0->getType(), TSA);
-          Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
-
-          // Construct the "on false" case of the select.
-          Constant *FC = ConstantInt::get(Op0->getType(), FSA);
-          Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
-
-          // Construct the select instruction and return it.
-          return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
-        }
-      }
-  return 0;
-}
-
-Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  // Handle the integer div common cases.
-  if (Instruction *Common = commonIDivTransforms(I))
-    return Common;
-
-  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // sdiv X, -1 == -X
-    if (RHS->isAllOnesValue())
-      return BinaryOperator::CreateNeg(Op0);
-
-    // sdiv X, C  -->  ashr X, log2(C)
-    if (cast<SDivOperator>(&I)->isExact() &&
-        RHS->getValue().isNonNegative() &&
-        RHS->getValue().isPowerOf2()) {
-      Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
-                                            RHS->getValue().exactLogBase2());
-      return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
-    }
-
-    // -X/C --> X/-C provided the negation doesn't overflow.
-    if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
-      if (isa<Constant>(Sub->getOperand(0)) &&
-          cast<Constant>(Sub->getOperand(0))->isNullValue() &&
-          Sub->hasNoSignedWrap())
-        return BinaryOperator::CreateSDiv(Sub->getOperand(1),
-                                          ConstantExpr::getNeg(RHS));
-  }
-
-  // If the sign bits of both operands are zero (i.e. we can prove they are
-  // unsigned inputs), turn this into a udiv.
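The shift rewrites above are deliberately asymmetric: udiv by 2^k is always a logical shift right, while sdiv becomes an arithmetic shift only when marked exact, because plain ashr rounds toward negative infinity and sdiv toward zero. A small check (standalone C++; assumes arithmetic right shift for signed int, as on mainstream targets):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x = -64; x <= 64; ++x) {
        assert((uint32_t)x / 8 == (uint32_t)x >> 3);  // udiv is always lshr
        if (x % 8 == 0)
          assert(x / 8 == x >> 3);  // sdiv matches ashr only when exact
      }
      return 0;
    }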
- if (I.getType()->isInteger()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op0, Mask)) { - if (MaskedValueIsZero(Op1, Mask)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - } - ConstantInt *ShiftedInt; - if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && - ShiftedInt->getValue().isPowerOf2()) { - // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) - // Safe because the only negative value (1 << Y) can take on is - // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have - // the sign bit set. - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { - return commonDivTransforms(I); -} - -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. -/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVector()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef - - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa(Op1) && SimplifyDivRemOfSelect(I)) - return &I; - - return 0; -} - -/// This function implements the transforms common to both integer remainder -/// instructions (urem and srem). It is called by the visitors to those integer -/// remainder instructions. -/// @brief Common integer remainder transforms -Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Instruction *common = commonRemTransforms(I)) - return common; - - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - if (ConstantInt *RHS = dyn_cast(Op1)) { - // X % 0 == undef, we don't need to preserve faults! - if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - if (Instruction *Op0I = dyn_cast(Op0)) { - if (SelectInst *SI = dyn_cast(Op0I)) { - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - } else if (isa(Op0I)) { - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - // See if we can fold away this rem instruction. - if (SimplifyDemandedInstructionBits(I)) - return &I; - } - } - - return 0; -} - -Instruction *InstCombiner::visitURem(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Instruction *common = commonIRemTransforms(I)) - return common; - - if (ConstantInt *RHS = dyn_cast(Op1)) { - // X urem C^2 -> X and C - // Check to see if this is an unsigned remainder with an exact power of 2, - // if so, convert to a bitwise and. 
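visitURem and visitSRem above use the matching remainder identities: an unsigned remainder by 2^k is a mask with 2^k - 1, and a signed remainder is unchanged by negating its divisor. In miniature (standalone C++; C's truncating % has srem semantics):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x = -40; x <= 40; ++x) {
        assert((uint32_t)x % 8 == ((uint32_t)x & 7u));  // urem by 2^k
        assert(x % -5 == x % 5);  // srem: result takes the sign of X
      }
      return 0;
    }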
- if (ConstantInt *C = dyn_cast(RHS)) - if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C)); - } - - if (Instruction *RHSI = dyn_cast(I.getOperand(1))) { - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) - if (RHSI->getOpcode() == Instruction::Shl && - isa(RHSI->getOperand(0))) { - if (cast(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); - return BinaryOperator::CreateAnd(Op0, Add); - } - } - } - - // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) - // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast(Op1)) { - if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { - // STO == 0 and SFO == 0 handled above. - if ((STO->getValue().isPowerOf2()) && - (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), - SI->getName()+".t"); - Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), - SI->getName()+".f"); - return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitSRem(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Handle the integer rem common cases - if (Instruction *Common = commonIRemTransforms(I)) - return Common; - - if (Value *RHSNeg = dyn_castNegVal(Op1)) - if (!isa(RHSNeg) || - (isa(RHSNeg) && - cast(RHSNeg)->getValue().isStrictlyPositive())) { - // X % -Y -> X % Y - Worklist.AddValue(I.getOperand(1)); - I.setOperand(1, RHSNeg); - return &I; - } - - // If the sign bits of both operands are zero (i.e. we can prove they are - // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1, I.getName()); - } - } - - // If it's a constant vector, flip any negative values positive. - if (ConstantVector *RHSV = dyn_cast(Op1)) { - unsigned VWidth = RHSV->getNumOperands(); - - bool hasNegative = false; - for (unsigned i = 0; !hasNegative && i != VWidth; ++i) - if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) - if (RHS->getValue().isNegative()) - hasNegative = true; - - if (hasNegative) { - std::vector Elts(VWidth); - for (unsigned i = 0; i != VWidth; ++i) { - if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) - Elts[i] = cast(ConstantExpr::getNeg(RHS)); - else - Elts[i] = RHS; - } - } - - Constant *NewRHSV = ConstantVector::get(Elts); - if (NewRHSV != RHSV) { - Worklist.AddValue(I.getOperand(1)); - I.setOperand(1, NewRHSV); - return &I; - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} - -// isOneBitSet - Return true if there is exactly one bit set in the specified -// constant. -static bool isOneBitSet(const ConstantInt *CI) { - return CI->getValue().isPowerOf2(); -} - -// isHighOnes - Return true if the constant is of the form 1+0+. -// This is the same as lowones(~X). -static bool isHighOnes(const ConstantInt *CI) { - return (~CI->getValue() + 1).isPowerOf2(); -} - -/// getICmpCode - Encode a icmp predicate into a three bit mask. 
These bits
-/// are carefully arranged to allow folding of expressions such as:
-///
-///      (A < B) | (A > B) --> (A != B)
-///
-/// Note that this is only valid if the first and second predicates have the
-/// same sign. It is illegal to do: (A u< B) | (A s> B)
-///
-/// Three bits are used to represent the condition, as follows:
-///   0  A > B
-///   1  A == B
-///   2  A < B
-///
-/// <=>  Value  Definition
-/// 000    0    Always false
-/// 001    1    A >  B
-/// 010    2    A == B
-/// 011    3    A >= B
-/// 100    4    A <  B
-/// 101    5    A != B
-/// 110    6    A <= B
-/// 111    7    Always true
-///
-static unsigned getICmpCode(const ICmpInst *ICI) {
-  switch (ICI->getPredicate()) {
-    // False -> 0
-  case ICmpInst::ICMP_UGT: return 1;  // 001
-  case ICmpInst::ICMP_SGT: return 1;  // 001
-  case ICmpInst::ICMP_EQ:  return 2;  // 010
-  case ICmpInst::ICMP_UGE: return 3;  // 011
-  case ICmpInst::ICMP_SGE: return 3;  // 011
-  case ICmpInst::ICMP_ULT: return 4;  // 100
-  case ICmpInst::ICMP_SLT: return 4;  // 100
-  case ICmpInst::ICMP_NE:  return 5;  // 101
-  case ICmpInst::ICMP_ULE: return 6;  // 110
-  case ICmpInst::ICMP_SLE: return 6;  // 110
-    // True -> 7
-  default:
-    llvm_unreachable("Invalid ICmp predicate!");
-    return 0;
-  }
-}
-
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
-static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
-  isOrdered = false;
-  switch (CC) {
-  case FCmpInst::FCMP_ORD: isOrdered = true; return 0;  // 000
-  case FCmpInst::FCMP_UNO:                   return 0;  // 000
-  case FCmpInst::FCMP_OGT: isOrdered = true; return 1;  // 001
-  case FCmpInst::FCMP_UGT:                   return 1;  // 001
-  case FCmpInst::FCMP_OEQ: isOrdered = true; return 2;  // 010
-  case FCmpInst::FCMP_UEQ:                   return 2;  // 010
-  case FCmpInst::FCMP_OGE: isOrdered = true; return 3;  // 011
-  case FCmpInst::FCMP_UGE:                   return 3;  // 011
-  case FCmpInst::FCMP_OLT: isOrdered = true; return 4;  // 100
-  case FCmpInst::FCMP_ULT:                   return 4;  // 100
-  case FCmpInst::FCMP_ONE: isOrdered = true; return 5;  // 101
-  case FCmpInst::FCMP_UNE:                   return 5;  // 101
-  case FCmpInst::FCMP_OLE: isOrdered = true; return 6;  // 110
-  case FCmpInst::FCMP_ULE:                   return 6;  // 110
-    // True -> 7
-  default:
-    // Not expecting FCMP_FALSE and FCMP_TRUE;
-    llvm_unreachable("Unexpected FCmp predicate!");
-    return 0;
-  }
-}
-
-/// getICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
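The three-bit encoding documented above turns logical combination of comparisons into bitwise combination of their codes, which is exactly how FoldICmpLogical merges two icmps further down. Mirroring the table (illustrative C++):

    #include <cassert>

    enum : unsigned { GT = 1, EQ = 2, LT = 4 };  // the three predicate bits

    int main() {
      // (A < B) | (A > B) --> (A != B): 100 | 001 == 101 (code 5, ICMP_NE).
      assert((LT | GT) == 5);
      // (A <= B) & (A >= B) --> (A == B): 110 & 011 == 010.
      assert(((LT | EQ) & (GT | EQ)) == EQ);
      return 0;
    }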
-static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, - LLVMContext *Context) { - switch (code) { - default: llvm_unreachable("Illegal ICmp code!"); - case 0: return ConstantInt::getFalse(*Context); - case 1: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); - case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); - case 3: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); - case 4: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); - case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); - case 6: - if (sign) - return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); - else - return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(*Context); - } -} - -/// getFCmpValue - This is the complement of getFCmpCode, which turns an -/// opcode and two operands into either a FCmp instruction. isordered is passed -/// in to determine which kind of predicate to use in the new fcmp instruction. -static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS, LLVMContext *Context) { - switch (code) { - default: llvm_unreachable("Illegal FCmp code!"); - case 0: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); - case 1: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); - case 2: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); - case 3: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); - case 4: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); - case 5: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); - case 6: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(*Context); - } -} - -/// PredicatesFoldable - Return true if both predicates match sign or if at -/// least one of them is an equality comparison (which is signless). 
-static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { - return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || - (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || - (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); -} - -namespace { -// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) -struct FoldICmpLogical { - InstCombiner &IC; - Value *LHS, *RHS; - ICmpInst::Predicate pred; - FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) - : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), - pred(ICI->getPredicate()) {} - bool shouldApply(Value *V) const { - if (ICmpInst *ICI = dyn_cast(V)) - if (PredicatesFoldable(pred, ICI->getPredicate())) - return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || - (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); - return false; - } - Instruction *apply(Instruction &Log) const { - ICmpInst *ICI = cast(Log.getOperand(0)); - if (ICI->getOperand(0) != LHS) { - assert(ICI->getOperand(1) == LHS); - ICI->swapOperands(); // Swap the LHS and RHS of the ICmp - } - - ICmpInst *RHSICI = cast(Log.getOperand(1)); - unsigned LHSCode = getICmpCode(ICI); - unsigned RHSCode = getICmpCode(RHSICI); - unsigned Code; - switch (Log.getOpcode()) { - case Instruction::And: Code = LHSCode & RHSCode; break; - case Instruction::Or: Code = LHSCode | RHSCode; break; - case Instruction::Xor: Code = LHSCode ^ RHSCode; break; - default: llvm_unreachable("Illegal logical opcode!"); return 0; - } - - bool isSigned = RHSICI->isSigned() || ICI->isSigned(); - Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext()); - if (Instruction *I = dyn_cast(RV)) - return I; - // Otherwise, it's a constant boolean value... - return IC.ReplaceInstUsesWith(Log, RV); - } -}; -} // end anonymous namespace - -// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where -// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is -// guaranteed to be a binary operator. -Instruction *InstCombiner::OptAndOp(Instruction *Op, - ConstantInt *OpRHS, - ConstantInt *AndRHS, - BinaryOperator &TheAnd) { - Value *X = Op->getOperand(0); - Constant *Together = 0; - if (!Op->isShift()) - Together = ConstantExpr::getAnd(AndRHS, OpRHS); - - switch (Op->getOpcode()) { - case Instruction::Xor: - if (Op->hasOneUse()) { - // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Value *And = Builder->CreateAnd(X, AndRHS); - And->takeName(Op); - return BinaryOperator::CreateXor(And, Together); - } - break; - case Instruction::Or: - if (Together == AndRHS) // (X | C) & C --> C - return ReplaceInstUsesWith(TheAnd, AndRHS); - - if (Op->hasOneUse() && Together != OpRHS) { - // (X | C1) & C2 --> (X | (C1&C2)) & C2 - Value *Or = Builder->CreateOr(X, Together); - Or->takeName(Op); - return BinaryOperator::CreateAnd(Or, AndRHS); - } - break; - case Instruction::Add: - if (Op->hasOneUse()) { - // Adding a one to a single bit bit-field should be turned into an XOR - // of the bit. First thing to check is to see if this AND is with a - // single bit constant. - const APInt& AndRHSV = cast(AndRHS)->getValue(); - - // If there is only one bit set... - if (isOneBitSet(cast(AndRHS))) { - // Ok, at this point, we know that we are masking the result of the - // ADD down to exactly one bit. If the constant we are adding has - // no bits set below this bit, then we can eliminate the ADD. - const APInt& AddRHS = cast(OpRHS)->getValue(); - - // Check to see if any bits below the one bit set in AndRHSV are set. 
-        if ((AddRHS & (AndRHSV-1)) == 0) {
-          // If not, the only thing that can affect the output of the AND is
-          // the bit specified by AndRHSV.  If that bit is set, the effect of
-          // the XOR is to toggle the bit.  If it is clear, then the ADD has
-          // no effect.
-          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
-            TheAnd.setOperand(0, X);
-            return &TheAnd;
-          } else {
-            // Pull the XOR out of the AND.
-            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
-            NewAnd->takeName(Op);
-            return BinaryOperator::CreateXor(NewAnd, AndRHS);
-          }
-        }
-      }
-    }
-    break;
-
-  case Instruction::Shl: {
-    // We know that the AND will not produce any of the bits shifted in, so if
-    // the anded constant includes them, clear them now!
-    //
-    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
-    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
-    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
-    ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask);
-
-    if (CI->getValue() == ShlMask) {
-      // Masking out bits that the shift already masks.
-      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
-    } else if (CI != AndRHS) {                  // Reducing bits set in and.
-      TheAnd.setOperand(1, CI);
-      return &TheAnd;
-    }
-    break;
-  }
-  case Instruction::LShr: {
-    // We know that the AND will not produce any of the bits shifted in, so if
-    // the anded constant includes them, clear them now!  This only applies to
-    // unsigned shifts, because a signed shr may bring in set bits!
-    //
-    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
-    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
-    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-    ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
-
-    if (CI->getValue() == ShrMask) {
-      // Masking out bits that the shift already masks.
-      return ReplaceInstUsesWith(TheAnd, Op);
-    } else if (CI != AndRHS) {
-      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
-      return &TheAnd;
-    }
-    break;
-  }
-  case Instruction::AShr:
-    // Signed shr.
-    // See if this is shifting in some sign extension, then masking it out
-    // with an and.
-    if (Op->hasOneUse()) {
-      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
-      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
-      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-      Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
-      if (C == AndRHS) {          // Masking out bits shifted in.
-        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
-        // Make the argument unsigned.
-        Value *ShVal = Op->getOperand(0);
-        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
-        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
-      }
-    }
-    break;
-  }
-  return 0;
-}
-
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
-/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
-/// indicates whether to treat V, Lo and Hi as signed or not.  IB is the
-/// location to insert new instructions.
-Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
-                                           bool isSigned, bool Inside,
-                                           Instruction &IB) {
-  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
-            ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
-         "Lo is not <= Hi in range emission code!");
-
-  if (Inside) {
-    if (Lo == Hi)  // Trivially false.
-      return new ICmpInst(ICmpInst::ICMP_NE, V, V);
-
-    // V >= Min && V < Hi --> V < Hi
-    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
-      ICmpInst::Predicate pred = (isSigned ?
-          ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
-      return new ICmpInst(pred, V, Hi);
-    }
-
-    // Emit V-Lo <u Hi-Lo
-    Constant *NegLo = ConstantExpr::getNeg(Lo);
-    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
-    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
-    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
-  }
-
-  if (Lo == Hi)  // Trivially true.
-    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
-
-  // V < Min || V >= Hi -> V > Hi-1
-  Hi = SubOne(cast<ConstantInt>(Hi));
-  if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
-    ICmpInst::Predicate pred = (isSigned ?
-        ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
-    return new ICmpInst(pred, V, Hi);
-  }
-
-  // Emit V-Lo >u Hi-1-Lo
-  // Note that Hi has already had one subtracted from it, above.
-  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
-  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
-  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
-  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
-}
-
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
-// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
-// not, since all 1s are not contiguous.
-static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
-  const APInt& V = Val->getValue();
-  uint32_t BitWidth = Val->getType()->getBitWidth();
-  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
-
-  // Look for the first zero bit after the run of ones.
-  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
-  // Look for the first non-zero bit.
-  ME = V.getActiveBits();
-  return true;
-}
-
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub.  If we can fold one of
-/// the following xforms:
-///
-///   ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
-///   ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-///   ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-///
-/// return (A +/- B).
-///
-Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
-                                        ConstantInt *Mask, bool isSub,
-                                        Instruction &I) {
-  Instruction *LHSI = dyn_cast<Instruction>(LHS);
-  if (!LHSI || LHSI->getNumOperands() != 2 ||
-      !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
-
-  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
-
-  switch (LHSI->getOpcode()) {
-  default: return 0;
-  case Instruction::And:
-    if (ConstantExpr::getAnd(N, Mask) == Mask) {
-      // If the AndRHS is a power of two minus one (0+1+), this is simple.
-      if ((Mask->getValue().countLeadingZeros() +
-           Mask->getValue().countPopulation()) ==
-          Mask->getValue().getBitWidth())
-        break;
-
-      // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
-      // part, we don't need any explicit masks to take them out of A.  If that
-      // is all N is, ignore it.
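InsertRangeTest above emits the classic unsigned range-check trick: the two-sided test Lo <= V < Hi collapses to the single unsigned comparison (V - Lo) <u (Hi - Lo). Verified directly (standalone C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Lo = -10, Hi = 20;
      for (int32_t v = -50; v <= 50; ++v) {
        bool inside = (v >= Lo && v < Hi);
        // One unsigned compare replaces the signed pair; values below Lo
        // wrap to large unsigned numbers and fail the < check.
        assert(inside == ((uint32_t)(v - Lo) < (uint32_t)(Hi - Lo)));
      }
      return 0;
    }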
-
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
-// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
-// not, since all 1s are not contiguous.
-static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
-  const APInt& V = Val->getValue();
-  uint32_t BitWidth = Val->getType()->getBitWidth();
-  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
-
-  // look for the first zero bit after the run of ones
-  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
-  // look for the first non-zero bit
-  ME = V.getActiveBits();
-  return true;
-}
-
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub.  If we can fold one of
-/// the following xforms:
-///
-/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
-/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-///
-/// return (A +/- B).
-///
-Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
-                                        ConstantInt *Mask, bool isSub,
-                                        Instruction &I) {
-  Instruction *LHSI = dyn_cast<Instruction>(LHS);
-  if (!LHSI || LHSI->getNumOperands() != 2 ||
-      !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
-
-  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
-
-  switch (LHSI->getOpcode()) {
-  default: return 0;
-  case Instruction::And:
-    if (ConstantExpr::getAnd(N, Mask) == Mask) {
-      // If the AndRHS is a power of two minus one (0+1+), this is simple.
-      if ((Mask->getValue().countLeadingZeros() +
-           Mask->getValue().countPopulation()) ==
-          Mask->getValue().getBitWidth())
-        break;
-
-      // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
-      // part, we don't need any explicit masks to take them out of A.  If that
-      // is all N is, ignore it.
-      uint32_t MB = 0, ME = 0;
-      if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
-        uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
-        APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
-        if (MaskedValueIsZero(RHS, Mask))
-          break;
-      }
-    }
-    return 0;
-  case Instruction::Or:
-  case Instruction::Xor:
-    // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
-    if ((Mask->getValue().countLeadingZeros() +
-         Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
-        && ConstantExpr::getAnd(N, Mask)->isNullValue())
-      break;
-    return 0;
-  }
-
-  if (isSub)
-    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
-  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
-}
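// A small sketch (not part of the patch) of the first FoldLogicalPlusAnd
// transform for the simple case where Mask is a low-bit run (0+1+): carries
// in an addition only propagate upward, so bits of A above the mask can
// never influence the masked low bits of the sum, and "(A & N) + B" agrees
// with "A + B" under the mask whenever N covers Mask.  The constants are
// hypothetical examples satisfying N & Mask == Mask.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0xFFu;   // 0+1+ : a power of two minus one
  const uint32_t N    = 0xFFFu;  // N & Mask == Mask
  for (uint32_t a = 0; a < 100000; a += 997)
    for (uint32_t b = 0; b < 100000; b += 1013)
      assert((((a & N) + b) & Mask) == ((a + b) & Mask));
  return 0;
}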
-
-/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
-Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
-                                          ICmpInst *LHS, ICmpInst *RHS) {
-  // (icmp eq A, null) & (icmp eq B, null) -->
-  //    (icmp eq (ptrtoint(A)|ptrtoint(B)), 0)
-  if (TD &&
-      LHS->getPredicate() == ICmpInst::ICMP_EQ &&
-      RHS->getPredicate() == ICmpInst::ICMP_EQ &&
-      isa<ConstantPointerNull>(LHS->getOperand(1)) &&
-      isa<ConstantPointerNull>(RHS->getOperand(1))) {
-    const Type *IntPtrTy = TD->getIntPtrType(I.getContext());
-    Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy);
-    Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy);
-    Value *NewOr = Builder->CreateOr(A, B);
-    return new ICmpInst(ICmpInst::ICMP_EQ, NewOr,
-                        Constant::getNullValue(IntPtrTy));
-  }
-
-  Value *Val, *Val2;
-  ConstantInt *LHSCst, *RHSCst;
-  ICmpInst::Predicate LHSCC, RHSCC;
-
-  // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
-  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
-                         m_ConstantInt(LHSCst))) ||
-      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
-                         m_ConstantInt(RHSCst))))
-    return 0;
-
-  if (LHSCst == RHSCst && LHSCC == RHSCC) {
-    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
-    // where C is a power of 2
-    if (LHSCC == ICmpInst::ICMP_ULT &&
-        LHSCst->getValue().isPowerOf2()) {
-      Value *NewOr = Builder->CreateOr(Val, Val2);
-      return new ICmpInst(LHSCC, NewOr, LHSCst);
-    }
-
-    // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
-    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
-      Value *NewOr = Builder->CreateOr(Val, Val2);
-      return new ICmpInst(LHSCC, NewOr, LHSCst);
-    }
-  }
-
-  // From here on, we only handle:
-  //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
-  if (Val != Val2) return 0;
-
-  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
-  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
-      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
-      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
-      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
-    return 0;
-
-  // We can't fold (ugt x, C) & (sgt x, C2).
-  if (!PredicatesFoldable(LHSCC, RHSCC))
-    return 0;
-
-  // Ensure that the larger constant is on the RHS.
-  bool ShouldSwap;
-  if (CmpInst::isSigned(LHSCC) ||
-      (ICmpInst::isEquality(LHSCC) &&
-       CmpInst::isSigned(RHSCC)))
-    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
-  else
-    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
-  if (ShouldSwap) {
-    std::swap(LHS, RHS);
-    std::swap(LHSCst, RHSCst);
-    std::swap(LHSCC, RHSCC);
-  }
-
-  // At this point, we know we have two icmp instructions
-  // comparing a value against two constants and and'ing the result
-  // together.  Because of the above check, we know that we only have
-  // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here.  We also know
-  // (from the FoldICmpLogical check above), that the two constants
-  // are not equal and that the larger constant is on the RHS.
-  assert(LHSCst != RHSCst && "Compares not folded above?");
-
-  switch (LHSCC) {
-  default: llvm_unreachable("Unknown integer condition code!");
-  case ICmpInst::ICMP_EQ:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
-    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
-    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-    case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
-    case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
-    case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
-      return ReplaceInstUsesWith(I, LHS);
-    }
-  case ICmpInst::ICMP_NE:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_ULT:
-      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
-        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
-      break;                        // (X != 13 & X u< 15) -> no change
-    case ICmpInst::ICMP_SLT:
-      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
-        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
-      break;                        // (X != 13 & X s< 15) -> no change
-    case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
-    case ICmpInst::ICMP_UGT:        // (X != 13 & X u> 15) -> X u> 15
-    case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
-      return ReplaceInstUsesWith(I, RHS);
-    case ICmpInst::ICMP_NE:
-      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
-        return new ICmpInst(ICmpInst::ICMP_UGT, Add,
-                            ConstantInt::get(Add->getType(), 1));
-      }
-      break;                        // (X != 13 & X != 15) -> no change
-    }
-    break;
-  case ICmpInst::ICMP_ULT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
-    case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-    case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
-      break;
-    case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
-    case ICmpInst::ICMP_ULT:        // (X u< 13 & X u< 15) -> X u< 13
-      return ReplaceInstUsesWith(I, LHS);
-    case ICmpInst::ICMP_SLT:        // (X u< 13 & X s< 15) -> no change
-      break;
-    }
-    break;
-  case ICmpInst::ICMP_SLT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
-    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-    case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
-      break;
-    case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
-    case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
-      return ReplaceInstUsesWith(I, LHS);
-    case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
-      break;
-    }
-    break;
-  case ICmpInst::ICMP_UGT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
-    case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
-      return ReplaceInstUsesWith(I, RHS);
-    case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
-      break;
-    case ICmpInst::ICMP_NE:
-      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
-        return new ICmpInst(LHSCC, Val, RHSCst);
-      break;                        // (X u> 13 & X != 15) -> no change
-    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
-      return InsertRangeTest(Val, AddOne(LHSCst),
-                             RHSCst, false, true, I);
-    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
-      break;
-    }
-    break;
-  case ICmpInst::ICMP_SGT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
-    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
-      return ReplaceInstUsesWith(I, RHS);
-    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
-      break;
-    case ICmpInst::ICMP_NE:
-      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
-        return new ICmpInst(LHSCC, Val, RHSCst);
-      break;                        // (X s> 13 & X != 15) -> no change
-    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
-      return InsertRangeTest(Val, AddOne(LHSCst),
-                             RHSCst, true, true, I);
-    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
-      break;
-    }
-    break;
-  }
-
-  return 0;
-}
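// A standalone sketch (not part of the patch) of the ICMP_NE/ICMP_NE case
// above: two "not equal" tests against adjacent constants become one
// unsigned comparison on the offset value, again using unsigned wraparound.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000; ++x) {
    bool twoTests = (x != 13) && (x != 14);
    bool oneTest  = (x - 13u) > 1u;  // 13 -> 0, 14 -> 1, anything else > 1
    assert(twoTests == oneTest);
  }
  return 0;
}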
-
-Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
-                                          FCmpInst *RHS) {
-
-  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
-      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
-    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
-    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
-      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
-        // If either of the constants is a NaN, then the whole thing returns
-        // false.
-        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-        return new FCmpInst(FCmpInst::FCMP_ORD,
-                            LHS->getOperand(0), RHS->getOperand(0));
-      }
-
-    // Handle vector zeros.  This occurs because the canonical form of
-    // "fcmp ord x,x" is "fcmp ord x, 0".
-    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
-        isa<ConstantAggregateZero>(RHS->getOperand(1)))
-      return new FCmpInst(FCmpInst::FCMP_ORD,
-                          LHS->getOperand(0), RHS->getOperand(0));
-    return 0;
-  }
-
-  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
-  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
-  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-
-  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
-    // Swap RHS operands to match LHS.
-    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
-    std::swap(Op1LHS, Op1RHS);
-  }
-
-  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
-    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
- if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - - if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, RHS); - if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, LHS); - - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op1Pred == 0) { - std::swap(LHS, RHS); - std::swap(Op0Pred, Op1Pred); - std::swap(Op0Ordered, Op1Ordered); - } - if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq - // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, RHS); - - // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false - if (!Op0Ordered) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - // ord && ueq -> ord && (uno || eq) -> oeq - return cast(getFCmpValue(true, Op1Pred, - Op0LHS, Op0RHS, Context)); - } - } - - return 0; -} - - -Instruction *InstCombiner::visitAnd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyAndInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - - if (ConstantInt *AndRHS = dyn_cast(Op1)) { - const APInt &AndRHSMask = AndRHS->getValue(); - APInt NotAndRHS(~AndRHSMask); - - // Optimize a variety of ((val OP C1) & C2) combinations... - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - Value *Op0LHS = Op0I->getOperand(0); - Value *Op0RHS = Op0I->getOperand(1); - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Xor: - case Instruction::Or: - // If the mask is only needed on one incoming arm, push it up. - if (!Op0I->hasOneUse()) break; - - if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { - // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); - return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); - } - if (!isa(Op0RHS) && - MaskedValueIsZero(Op0RHS, NotAndRHS)) { - // Not masking anything out for the RHS, move to LHS. - Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); - return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); - } - - break; - case Instruction::Add: - // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. - // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) - return BinaryOperator::CreateAnd(V, AndRHS); - if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) - return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes - break; - - case Instruction::Sub: - // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 
- // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) - return BinaryOperator::CreateAnd(V, AndRHS); - - // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS - // has 1's for all bits that the subtraction with A might affect. - if (Op0I->hasOneUse()) { - uint32_t BitWidth = AndRHSMask.getBitWidth(); - uint32_t Zeros = AndRHSMask.countLeadingZeros(); - APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); - - ConstantInt *A = dyn_cast(Op0LHS); - if (!(A && A->isZero()) && // avoid infinite recursion. - MaskedValueIsZero(Op0LHS, Mask)) { - Value *NewNeg = Builder->CreateNeg(Op0RHS); - return BinaryOperator::CreateAnd(NewNeg, AndRHS); - } - } - break; - - case Instruction::Shl: - case Instruction::LShr: - // (1 << x) & 1 --> zext(x == 0) - // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask == 1 && Op0LHS == AndRHS) { - Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); - return new ZExtInst(NewICmp, I.getType()); - } - break; - } - - if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) - if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) - return Res; - } else if (CastInst *CI = dyn_cast(Op0)) { - // If this is an integer truncation or change from signed-to-unsigned, and - // if the source is an and/or with immediate, transform it. This - // frequently occurs for bitfield accesses. - if (Instruction *CastOp = dyn_cast(CI->getOperand(0))) { - if ((isa(CI) || isa(CI)) && - CastOp->getNumOperands() == 2) - if (ConstantInt *AndCI =dyn_cast(CastOp->getOperand(1))){ - if (CastOp->getOpcode() == Instruction::And) { - // Change: and (cast (and X, C1) to T), C2 - // into : and (cast X to T), trunc_or_bitcast(C1)&C2 - // This will fold the two constants together, which may allow - // other simplifications. - Value *NewCast = Builder->CreateTruncOrBitCast( - CastOp->getOperand(0), I.getType(), - CastOp->getName()+".shrunk"); - // trunc_or_bitcast(C1)&C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - C3 = ConstantExpr::getAnd(C3, AndRHS); - return BinaryOperator::CreateAnd(NewCast, C3); - } else if (CastOp->getOpcode() == Instruction::Or) { - // Change: and (cast (or X, C1) to T), C2 - // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) - // trunc(C1)&C2 - return ReplaceInstUsesWith(I, AndRHS); - } - } - } - } - - // Try to fold constant and into select arguments. 
- if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - - // (~A & ~B) == (~(A | B)) - De Morgan's Law - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(Or); - } - - { - Value *A = 0, *B = 0, *C = 0, *D = 0; - // (A|B) & ~(A&B) -> A^B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - // ~(A&B) & (A|B) -> A^B - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } - } - - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast(Op1)->swapOperands(); - std::swap(A, B); - } - if (A == Op0) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); - } - - // (A&((~A)|B)) -> A&B - if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || - match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) - return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || - match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) - return BinaryOperator::CreateAnd(A, Op0); - } - - if (ICmpInst *RHS = dyn_cast(Op1)) { - // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - if (ICmpInst *LHS = dyn_cast(Op0)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) - return Res; - } - - // fold (and (cast A), (cast B)) -> (cast (and A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - - // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. - if (BinaryOperator *SI1 = dyn_cast(Op1)) { - if (BinaryOperator *SI0 = dyn_cast(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && - SI0->getOperand(1) == SI1->getOperand(1) && - (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = - Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), - SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, - SI1->getOperand(1)); - } - } - - // If and'ing two fcmp, try combine them into one. 
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
-  return Changed ? &I : 0;
-}
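// An illustrative sketch (not part of the patch) of the shift-and-mask
// byte-swap idiom that CollectBSwapParts/MatchBSwap below recognize and
// replace with a single llvm.bswap intrinsic call.
#include <cassert>
#include <cstdint>

// The classic open-coded i32 byte swap: an OR tree of shifted, masked bytes.
static uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0xFF00u) |
         ((x << 8) & 0xFF0000u) | (x << 24);
}

int main() {
  assert(bswap32(0x11223344u) == 0x44332211u);
  assert(bswap32(bswap32(0xDEADBEEFu)) == 0xDEADBEEFu);  // self-inverse
  return 0;
}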
-
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap.  The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero.  If this expression
-/// is accepted, the high byte of ByteValues is set to X to indicate a correct
-/// match.
-///
-/// This function returns true if the match was unsuccessful and false if it
-/// succeeded.  On entry to the function the "OverallLeftShift" is a signed
-/// integer value indicating the number of bytes that the subexpression is
-/// later shifted.  For example, if the expression is later right shifted by
-/// 16 bits, the OverallLeftShift value would be -2 on entry.  This is used to
-/// specify which byte of ByteValues is actually being set.
-///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user.  For example, in (X & 255), X will be
-/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
-///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
-                              SmallVector<Value*, 8> &ByteValues) {
-  if (Instruction *I = dyn_cast<Instruction>(V)) {
-    // If this is an or instruction, it may be an inner node of the bswap.
-    if (I->getOpcode() == Instruction::Or) {
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues) ||
-             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
-
-    // If this is a logical shift by a constant multiple of 8, recurse with
-    // OverallLeftShift and ByteMask adjusted.
-    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
-      unsigned ShAmt =
-        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
-      // Ensure the shift amount is defined and of a byte value.
-      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
-        return true;
-
-      unsigned ByteShift = ShAmt >> 3;
-      if (I->getOpcode() == Instruction::Shl) {
-        // X << 2 -> collect(X, +2)
-        OverallLeftShift += ByteShift;
-        ByteMask >>= ByteShift;
-      } else {
-        // X >>u 2 -> collect(X, -2)
-        OverallLeftShift -= ByteShift;
-        ByteMask <<= ByteShift;
-        ByteMask &= (~0U >> (32-ByteValues.size()));
-      }
-
-      if (OverallLeftShift >= (int)ByteValues.size()) return true;
-      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
-
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
-
-    // If this is a logical 'and' with a mask that clears bytes, clear the
-    // corresponding bytes in ByteMask.
-    if (I->getOpcode() == Instruction::And &&
-        isa<ConstantInt>(I->getOperand(1))) {
-      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
-      unsigned NumBytes = ByteValues.size();
-      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
-      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
-
-      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
-        // If this byte is masked out by a later operation, we don't care what
-        // the and mask is.
-        if ((ByteMask & (1 << i)) == 0)
-          continue;
-
-        // If the AndMask is all zeros for this byte, clear the bit.
-        APInt MaskB = AndMask & Byte;
-        if (MaskB == 0) {
-          ByteMask &= ~(1U << i);
-          continue;
-        }
-
-        // If the AndMask is not all ones for this byte, it's not a bytezap.
-        if (MaskB != Byte)
-          return true;
-
-        // Otherwise, this byte is kept.
-      }
-
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
-  }
-
-  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be
-  // the input value to the bswap.  Some observations: 1) if more than one byte
-  // is demanded from this input, then it could not be successfully assembled
-  // into a byteswap.  At least one of the two bytes would not be aligned with
-  // their ultimate destination.
-  if (!isPowerOf2_32(ByteMask)) return true;
-  unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
-
-  // 2) The input and ultimate destinations must line up: if byte 3 of an i32
-  // is demanded, it needs to go into byte 0 of the result.  This means that the
-  // byte needs to be shifted until it lands in the right byte bucket.  The
-  // shift amount depends on the position: if the byte is coming from the high
-  // part of the value (e.g. byte 3) then it must be shifted right.  If from the
-  // low part, it must be shifted left.
-  unsigned DestByteNo = InputByteNo + OverallLeftShift;
-  if (InputByteNo < ByteValues.size()/2) {
-    if (ByteValues.size()-1-DestByteNo != InputByteNo)
-      return true;
-  } else {
-    if (ByteValues.size()-1-DestByteNo != InputByteNo)
-      return true;
-  }
-
-  // If the destination byte value is already defined, the values are or'd
-  // together, which isn't a bswap (unless it's an or of the same bits).
-  if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
-    return true;
-  ByteValues[DestByteNo] = V;
-  return false;
-}
-
-/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
-/// If so, insert the new bswap intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-  if (!ITy || ITy->getBitWidth() % 16 ||
-      // ByteMask only allows up to 32-byte values.
-      ITy->getBitWidth() > 32*8)
-    return 0;   // Can only bswap pairs of bytes.  Can't do vectors.
-
-  /// ByteValues - For each byte of the result, we keep track of which value
-  /// defines each byte.
-  SmallVector<Value*, 8> ByteValues;
-  ByteValues.resize(ITy->getBitWidth()/8);
-
-  // Try to find all the pieces corresponding to the bswap.
-  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
-  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
-    return 0;
-
-  // Check to see if all of the bytes come from the same value.
-  Value *V = ByteValues[0];
-  if (V == 0) return 0;  // Didn't find a byte?  Must be zero.
-
-  // Check to make sure that all of the bytes come from the same value.
- for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) - if (ByteValues[i] != V) - return 0; - const Type *Tys[] = { ITy }; - Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - return CallInst::Create(F, V); -} - -/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check -/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then -/// we can simplify this expression to "cond ? C : D or B". -static Instruction *MatchSelectFromAndOr(Value *A, Value *B, - Value *C, Value *D, - LLVMContext *Context) { - // If A is not a select of -1/0, this cannot match. - Value *Cond = 0; - if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) - return 0; - - // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. - if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) - return SelectInst::Create(Cond, C, B); - if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) - return SelectInst::Create(Cond, C, B); - // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. - if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) - return SelectInst::Create(Cond, C, D); - if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) - return SelectInst::Create(Cond, C, D); - return 0; -} - -/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. -Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { - // (icmp ne A, null) | (icmp ne B, null) --> - // (icmp ne (ptrtoint(A)|ptrtoint(B)), 0) - if (TD && - LHS->getPredicate() == ICmpInst::ICMP_NE && - RHS->getPredicate() == ICmpInst::ICMP_NE && - isa(LHS->getOperand(1)) && - isa(RHS->getOperand(1))) { - const Type *IntPtrTy = TD->getIntPtrType(I.getContext()); - Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy); - Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy); - Value *NewOr = Builder->CreateOr(A, B); - return new ICmpInst(ICmpInst::ICMP_NE, NewOr, - Constant::getNullValue(IntPtrTy)); - } - - Value *Val, *Val2; - ConstantInt *LHSCst, *RHSCst; - ICmpInst::Predicate LHSCC, RHSCC; - - // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) - return 0; - - - // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) - if (LHSCst == RHSCst && LHSCC == RHSCC && - LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); - } - - // From here on, we only handle: - // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; - - // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. - if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || - RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || - LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || - RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; - - // We can't fold (ugt x, C) | (sgt x, C2). - if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; - - // Ensure that the larger constant is on the RHS. 
-  bool ShouldSwap;
-  if (CmpInst::isSigned(LHSCC) ||
-      (ICmpInst::isEquality(LHSCC) &&
-       CmpInst::isSigned(RHSCC)))
-    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
-  else
-    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
-  if (ShouldSwap) {
-    std::swap(LHS, RHS);
-    std::swap(LHSCst, RHSCst);
-    std::swap(LHSCC, RHSCC);
-  }
-
-  // At this point, we know we have two icmp instructions
-  // comparing a value against two constants and or'ing the result
-  // together.  Because of the above check, we know that we only have
-  // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here.  We also know (from the
-  // FoldICmpLogical check above), that the two constants are not
-  // equal.
-  assert(LHSCst != RHSCst && "Compares not folded above?");
-
-  switch (LHSCC) {
-  default: llvm_unreachable("Unknown integer condition code!");
-  case ICmpInst::ICMP_EQ:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:
-      if (LHSCst == SubOne(RHSCst)) {
-        // (X == 13 | X == 14) -> X-13 <u 2
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
-        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
-        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
-      }
-      break;                        // (X == 13 | X == 15) -> no change
-    case ICmpInst::ICMP_UGT:        // (X == 13 | X u> 14) -> no change
-    case ICmpInst::ICMP_SGT:        // (X == 13 | X s> 14) -> no change
-      break;
-    case ICmpInst::ICMP_NE:         // (X == 13 | X != 15) -> X != 15
-    case ICmpInst::ICMP_ULT:        // (X == 13 | X u< 15) -> X u< 15
-    case ICmpInst::ICMP_SLT:        // (X == 13 | X s< 15) -> X s< 15
-      return ReplaceInstUsesWith(I, RHS);
-    }
-    break;
-  case ICmpInst::ICMP_NE:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X != 13 | X == 15) -> X != 13
-    case ICmpInst::ICMP_UGT:        // (X != 13 | X u> 15) -> X != 13
-    case ICmpInst::ICMP_SGT:        // (X != 13 | X s> 15) -> X != 13
-      return ReplaceInstUsesWith(I, LHS);
-    case ICmpInst::ICMP_NE:         // (X != 13 | X != 15) -> true
-    case ICmpInst::ICMP_ULT:        // (X != 13 | X u< 15) -> true
-    case ICmpInst::ICMP_SLT:        // (X != 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-    }
-    break;
-  case ICmpInst::ICMP_ULT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
-      break;
-    case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
-      // If RHSCst is [us]MAXINT, it is always false.  Not handling
-      // this can cause overflow.
-      if (RHSCst->isMaxValue(false))
-        return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
-                             false, false, I);
-    case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
-      break;
-    case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
-    case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
-      return ReplaceInstUsesWith(I, RHS);
-    case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
-      break;
-    }
-    break;
-  case ICmpInst::ICMP_SLT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
-      break;
-    case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
-      // If RHSCst is [us]MAXINT, it is always false.  Not handling
-      // this can cause overflow.
- if (RHSCst->isMaxValue(true)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - true, false, I); - case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_UGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 - case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true - case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 - case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true - case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change - break; - } - break; - } - return 0; -} - -Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - if (LHS->getPredicate() == FCmpInst::FCMP_UNO && - RHS->getPredicate() == FCmpInst::FCMP_UNO && - LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { - if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // true. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - - // Otherwise, no need to compare the two constants, compare the - // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); - } - - // Handle vector zeros. This occurs because the canonical form of - // "fcmp uno x,x" is "fcmp uno x, 0". - if (isa(LHS->getOperand(1)) && - isa(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); - - return 0; - } - - Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); - Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); - FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 
- if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, - Op0LHS, Op0RHS); - if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, RHS); - if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, LHS); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op0Ordered == Op1Ordered) { - // If both are ordered or unordered, return a new fcmp with - // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, - Op0LHS, Op0RHS, Context); - if (Instruction *I = dyn_cast(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); - } - } - return 0; -} - -/// FoldOrWithConstants - This helper function folds: -/// -/// ((A | B) & C1) | (B & C2) -/// -/// into: -/// -/// (A & C1) | B -/// -/// when the XOR of the two constants is "all ones" (-1). -Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { - ConstantInt *CI1 = dyn_cast(C); - if (!CI1) return 0; - - Value *V1 = 0; - ConstantInt *CI2 = 0; - if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; - - APInt Xor = CI1->getValue() ^ CI2->getValue(); - if (!Xor.isAllOnesValue()) return 0; - - if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); - return BinaryOperator::CreateOr(NewOp, V1); - } - - return 0; -} - -Instruction *InstCombiner::visitOr(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyOrInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - if (ConstantInt *RHS = dyn_cast(Op1)) { - ConstantInt *C1 = 0; Value *X = 0; - // (X & C1) | C2 --> (X | C2) & (C1|C2) - if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && - isOnlyUse(Op0)) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateAnd(Or, - ConstantInt::get(*Context, RHS->getValue() | C1->getValue())); - } - - // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && - isOnlyUse(Op0)) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateXor(Or, - ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue())); - } - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - Value *A = 0, *B = 0; - ConstantInt *C1 = 0, *C2 = 0; - - // (A | B) | C and A | (B | C) -> bswap if possible. - // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 
- if (match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())) || - (match(Op0, m_Shift(m_Value(), m_Value())) && - match(Op1, m_Shift(m_Value(), m_Value())))) { - if (Instruction *BSwap = MatchBSwap(I)) - return BSwap; - } - - // (X^C)|Y -> (X|Y)^C iff Y&C == 0 - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op1, C1->getValue())) { - Value *NOr = Builder->CreateOr(A, Op1); - NOr->takeName(Op0); - return BinaryOperator::CreateXor(NOr, C1); - } - - // Y|(X^C) -> (X|Y)^C iff Y&C == 0 - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op0, C1->getValue())) { - Value *NOr = Builder->CreateOr(A, Op0); - NOr->takeName(Op0); - return BinaryOperator::CreateXor(NOr, C1); - } - - // (A & C)|(B & D) - Value *C = 0, *D = 0; - if (match(Op0, m_And(m_Value(A), m_Value(C))) && - match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0, *V3 = 0; - C1 = dyn_cast(C); - C2 = dyn_cast(D); - if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - V1 = 0; V2 = 0; V3 = 0; - } - - // Check to see if we have any common things being and'ed. If so, find the - // terms for V1 & (V2|V3). - if (isOnlyUse(Op0) || isOnlyUse(Op1)) { - if (A == B) // (A & C)|(A & D) == A & (C|D) - V1 = A, V2 = C, V3 = D; - else if (A == D) // (A & C)|(B & A) == A & (B|C) - V1 = A, V2 = B, V3 = C; - else if (C == B) // (A & C)|(C & D) == C & (A|D) - V1 = C, V2 = A, V3 = D; - else if (C == D) // (A & C)|(B & C) == C & (A|B) - V1 = C, V2 = A, V3 = B; - - if (V1) { - Value *Or = Builder->CreateOr(V2, V3, "tmp"); - return BinaryOperator::CreateAnd(V1, Or); - } - } - - // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? 
A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context)) - return Match; - - // ((A&~B)|(~A&B)) -> A^B - if ((match(C, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, D); - // ((~B&A)|(~A&B)) -> A^B - if ((match(A, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, D); - // ((A&~B)|(B&~A)) -> A^B - if ((match(C, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, B); - // ((~B&A)|(B&~A)) -> A^B - if ((match(A, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, B); - } - - // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. - if (BinaryOperator *SI1 = dyn_cast(Op1)) { - if (BinaryOperator *SI0 = dyn_cast(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && - SI0->getOperand(1) == SI1->getOperand(1) && - (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), - SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, - SI1->getOperand(1)); - } - } - - // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); - if (Ret) return Ret; - } - // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); - if (Ret) return Ret; - } - - // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(And); - } - - // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) { - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) - return Res; - } - - // fold (or (cast A), (cast B)) -> (cast (or A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) { - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa(Op0C->getOperand(0)) || - !isa(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be - // generated. 
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - } - } - - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - - return Changed ? &I : 0; -} - -namespace { - -// XorSelf - Implements: X ^ X --> 0 -struct XorSelf { - Value *RHS; - XorSelf(Value *rhs) : RHS(rhs) {} - bool shouldApply(Value *LHS) const { return LHS == RHS; } - Instruction *apply(BinaryOperator &Xor) const { - return &Xor; - } -}; - -} - -Instruction *InstCombiner::visitXor(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa(Op1)) { - if (isa(Op0)) - // Handle undef ^ undef -> 0 special case. This is a common - // idiom (misuse). - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef - } - - // xor X, X = 0, even if X is nested in a sequence of Xor's. - if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { - assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - if (isa(I.getType())) - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X - - // Is this a ~ operation? - if (Value *NotOp = dyn_castNotVal(&I)) { - if (BinaryOperator *Op0I = dyn_cast(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || - Op0I->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(Op0I->getOperand(1))) - Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (isFreeToInvert(Op0I->getOperand(0)) && - isFreeToInvert(Op0I->getOperand(1))) { - Value *NotX = - Builder->CreateNot(Op0I->getOperand(0), "notlhs"); - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), "notrhs"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(NotX, NotY); - return BinaryOperator::CreateAnd(NotX, NotY); - } - } - } - } - - - if (ConstantInt *RHS = dyn_cast(Op1)) { - if (RHS->isOne() && Op0->hasOneUse()) { - // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - if (ICmpInst *ICI = dyn_cast(Op0)) - return new ICmpInst(ICI->getInversePredicate(), - ICI->getOperand(0), ICI->getOperand(1)); - - if (FCmpInst *FCI = dyn_cast(Op0)) - return new FCmpInst(FCI->getInversePredicate(), - FCI->getOperand(0), FCI->getOperand(1)); - } - - // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 
- if (CastInst *Op0C = dyn_cast(Op0)) { - if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { - if (CI->hasOneUse() && Op0C->hasOneUse()) { - Instruction::CastOps Opcode = Op0C->getOpcode(); - if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHS == ConstantExpr::getCast(Opcode, - ConstantInt::getTrue(*Context), - Op0C->getDestTy()))) { - CI->setPredicate(CI->getInversePredicate()); - return CastInst::Create(Opcode, CI, Op0C->getType()); - } - } - } - } - - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) - if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { - Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); - Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, - ConstantInt::get(I.getType(), 1)); - return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); - } - - if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { - if (Op0I->getOpcode() == Instruction::Add) { - // ~(X-c) --> (-c-1)-X - if (RHS->isAllOnesValue()) { - Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); - return BinaryOperator::CreateSub( - ConstantExpr::getSub(NegOp0CI, - ConstantInt::get(I.getType(), 1)), - Op0I->getOperand(0)); - } else if (RHS->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) - Constant *C = ConstantInt::get(*Context, - RHS->getValue() + Op0CI->getValue()); - return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); - - } - } else if (Op0I->getOpcode() == Instruction::Or) { - // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 - if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { - Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); - // Anything in both C1 and C2 is known to be zero, remove it from - // NewRHS. - Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); - NewRHS = ConstantExpr::getAnd(NewRHS, - ConstantExpr::getNot(CommonBits)); - Worklist.Add(Op0I); - I.setOperand(0, Op0I->getOperand(0)); - I.setOperand(1, NewRHS); - return &I; - } - } - } - } - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 - if (X == Op1) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 - if (X == Op0) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - - BinaryOperator *Op1I = dyn_cast(Op1); - if (Op1I) { - Value *A, *B; - if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op0) { // B^(B|A) == (A|B)^B - Op1I->swapOperands(); - I.swapOperands(); - std::swap(Op0, Op1); - } else if (B == Op0) { // B^(A|B) == (A|B)^B - I.swapOperands(); // Simplified below. - std::swap(Op0, Op1); - } - } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // A^(A^B) == B - } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { - return ReplaceInstUsesWith(I, A); // A^(B^A) == B - } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && - Op1I->hasOneUse()){ - if (A == Op0) { // A^(A&B) -> A^(B&A) - Op1I->swapOperands(); - std::swap(A, B); - } - if (B == Op0) { // A^(B&A) -> (B&A)^A - I.swapOperands(); // Simplified below. 
- std::swap(Op0, Op1); - } - } - } - - BinaryOperator *Op0I = dyn_cast(Op0); - if (Op0I) { - Value *A, *B; - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && - Op0I->hasOneUse()) { - if (A == Op1) // (B|A)^B == (A|B)^B - std::swap(A, B); - if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); - } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // (A^B)^A == B - } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { - return ReplaceInstUsesWith(I, A); // (B^A)^A == B - } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && - Op0I->hasOneUse()){ - if (A == Op1) // (A&B)^A -> (B&A)^A - std::swap(A, B); - if (B == Op1 && // (B&A)^A == ~B & A - !isa(Op1)) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); - } - } - } - - // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. - if (Op0I && Op1I && Op0I->isShift() && - Op0I->getOpcode() == Op1I->getOpcode() && - Op0I->getOperand(1) == Op1I->getOperand(1) && - (Op1I->hasOneUse() || Op1I->hasOneUse())) { - Value *NewOp = - Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), - Op0I->getName()); - return BinaryOperator::Create(Op1I->getOpcode(), NewOp, - Op1I->getOperand(1)); - } - - if (Op0I && Op1I) { - Value *A, *B, *C, *D; - // (A & B)^(A | B) -> A ^ B - if (match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_Or(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | B)^(A & B) -> A ^ B - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - - // (A & B)^(C & D) - if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && - match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - // (X & Y)^(X & Y) -> (Y^Z) & X - Value *X = 0, *Y = 0, *Z = 0; - if (A == C) - X = A, Y = B, Z = D; - else if (A == D) - X = A, Y = B, Z = C; - else if (B == C) - X = B, Y = A, Z = D; - else if (B == D) - X = B, Y = A, Z = C; - - if (X) { - Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); - return BinaryOperator::CreateAnd(NewOp, X); - } - } - } - - // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) { - if (CastInst *Op1C = dyn_cast(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - } - - return Changed ? 
&I : 0; -} - -static ConstantInt *ExtractElement(Constant *V, Constant *Idx, - LLVMContext *Context) { - return cast(ConstantExpr::getExtractElement(V, Idx)); -} - -static bool HasAddOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().ult(In1->getValue()); -} - -/// AddWithOverflow - Compute Result = In1+In2, returning true if the result -/// overflowed for this type. -static bool AddWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext *Context, - bool IsSigned = false) { - Result = ConstantExpr::getAdd(In1, In2); - - if (const VectorType *VTy = dyn_cast(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); - if (HasAddOverflow(ExtractElement(Result, Idx, Context), - ExtractElement(In1, Idx, Context), - ExtractElement(In2, Idx, Context), - IsSigned)) - return true; - } - return false; - } - - return HasAddOverflow(cast(Result), - cast(In1), cast(In2), - IsSigned); -} - -static bool HasSubOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().ugt(In1->getValue()); -} - -/// SubWithOverflow - Compute Result = In1-In2, returning true if the result -/// overflowed for this type. -static bool SubWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext *Context, - bool IsSigned = false) { - Result = ConstantExpr::getSub(In1, In2); - - if (const VectorType *VTy = dyn_cast(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); - if (HasSubOverflow(ExtractElement(Result, Idx, Context), - ExtractElement(In1, Idx, Context), - ExtractElement(In2, Idx, Context), - IsSigned)) - return true; - } - return false; - } - - return HasSubOverflow(cast(Result), - cast(In1), cast(In2), - IsSigned); -} - - -/// FoldGEPICmp - Fold comparisons between a GEP instruction and something -/// else. At this point we know that the GEP is on the LHS of the comparison. -Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, - Instruction &I) { - // Look through bitcasts. - if (BitCastInst *BCI = dyn_cast(RHS)) - RHS = BCI->getOperand(0); - - Value *PtrBase = GEPLHS->getOperand(0); - if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { - // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). - // This transformation (ignoring the base and scales) is valid because we - // know pointers can't overflow since the gep is inbounds. See if we can - // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); - - // If not, synthesize the offset the hard way. - if (Offset == 0) - Offset = EmitGEPOffset(GEPLHS, *this); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Constant::getNullValue(Offset->getType())); - } else if (GEPOperator *GEPRHS = dyn_cast(RHS)) { - // If the base pointers are different, but the indices are the same, just - // compare the base pointer. 
- if (PtrBase != GEPRHS->getOperand(0)) { - bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); - IndicesTheSame &= GEPLHS->getOperand(0)->getType() == - GEPRHS->getOperand(0)->getType(); - if (IndicesTheSame) - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - IndicesTheSame = false; - break; - } - - // If all indices are the same, just compare the base pointers. - if (IndicesTheSame) - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), - GEPLHS->getOperand(0), GEPRHS->getOperand(0)); - - // Otherwise, the base pointers are different and the indices are - // different, bail out. - return 0; - } - - // If one of the GEPs has all zero indices, recurse. - bool AllZeros = true; - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (!isa(GEPLHS->getOperand(i)) || - !cast(GEPLHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) - return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), - ICmpInst::getSwappedPredicate(Cond), I); - - // If the other GEP has all zero indices, recurse. - AllZeros = true; - for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) - if (!isa(GEPRHS->getOperand(i)) || - !cast(GEPRHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) - return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); - - if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { - // If the GEPs only differ by one index, compare it. - unsigned NumDifferences = 0; // Keep track of # differences. - unsigned DiffOperand = 0; // The operand that differs. - for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) - if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != - GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { - // Irreconcilable differences. - NumDifferences = 2; - break; - } else { - if (NumDifferences++) break; - DiffOperand = i; - } - } - - if (NumDifferences == 0) // SAME GEP? - return ReplaceInstUsesWith(I, // No comparison is needed here. - ConstantInt::get(Type::getInt1Ty(*Context), - ICmpInst::isTrueWhenEqual(Cond))); - - else if (NumDifferences == 1) { - Value *LHSV = GEPLHS->getOperand(DiffOperand); - Value *RHSV = GEPRHS->getOperand(DiffOperand); - // Make sure we do a signed comparison here. - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); - } - } - - // Only lower this if the icmp is the only user of the GEP or if we expect - // the result to fold to a constant! - if (TD && - (isa(GEPLHS) || GEPLHS->hasOneUse()) && - (isa(GEPRHS) || GEPRHS->hasOneUse())) { - // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) - Value *L = EmitGEPOffset(GEPLHS, *this); - Value *R = EmitGEPOffset(GEPRHS, *this); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); - } - } - return 0; -} - -/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. -/// -Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, - Instruction *LHSI, - Constant *RHSC) { - if (!isa(RHSC)) return 0; - const APFloat &RHS = cast(RHSC)->getValueAPF(); - - // Get the width of the mantissa. We don't want to hack on conversions that - // might lose information from the integer, e.g. "i64 -> float" - int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); - if (MantissaWidth == -1) return 0; // Unknown. 
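The inbounds-GEP fold above ((gep Ptr, OFFSET) cmp Ptr ---> OFFSET cmp 0, using the signed predicate) has a direct C++ analogue: comparing p + o against p within one array is equivalent to comparing the signed offset o against 0. A minimal sketch, assuming in-bounds offsets only:

#include <cassert>

int main() {
  int buf[16];
  int *p = buf + 8;
  for (long o = -8; o <= 8; ++o) {    // offsets that keep p + o inside buf
    assert((p + o == p) == (o == 0)); // (gep inbounds p, o) eq p  <=>  o eq 0
    assert((p + o <  p) == (o <  0)); // relational form uses the signed predicate
  }
}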
- - // Check to see that the input is converted from an integer type that is small - // enough that preserves all bits. TODO: check here for "known" sign bits. - // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. - unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - - // If this is a uitofp instruction, we need an extra bit to hold the sign. - bool LHSUnsigned = isa(LHSI); - if (LHSUnsigned) - ++InputSize; - - // If the conversion would lose info, don't hack on this. - if ((int)InputSize > MantissaWidth) - return 0; - - // Otherwise, we can potentially simplify the comparison. We know that it - // will always come through as an integer value and we know the constant is - // not a NAN (it would have been previously simplified). - assert(!RHS.isNaN() && "NaN comparison not already folded!"); - - ICmpInst::Predicate Pred; - switch (I.getPredicate()) { - default: llvm_unreachable("Unexpected predicate!"); - case FCmpInst::FCMP_UEQ: - case FCmpInst::FCMP_OEQ: - Pred = ICmpInst::ICMP_EQ; - break; - case FCmpInst::FCMP_UGT: - case FCmpInst::FCMP_OGT: - Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; - break; - case FCmpInst::FCMP_UGE: - case FCmpInst::FCMP_OGE: - Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; - break; - case FCmpInst::FCMP_ULT: - case FCmpInst::FCMP_OLT: - Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; - break; - case FCmpInst::FCMP_ULE: - case FCmpInst::FCMP_OLE: - Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; - break; - case FCmpInst::FCMP_UNE: - case FCmpInst::FCMP_ONE: - Pred = ICmpInst::ICMP_NE; - break; - case FCmpInst::FCMP_ORD: - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case FCmpInst::FCMP_UNO: - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - - const IntegerType *IntTy = cast(LHSI->getOperand(0)->getType()); - - // Now we know that the APFloat is a normal number, zero or inf. - - // See if the FP constant is too large for the integer. For example, - // comparing an i8 to 300.0. - unsigned IntWidth = IntTy->getScalarSizeInBits(); - - if (!LHSUnsigned) { - // If the RHS value is > SignedMax, fold the comparison. This handles +INF - // and large values. - APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false); - SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, - APFloat::rmNearestTiesToEven); - if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || - Pred == ICmpInst::ICMP_SLE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - } else { - // If the RHS value is > UnsignedMax, fold the comparison. This handles - // +INF and large values. - APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false); - UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, - APFloat::rmNearestTiesToEven); - if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || - Pred == ICmpInst::ICMP_ULE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - } - - if (!LHSUnsigned) { - // See if the RHS value is < SignedMin. 
- APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); - SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, - APFloat::rmNearestTiesToEven); - if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || - Pred == ICmpInst::ICMP_SGE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - } - - // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or - // [0, UMAX], but it may still be fractional. See if it is fractional by - // casting the FP value to the integer value and back, checking for equality. - // Don't do this for zero, because -0.0 is not fractional. - Constant *RHSInt = LHSUnsigned - ? ConstantExpr::getFPToUI(RHSC, IntTy) - : ConstantExpr::getFPToSI(RHSC, IntTy); - if (!RHS.isZero()) { - bool Equal = LHSUnsigned - ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC - : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; - if (!Equal) { - // If we had a comparison against a fractional value, we have to adjust - // the compare predicate and sometimes the value. RHSC is rounded towards - // zero at this point. - switch (Pred) { - default: llvm_unreachable("Unexpected integer comparison!"); - case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - case ICmpInst::ICMP_ULE: - // (float)int <= 4.4 --> int <= 4 - // (float)int <= -4.4 --> false - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_SLE: - // (float)int <= 4.4 --> int <= 4 - // (float)int <= -4.4 --> int < -4 - if (RHS.isNegative()) - Pred = ICmpInst::ICMP_SLT; - break; - case ICmpInst::ICMP_ULT: - // (float)int < -4.4 --> false - // (float)int < 4.4 --> int <= 4 - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - Pred = ICmpInst::ICMP_ULE; - break; - case ICmpInst::ICMP_SLT: - // (float)int < -4.4 --> int < -4 - // (float)int < 4.4 --> int <= 4 - if (!RHS.isNegative()) - Pred = ICmpInst::ICMP_SLE; - break; - case ICmpInst::ICMP_UGT: - // (float)int > 4.4 --> int > 4 - // (float)int > -4.4 --> true - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - break; - case ICmpInst::ICMP_SGT: - // (float)int > 4.4 --> int > 4 - // (float)int > -4.4 --> int >= -4 - if (RHS.isNegative()) - Pred = ICmpInst::ICMP_SGE; - break; - case ICmpInst::ICMP_UGE: - // (float)int >= -4.4 --> true - // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - Pred = ICmpInst::ICMP_UGT; - break; - case ICmpInst::ICMP_SGE: - // (float)int >= -4.4 --> int >= -4 - // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) - Pred = ICmpInst::ICMP_SGT; - break; - } - } - } - - // Lower this FP comparison into an appropriate integer version of the - // comparison. - return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); -} - -Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { - bool Changed = false; - - /// Orders the operands of the compare so that they are listed from most - /// complex to least complex. This puts constants before unary operators, - /// before binary operators. 
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { - I.swapOperands(); - Changed = true; - } - - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // Simplify 'fcmp pred X, X' - if (Op0 == Op1) { - switch (I.getPredicate()) { - default: llvm_unreachable("Unknown predicate!"); - case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) - case FCmpInst::FCMP_ULT: // True if unordered or less than - case FCmpInst::FCMP_UGT: // True if unordered or greater than - case FCmpInst::FCMP_UNE: // True if unordered or not equal - // Canonicalize these to be 'fcmp uno %X, 0.0'. - I.setPredicate(FCmpInst::FCMP_UNO); - I.setOperand(1, Constant::getNullValue(Op0->getType())); - return &I; - - case FCmpInst::FCMP_ORD: // True if ordered (no nans) - case FCmpInst::FCMP_OEQ: // True if ordered and equal - case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal - case FCmpInst::FCMP_OLE: // True if ordered and less than or equal - // Canonicalize these to be 'fcmp ord %X, 0.0'. - I.setPredicate(FCmpInst::FCMP_ORD); - I.setOperand(1, Constant::getNullValue(Op0->getType())); - return &I; - } - } - - // Handle fcmp with constant RHS - if (Constant *RHSC = dyn_cast(Op1)) { - if (Instruction *LHSI = dyn_cast(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::PHI: - // Only fold fcmp into the PHI if the phi and fcmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. - if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) - return NV; - break; - case Instruction::SIToFP: - case Instruction::UIToFP: - if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) - return NV; - break; - case Instruction::Select: - // If either operand of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (LHSI->hasOneUse()) { - if (Constant *C = dyn_cast(LHSI->getOperand(1))) { - // Fold the known value into the constant operand. - Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); - // Insert a new FCmp of the other select operand. - Op2 = Builder->CreateFCmp(I.getPredicate(), - LHSI->getOperand(2), RHSC, I.getName()); - } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { - // Fold the known value into the constant operand. - Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); - // Insert a new FCmp of the other select operand. - Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), - RHSC, I.getName()); - } - } - - if (Op1) - return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); - break; - } - } - - return Changed ? &I : 0; -} - -Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { - bool Changed = false; - - /// Orders the operands of the compare so that they are listed from most - /// complex to least complex. This puts constants before unary operators, - /// before binary operators. 
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { - I.swapOperands(); - Changed = true; - } - - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); - - // icmp's with boolean values can always be turned into bitwise operations - if (Ty == Type::getInt1Ty(*Context)) { - switch (I.getPredicate()) { - default: llvm_unreachable("Invalid icmp instruction!"); - case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); - return BinaryOperator::CreateNot(Xor); - } - case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B - return BinaryOperator::CreateXor(Op0, Op1); - - case ICmpInst::ICMP_UGT: - std::swap(Op0, Op1); // Change icmp ugt -> icmp ult - // FALL THROUGH - case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); - return BinaryOperator::CreateAnd(Not, Op1); - } - case ICmpInst::ICMP_SGT: - std::swap(Op0, Op1); // Change icmp sgt -> icmp slt - // FALL THROUGH - case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); - return BinaryOperator::CreateAnd(Not, Op0); - } - case ICmpInst::ICMP_UGE: - std::swap(Op0, Op1); // Change icmp uge -> icmp ule - // FALL THROUGH - case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); - return BinaryOperator::CreateOr(Not, Op1); - } - case ICmpInst::ICMP_SGE: - std::swap(Op0, Op1); // Change icmp sge -> icmp sle - // FALL THROUGH - case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); - return BinaryOperator::CreateOr(Not, Op0); - } - } - } - - unsigned BitWidth = 0; - if (TD) - BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVector()) - BitWidth = Ty->getScalarSizeInBits(); - - bool isSignBit = false; - - // See if we are doing a comparison with a constant. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - Value *A = 0, *B = 0; - - // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) - if (I.isEquality() && CI->isNullValue() && - match(Op0, m_Sub(m_Value(A), m_Value(B)))) { - // (icmp cond A B) if cond is equality - return new ICmpInst(I.getPredicate(), A, B); - } - - // If we have an icmp le or icmp ge instruction, turn it into the - // appropriate icmp lt or icmp gt instruction. This allows us to rely on - // them being folded in the code below. The SimplifyICmpInst code has - // already handled the edge cases for us, so we just assert on them. - switch (I.getPredicate()) { - default: break; - case ICmpInst::ICMP_ULE: - assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE - return new ICmpInst(ICmpInst::ICMP_ULT, Op0, - AddOne(CI)); - case ICmpInst::ICMP_SLE: - assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - AddOne(CI)); - case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE - return new ICmpInst(ICmpInst::ICMP_UGT, Op0, - SubOne(CI)); - case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, - SubOne(CI)); - } - - // If this comparison is a normal comparison, it demands all - // bits; if it is a sign bit comparison, it only demands the sign bit.
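The le/ge canonicalization above relies on A <=u C being the same as A <u C+1 whenever C+1 does not wrap (and symmetrically for >=); that is exactly why the MAX/MIN edge cases are asserted away first. An exhaustive i8-width check (plain C++, illustrative only):

#include <cassert>

int main() {
  for (int a = 0; a <= 255; ++a)      // i8 values, unsigned view
    for (int c = 0; c <= 255; ++c) {
      if (c != 255) assert((a <= c) == (a < c + 1)); // A <=u C  ->  A <u C+1 (C != UMAX)
      if (c != 0)   assert((a >= c) == (a > c - 1)); // A >=u C  ->  A >u C-1 (C != UMIN)
    }
}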
- bool UnusedBit; - isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); - } - - // See if we can fold the comparison based on range information we can get - // by checking whether bits are known to be zero or one in the input. - if (BitWidth != 0) { - APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); - APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); - - if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), - Op0KnownZero, Op0KnownOne, 0)) - return &I; - if (SimplifyDemandedBits(I.getOperandUse(1), - APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) - return &I; - - // Given the known and unknown bits, compute a range that the LHS could be - // in. Compute the Min, Max and RHS values based on the known bits. For the - // EQ and NE we use unsigned values. - APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); - APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); - if (I.isSigned()) { - ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, - Op0Min, Op0Max); - ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, - Op1Min, Op1Max); - } else { - ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, - Op0Min, Op0Max); - ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, - Op1Min, Op1Max); - } - - // If Min and Max are known to be the same, then SimplifyDemandedBits - // figured out that the LHS is a constant. Just constant fold this now so - // that code below can assume that Min != Max. - if (!isa(Op0) && Op0Min == Op0Max) - return new ICmpInst(I.getPredicate(), - ConstantInt::get(*Context, Op0Min), Op1); - if (!isa(Op1) && Op1Min == Op1Max) - return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(*Context, Op1Min)); - - // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. 
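The example in the comment above can be checked directly: once the mask forces every bit except bit 2 to zero, the unsigned maximum of the value is 4, so the comparison against 8 folds to true. A minimal sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x <= 255; ++x) {
    uint8_t v = (uint8_t)(x & 4); // every bit except bit 2 is known zero, so max(v) == 4
    assert(v < 8);                // hence (x&4) < 8 folds to true
  }
}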
- switch (I.getPredicate()) { - default: llvm_unreachable("Unknown icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_NE: - if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - break; - case ICmpInst::ICMP_ULT: - if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - SubOne(CI)); - - // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear - if (CI->isMinValue(true)) - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, - Constant::getAllOnesValue(Op0->getType())); - } - break; - case ICmpInst::ICMP_UGT: - if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - - if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - AddOne(CI)); - - // (x >u 2147483647) -> (x <s 0) -> true if sign bit set - if (CI->isMaxValue(true)) - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - Constant::getNullValue(Op0->getType())); - } - break; - case ICmpInst::ICMP_SLT: - if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - SubOne(CI)); - } - break; - case ICmpInst::ICMP_SGT: - if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - - if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - AddOne(CI)); - } - break; - case ICmpInst::ICMP_SGE: - assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); - if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_SLE: - assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); - if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I,
ConstantInt::getTrue(*Context)); - if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_UGE: - assert(!isa(Op1) && "ICMP_UGE with ConstantInt not folded!"); - if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_ULE: - assert(!isa(Op1) && "ICMP_ULE with ConstantInt not folded!"); - if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - } - - // Turn a signed comparison into an unsigned one if both operands - // are known to have the same sign. - if (I.isSigned() && - ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || - (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) - return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); - } - - // Test if the ICmpInst instruction is used exclusively by a select as - // part of a minimum or maximum operation. If so, refrain from doing - // any other folding. This helps out other analyses which understand - // non-obfuscated minimum and maximum idioms, such as ScalarEvolution - // and CodeGen. And in this case, at least one of the comparison - // operands has at least one user besides the compare (the select), - // which would often largely negate the benefit of folding anyway. - if (I.hasOneUse()) - if (SelectInst *SI = dyn_cast(*I.use_begin())) - if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || - (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) - return 0; - - // See if we are doing a comparison between a constant and an instruction that - // can be folded into the comparison. - if (ConstantInt *CI = dyn_cast(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp - if (Instruction *LHSI = dyn_cast(Op0)) - if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) - return Res; - } - - // Handle icmp with constant (but not simple integer constant) RHS - if (Constant *RHSC = dyn_cast(Op1)) { - if (Instruction *LHSI = dyn_cast(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::GetElementPtr: - if (RHSC->isNullValue()) { - // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null - bool isAllZeros = true; - for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i) - if (!isa(LHSI->getOperand(i)) || - !cast(LHSI->getOperand(i))->isNullValue()) { - isAllZeros = false; - break; - } - if (isAllZeros) - return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Constant::getNullValue(LHSI->getOperand(0)->getType())); - } - break; - - case Instruction::PHI: - // Only fold icmp into the PHI if the phi and icmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. 
- if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) - return NV; - break; - case Instruction::Select: { - // If either operand of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (Constant *C = dyn_cast(LHSI->getOperand(1))) - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - if (Constant *C = dyn_cast(LHSI->getOperand(2))) - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - - // We only want to perform this transformation if it will not lead to - // additional code. This is true if either both sides of the select - // fold to a constant (in which case the icmp is replaced with a select - // which will usually simplify) or this is the only user of the - // select (in which case we are trading a select+icmp for a simpler - // select+icmp). - if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { - if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), - RHSC, I.getName()); - if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC, I.getName()); - return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); - } - break; - } - case Instruction::Call: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (isMalloc(LHSI) && LHSI->hasOneUse() && - isa(RHSC)) { - // Need to explicitly erase malloc call here, instead of adding it to - // Worklist, because it won't get DCE'd from the Worklist since - // isInstructionTriviallyDead() returns false for function calls. - // It is OK to replace LHSI/MallocCall with Undef because the - // instruction that uses it will be erased via Worklist. - if (extractMallocCall(LHSI)) { - LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); - EraseInstFromFunction(*LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) - if (MallocCall->hasOneUse()) { - MallocCall->replaceAllUsesWith( - UndefValue::get(MallocCall->getType())); - EraseInstFromFunction(*MallocCall); - Worklist.Add(LHSI); // The malloc's bitcast use. - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - } - break; - } - } - - // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. - if (GEPOperator *GEP = dyn_cast(Op0)) - if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) - return NI; - if (GEPOperator *GEP = dyn_cast(Op1)) - if (Instruction *NI = FoldGEPICmp(GEP, Op0, - ICmpInst::getSwappedPredicate(I.getPredicate()), I)) - return NI; - - // Test to see if the operands of the icmp are casted versions of other - // values. If the ptr->ptr cast can be stripped off both arguments, we do so - // now. - if (BitCastInst *CI = dyn_cast(Op0)) { - if (isa(Op0->getType()) && - (isa(Op1) || isa(Op1))) { - // We keep moving the cast from the left operand over to the right - // operand, where it can often be eliminated completely. - Op0 = CI->getOperand(0); - - // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast - // so eliminate it as well. - if (BitCastInst *CI2 = dyn_cast(Op1)) - Op1 = CI2->getOperand(0); - - // If Op1 is a constant, we can fold the cast into the constant. 
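Profitability conditions (hasOneUse, both arms constant) aside, the select fold above rests on a simple identity: a compare of a select is the select of the compares. A minimal sketch over small integers:

#include <cassert>

int main() {
  for (int c = 0; c <= 1; ++c)
    for (int a = -2; a <= 2; ++a)
      for (int b = -2; b <= 2; ++b)
        for (int r = -2; r <= 2; ++r) {
          bool cmpOfSelect = (c ? a : b) < r;        // icmp (select c, a, b), r
          bool selectOfCmp = c ? (a < r) : (b < r);  // select c, (icmp a, r), (icmp b, r)
          assert(cmpOfSelect == selectOfCmp);
        }
}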
- if (Op0->getType() != Op1->getType()) { - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); - } else { - // Otherwise, cast the RHS right before the icmp - Op1 = Builder->CreateBitCast(Op1, Op0->getType()); - } - } - return new ICmpInst(I.getPredicate(), Op0, Op1); - } - } - - if (isa<CastInst>(Op0)) { - // Handle the special case of: icmp (cast bool to X), <cst> - // This comes up when you have code like - // int X = A < B; - // if (X) ... - // For generality, we handle any zero-extension of any operand comparison - // with a constant or another cast from the same type. - if (isa<Constant>(Op1) || isa<CastInst>(Op1)) - if (Instruction *R = visitICmpInstWithCastAndCast(I)) - return R; - } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - } - break; - case Instruction::Mul: - if (!I.isEquality()) - break; - - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(*Context, - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } - } - break; - } - } - } - } - - // ~x < ~y --> y < x - { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); - } - - if (I.isEquality()) { - Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - - if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 - Value *OtherVal = A == Op1 ?
B : A; - return new ICmpInst(I.getPredicate(), OtherVal, - Constant::getNullValue(A->getType())); - } - - if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { - // A^c1 == C^c2 --> A == C^(c1^c2) - ConstantInt *C1, *C2; - if (match(B, m_ConstantInt(C1)) && - match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { - Constant *NC = - ConstantInt::get(*Context, C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC, "tmp"); - return new ICmpInst(I.getPredicate(), A, Xor); - } - - // A^B == A^D -> B == D - if (A == C) return new ICmpInst(I.getPredicate(), B, D); - if (A == D) return new ICmpInst(I.getPredicate(), B, C); - if (B == C) return new ICmpInst(I.getPredicate(), A, D); - if (B == D) return new ICmpInst(I.getPredicate(), A, C); - } - } - - if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && - (A == Op0 || B == Op0)) { - // A == (A^B) -> B == 0 - Value *OtherVal = A == Op0 ? B : A; - return new ICmpInst(I.getPredicate(), OtherVal, - Constant::getNullValue(A->getType())); - } - - // (A-B) == A -> B == 0 - if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // A == (A-B) -> B == 0 - if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { - Value *X = 0, *Y = 0, *Z = 0; - - if (A == C) { - X = B; Y = D; Z = A; - } else if (A == D) { - X = B; Y = C; Z = A; - } else if (B == C) { - X = A; Y = D; Z = B; - } else if (B == D) { - X = A; Y = C; Z = B; - } - - if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y, "tmp"); - Op1 = Builder->CreateAnd(Op1, Z, "tmp"); - I.setOperand(0, Op1); - I.setOperand(1, Constant::getNullValue(Op1->getType())); - return &I; - } - } - } - - { - Value *X; ConstantInt *Cst; - // icmp X+Cst, X - if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0); - - // icmp X, X+Cst - if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1); - } - return Changed ? &I : 0; -} - -/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". -Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, - Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, - Value *TheAdd) { - // If we have X+0, exit early (simplifying logic below) and let it get folded - // elsewhere. icmp X+0, X -> icmp X, X - if (CI->isZero()) { - bool isTrue = ICmpInst::isTrueWhenEqual(Pred); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - // (X+4) == X -> false. - if (Pred == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - // (X+4) != X -> true. - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - - // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. - bool isNUW = false, isNSW = false; - if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) { - isNUW = Add->hasNoUnsignedWrap(); - isNSW = Add->hasNoSignedWrap(); - } - - // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similarly for all other "or equals" - // operators.
- - // (X+1) <u X --> X >u (MAXUINT-1) --> X != 255 - // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 - // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 - if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI); - return new ICmpInst(ICmpInst::ICMP_UGT, X, R); - } - - // (X+1) >u X --> X <u (0-1) --> X != 255 - // (X+2) >u X --> X <u (0-2) --> X <u 254 - // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } - - unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); - ConstantInt *SMax = ConstantInt::get(X->getContext(), - APInt::getSignedMaxValue(BitWidth)); - - // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 - // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 - // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 - // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 - // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 - // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. - if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } - - // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 - // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 - // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 - // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 - // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 - // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); - return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); -} - -/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS -/// and CmpRHS are both known to be integer constants. -Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS) { - ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); - const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide - // then don't attempt this transform. The code below doesn't have the - // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) <s C2 produces different - // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even - // (x /u C1) <u C2. Simply casting the operands and result won't - // work. :( The if statement below tests that condition and bails - // if it finds it. - bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; - if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) - return 0; - if (DivRHS->isZero()) - return 0; // The ProdOV computation fails on divide by zero. - if (DivIsSigned && DivRHS->isAllOnesValue()) - return 0; // The overflow computation also screws up here - if (DivRHS->isOne()) - return 0; // Not worth bothering, and eliminates some funny cases - // with INT_MIN. - - // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI).
By solving for X we can turn this into a range check - // instead of computing a divide. - Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); - - // Determine if the product overflows by seeing if the product is - // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : - ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; - - // Get the ICmp opcode - ICmpInst::Predicate Pred = ICI.getPredicate(); - - // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). - // Compute this interval based on the constants involved and the signedness of - // the compare/divide. This computes a half-open interval, keeping track of - // whether either value in the interval overflows. After analysis each - // overflow variable is set to 0 if its corresponding bound variable is valid, - // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. - int LoOverflow = 0, HiOverflow = 0; - Constant *LoBound = 0, *HiBound = 0; - - if (!DivIsSigned) { // udiv - // e.g. X/5 op 3 --> [15, 20) - LoBound = Prod; - HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, Context, false); - } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. - if (CmpRHSV == 0) { // (X / pos) op 0 - // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; - } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos - LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) - HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true); - } else { // (X / pos) op neg - // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) - HiBound = AddOne(Prod); - LoOverflow = HiOverflow = ProdOV ? -1 : 0; - if (!LoOverflow) { - ConstantInt* DivNeg = - cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); - LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context, - true) ? -1 : 0; - } - } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. - if (CmpRHSV == 0) { // (X / neg) op 0 - // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS); - HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); - if (HiBound == DivRHS) { // -INTMIN = INTMIN - HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN - } - } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos - // e.g. X/-5 op 3 --> [-19, -14) - HiBound = AddOne(Prod); - HiOverflow = LoOverflow = ProdOV ? -1 : 0; - if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, HiBound, - DivRHS, Context, true) ? -1 : 0; - } else { // (X / neg) op neg - LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) - LoOverflow = HiOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, Context, true); - } - - // Dividing by a negative swaps the condition. LT <-> GT - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - Value *X = DivI->getOperand(0); - switch (Pred) { - default: llvm_unreachable("Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ?
ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow == +1) // Low bound is greater than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - if (LoOverflow == -1) // Low bound is less than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - return new ICmpInst(Pred, X, LoBound); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow == +1) // High bound greater than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - else if (HiOverflow == -1) // High bound less than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - if (Pred == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); - } -} - - -/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". -/// -Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHSI, - ConstantInt *RHS) { - const APInt &RHSV = RHS->getValue(); - - switch (LHSI->getOpcode()) { - case Instruction::Trunc: - if (ICI.isEquality() && LHSI->hasOneUse()) { - // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all - // of the high bits truncated out of x are known. - unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), - SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); - APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); - APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - - // If all the high bits are known, we can do this xform. - if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { - // Pull in the high bits from known-ones set. - APInt NewRHS(RHS->getValue()); - NewRHS.zext(SrcBits); - NewRHS |= KnownOne; - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantInt::get(*Context, NewRHS)); - } - } - break; - - case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) - if (ConstantInt *XorCST = dyn_cast(LHSI->getOperand(1))) { - // If this is a comparison that tests the signbit (X < 0) or (x > -1), - // fold the xor. - if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || - (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { - Value *CompareVal = LHSI->getOperand(0); - - // If the sign bit of the XorCST is not set, there is no change to - // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { - ICI.setOperand(0, CompareVal); - Worklist.Add(LHSI); - return &ICI; - } - - // Was the old condition true if the operand is positive? - bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - - // If so, the new one isn't. 
- isTrueIfPositive ^= true; - - if (isTrueIfPositive) - return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, - SubOne(RHS)); - else - return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, - AddOne(RHS)); - } - - if (LHSI->hasOneUse()) { - // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) - if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { - const APInt &SignBit = XorCST->getValue(); - ICmpInst::Predicate Pred = ICI.isSigned() - ? ICI.getUnsignedPredicate() - : ICI.getSignedPredicate(); - return new ICmpInst(Pred, LHSI->getOperand(0), - ConstantInt::get(*Context, RHSV ^ SignBit)); - } - - // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { - const APInt &NotSignBit = XorCST->getValue(); - ICmpInst::Predicate Pred = ICI.isSigned() - ? ICI.getUnsignedPredicate() - : ICI.getSignedPredicate(); - Pred = ICI.getSwappedPredicate(Pred); - return new ICmpInst(Pred, LHSI->getOperand(0), - ConstantInt::get(*Context, RHSV ^ NotSignBit)); - } - } - } - break; - case Instruction::And: // (icmp pred (and X, AndCST), RHS) - if (LHSI->hasOneUse() && isa(LHSI->getOperand(1)) && - LHSI->getOperand(0)->hasOneUse()) { - ConstantInt *AndCST = cast(LHSI->getOperand(1)); - - // If the LHS is an AND of a truncating cast, we can widen the - // and/compare to be the input width without changing the value - // produced, eliminating a cast. - if (TruncInst *Cast = dyn_cast(LHSI->getOperand(0))) { - // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. - // Extending a relational comparison when we're checking the sign - // bit would not work. - if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue(); - NewCST.zext(BitWidth); - APInt NewCI = RHSV; - NewCI.zext(BitWidth); - Value *NewAnd = - Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(*Context, NewCST), LHSI->getName()); - return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(*Context, NewCI)); - } - } - - // If this is: (X >> C1) & C2 != C3 (where any shift and any compare - // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This - // happens a LOT in code produced by the C front-end, for bitfield - // access. - BinaryOperator *Shift = dyn_cast(LHSI->getOperand(0)); - if (Shift && !Shift->isShift()) - Shift = 0; - - ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - - // We can fold this as long as we can't shift unknown bits - // into the mask. This can only happen with signed shift - // rights, as they sign-extend. - if (ShAmt) { - bool CanFold = Shift->isLogicalShift(); - if (!CanFold) { - // To test for the bad case of the signed shr, see if any - // of the bits shifted in could be tested after the mask. 
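The sign-bit xor fold above is the classic bias trick: flipping the sign bit of both sides converts a signed comparison into an unsigned one and vice versa. An exhaustive i8-width check (plain C++, two's-complement casts assumed):

#include <cassert>
#include <cstdint>

int main() {
  for (int a = 0; a <= 255; ++a)
    for (int b = 0; b <= 255; ++b) {
      bool u = (uint8_t)(a ^ 0x80) < (uint8_t)(b ^ 0x80); // unsigned, sign bits flipped
      bool s = (int8_t)a < (int8_t)b;                     // signed originals
      assert(u == s);
    }
}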
- uint32_t TyBits = Ty->getPrimitiveSizeInBits(); - int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - - uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & - AndCST->getValue()) == 0) - CanFold = true; - } - - if (CanFold) { - Constant *NewCst; - if (Shift->getOpcode() == Instruction::Shl) - NewCst = ConstantExpr::getLShr(RHS, ShAmt); - else - NewCst = ConstantExpr::getShl(RHS, ShAmt); - - // Check to see if we are shifting out any of the bits being - // compared. - if (ConstantExpr::get(Shift->getOpcode(), - NewCst, ShAmt) != RHS) { - // If we shifted bits out, the fold is not going to work out. - // As a special case, check to see if this means that the - // result is always true or false now. - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - } else { - ICI.setOperand(1, NewCst); - Constant *NewAndCST; - if (Shift->getOpcode() == Instruction::Shl) - NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); - else - NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); - LHSI->setOperand(1, NewAndCST); - LHSI->setOperand(0, Shift->getOperand(0)); - Worklist.Add(Shift); // Shift is dead. - return &ICI; - } - } - } - - // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The latter is - // preferable because it allows the C<<Y expression to be hoisted out - // of a loop if Y is invariant and X is not. - if (Shift && Shift->hasOneUse() && RHSV == 0 && - ICI.isEquality() && !Shift->isArithmeticShift() && - !isa<Constant>(Shift->getOperand(0))) { - // Compute C << Y. - Value *NS; - if (Shift->getOpcode() == Instruction::LShr) { - NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); - } else { - // Insert a logical shift. - NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); - } - - // Compute X & (C << Y). - Value *NewAnd = - Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - - ICI.setOperand(0, NewAnd); - return &ICI; - } - } - break; - - case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) - ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!ShAmt) break; - - uint32_t TypeBits = RHSV.getBitWidth(); - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - if (ShAmt->uge(TypeBits)) - break; - - if (ICI.isEquality()) { - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - Constant *Comp = - ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), - ShAmt); - if (Comp != RHS) {// Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - Constant *Mask = - ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits, - TypeBits-ShAmtVal)); - - Value *And = - Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(*Context, RHSV.lshr(ShAmtVal))); - } - } - - // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
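The ((X >> Y) & C) == 0 ---> (X & (C << Y)) == 0 rewrite above holds unconditionally for logical shifts, because bits of C that would be shifted out can only ever be tested against the zeros the shift brings in. An exhaustive i8-width check (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x <= 255; ++x)
    for (int y = 0; y <= 7; ++y)          // in-range shift amounts only
      for (int c = 0; c <= 255; ++c) {
        bool lhs = (((uint8_t)x >> y) & (uint8_t)c) == 0;          // ((X >> Y) & C) == 0
        bool rhs = ((uint8_t)x & (uint8_t)((uint8_t)c << y)) == 0; // (X & (C << Y)) == 0
        assert(lhs == rhs);
      }
}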
- bool TrueIfSigned = false; - if (LHSI->hasOneUse() && - isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { - // (X << 31) <s 0 --> (X&1) != 0 - Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); - Value *And = - Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); - return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, - And, Constant::getNullValue(And->getType())); - } - break; - } - - case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: { - // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(*Context, Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); - } - break; - } - - case Instruction::SDiv: - case Instruction::UDiv: - // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember - // it, otherwise compute the range [low, hi) bounding the new value. - // See: InsertRangeTest above for the kinds of replacements possible.
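The range check that FoldICmpDivCst derives is the comment's own example: X /u 5 == 3 holds exactly when X lies in the half-open interval [15, 20). A minimal sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000; ++x)
    assert((x / 5 == 3) == (x >= 15 && x < 20)); // X /u 5 == 3  <=>  X in [15, 20)
}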
- if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) - if (Instruction *R = FoldICmpDivCst(ICI, cast(LHSI), - DivRHS)) - return R; - break; - - case Instruction::Add: - // Fold: icmp pred (add X, C1), C2 - if (!ICI.isEquality()) { - ConstantInt *LHSC = dyn_cast(LHSI->getOperand(1)); - if (!LHSC) break; - const APInt &LHSV = LHSC->getValue(); - - ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) - .subtract(LHSV); - - if (ICI.isSigned()) { - if (CR.getLower().isSignBit()) { - return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getUpper())); - } else if (CR.getUpper().isSignBit()) { - return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getLower())); - } - } else { - if (CR.getLower().isMinValue()) { - return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getUpper())); - } else if (CR.getUpper().isMinValue()) { - return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getLower())); - } - } - } - break; - } - - // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. - if (ICI.isEquality()) { - bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and - // the second operand is a constant, simplify a bit. - if (BinaryOperator *BO = dyn_cast(LHSI)) { - switch (BO->getOpcode()) { - case Instruction::SRem: - // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (RHSV == 0 && isa(BO->getOperand(1)) &&BO->hasOneUse()){ - const APInt &V = cast(BO->getOperand(1))->getValue(); - if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { - Value *NewRem = - Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), - BO->getName()); - return new ICmpInst(ICI.getPredicate(), NewRem, - Constant::getNullValue(BO->getType())); - } - } - break; - case Instruction::Add: - // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. - if (ConstantInt *BOp1C = dyn_cast(BO->getOperand(1))) { - if (BO->hasOneUse()) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - ConstantExpr::getSub(RHS, BOp1C)); - } else if (RHSV == 0) { - // Replace ((add A, B) != 0) with (A != -B) if A or B is - // efficiently invertible, or if the add has just this one use. - Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - - if (Value *NegVal = dyn_castNegVal(BOp1)) - return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) - return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); - Neg->takeName(BO); - return new ICmpInst(ICI.getPredicate(), BOp0, Neg); - } - } - break; - case Instruction::Xor: - // For the xor case, we can xor two constants together, eliminating - // the explicit xor. - if (Constant *BOC = dyn_cast(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH - case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - BO->getOperand(1)); - break; - - case Instruction::Or: - // If bits are being or'd in that are not present in the constant we - // are comparing against, then the comparison could never succeed! 
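Two of the equality folds above can be confirmed exhaustively at i8 width: srem by a power of two compared against zero behaves like urem, and or'ing in a bit that the RHS constant lacks makes equality unsatisfiable. A minimal sketch (plain C++, two's-complement casts assumed):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x) {
    // srem by a power of two, compared against zero, matches urem:
    assert((x % 8 == 0) == ((uint8_t)x % 8 == 0));
    // a bit or'd in but absent from the RHS constant makes equality impossible:
    assert(((uint8_t)x | 8) != 5);
  }
}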
-        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
-          Constant *NotCI = ConstantExpr::getNot(RHS);
-          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
-            return ReplaceInstUsesWith(ICI,
-                             ConstantInt::get(Type::getInt1Ty(*Context),
-                                              isICMP_NE));
-        }
-        break;
-
-      case Instruction::And:
-        if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
-          // If bits are being compared against that are and'd out, then the
-          // comparison can never succeed!
-          if ((RHSV & ~BOC->getValue()) != 0)
-            return ReplaceInstUsesWith(ICI,
-                             ConstantInt::get(Type::getInt1Ty(*Context),
-                                              isICMP_NE));
-
-          // If we have ((X & C) == C), turn it into ((X & C) != 0).
-          if (RHS == BOC && RHSV.isPowerOf2())
-            return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
-                                ICmpInst::ICMP_NE, LHSI,
-                                Constant::getNullValue(RHS->getType()));
-
-          // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
-          if (BOC->getValue().isSignBit()) {
-            Value *X = BO->getOperand(0);
-            Constant *Zero = Constant::getNullValue(X->getType());
-            ICmpInst::Predicate pred = isICMP_NE ?
-              ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
-            return new ICmpInst(pred, X, Zero);
-          }
-
-          // ((X & ~7) == 0) --> X < 8
-          if (RHSV == 0 && isHighOnes(BOC)) {
-            Value *X = BO->getOperand(0);
-            Constant *NegX = ConstantExpr::getNeg(BOC);
-            ICmpInst::Predicate pred = isICMP_NE ?
-              ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
-            return new ICmpInst(pred, X, NegX);
-          }
-        }
-      default: break;
-      }
-    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
-      // Handle icmp {eq|ne} <intrinsic>, intcst.
-      if (II->getIntrinsicID() == Intrinsic::bswap) {
-        Worklist.Add(II);
-        ICI.setOperand(0, II->getOperand(1));
-        ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap()));
-        return &ICI;
-      }
-    }
-  }
-  return 0;
-}
-
-/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
-/// We only handle extending casts so far.
-///
-Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
-  const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
-  Value *LHSCIOp        = LHSCI->getOperand(0);
-  const Type *SrcTy     = LHSCIOp->getType();
-  const Type *DestTy    = LHSCI->getType();
-  Value *RHSCIOp;
-
-  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
-  // integer type is the same size as the pointer type.
-  if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
-      TD->getPointerSizeInBits() ==
-         cast<IntegerType>(DestTy)->getBitWidth()) {
-    Value *RHSOp = 0;
-    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
-      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
-    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
-      RHSOp = RHSC->getOperand(0);
-      // If the pointer types don't match, insert a bitcast.
-      if (LHSCIOp->getType() != RHSOp->getType())
-        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
-    }
-
-    if (RHSOp)
-      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
-  }
-
-  // The code below only handles extension cast instructions, so far.
-  // Enforce this.
-  if (LHSCI->getOpcode() != Instruction::ZExt &&
-      LHSCI->getOpcode() != Instruction::SExt)
-    return 0;
-
-  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
-  bool isSignedCmp = ICI.isSigned();
-
-  if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
-    // Not an extension from the same type?
-    RHSCIOp = CI->getOperand(0);
-    if (RHSCIOp->getType() != LHSCIOp->getType())
-      return 0;
-
-    // If the signedness of the two casts doesn't agree (i.e. one is a sext
-    // and the other is a zext), then we can't handle this.
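Two of the `and` folds above are easy to validate by brute force at 8 bits. A standalone sketch; it assumes two's-complement `int8_t`, which the fixed-width types guarantee:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned v = 0; v < 256; ++v) {
        uint8_t x = (uint8_t)v;
        // (X & signbit) != 0  -->  X s< 0
        assert(((x & 0x80u) != 0) == ((int8_t)x < 0));
        // (X & ~7) == 0  -->  X u< 8   (~7 == 0xF8: "high ones")
        assert(((x & 0xF8u) == 0) == (x < 8u));
      }
      return 0;
    }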
-    if (CI->getOpcode() != LHSCI->getOpcode())
-      return 0;
-
-    // Deal with equality cases early.
-    if (ICI.isEquality())
-      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
-
-    // A signed comparison of sign extended values simplifies into a
-    // signed comparison.
-    if (isSignedCmp && isSignedExt)
-      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
-
-    // The other three cases all fold into an unsigned comparison.
-    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
-  }
-
-  // If we aren't dealing with a constant on the RHS, exit early.
-  ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
-  if (!CI)
-    return 0;
-
-  // Compute the constant that would happen if we truncated to SrcTy then
-  // re-extended to DestTy.
-  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
-  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
-                                         Res1, DestTy);
-
-  // If the re-extended constant didn't change...
-  if (Res2 == CI) {
-    // Deal with equality cases early.
-    if (ICI.isEquality())
-      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
-
-    // A signed comparison of sign extended values simplifies into a
-    // signed comparison.
-    if (isSignedExt && isSignedCmp)
-      return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
-
-    // The other three cases all fold into an unsigned comparison.
-    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
-  }
-
-  // The re-extended constant changed, so the constant cannot be represented
-  // in the shorter type.  Consequently, we cannot emit a simple comparison.
-
-  // First, handle some easy cases.  We know the result cannot be equal at
-  // this point, so handle the ICI.isEquality() cases.
-  if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
-  if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
-
-  // Evaluate the comparison for LT (we invert for GT below).  LE and GE cases
-  // should have been folded away previously and should not enter here.
-  Value *Result;
-  if (isSignedCmp) {
-    // We're performing a signed comparison.
-    if (cast<ConstantInt>(CI)->getValue().isNegative())
-      Result = ConstantInt::getFalse(*Context);  // X < (small) --> false
-    else
-      Result = ConstantInt::getTrue(*Context);   // X < (large) --> true
-  } else {
-    // We're performing an unsigned comparison.
-    if (isSignedExt) {
-      // We're performing an unsigned comp with a sign extended value.
-      // This is true if the input is >= 0. [aka >s -1]
-      Constant *NegOne = Constant::getAllOnesValue(SrcTy);
-      Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
-    } else {
-      // Unsigned extend & unsigned compare -> always true.
-      Result = ConstantInt::getTrue(*Context);
-    }
-  }
-
-  // Finally, return the value computed.
-  if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
-      ICI.getPredicate() == ICmpInst::ICMP_SLT)
-    return ReplaceInstUsesWith(ICI, Result);
-
-  assert((ICI.getPredicate() == ICmpInst::ICMP_UGT ||
-          ICI.getPredicate() == ICmpInst::ICMP_SGT) &&
-         "ICmp should be folded!");
-  if (Constant *CI = dyn_cast<Constant>(Result))
-    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
-  return BinaryOperator::CreateNot(Result);
-}
-
-Instruction *InstCombiner::visitShl(BinaryOperator &I) {
-  return commonShiftTransforms(I);
-}
-
-Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
-  return commonShiftTransforms(I);
-}
-
-Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
-  if (Instruction *R = commonShiftTransforms(I))
-    return R;
-
-  Value *Op0 = I.getOperand(0);
-
-  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
-  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-    if (CSI->isAllOnesValue())
-      return ReplaceInstUsesWith(I, CSI);
-
-  // See if we can turn a signed shr into an unsigned shr.
-  if (MaskedValueIsZero(Op0,
-                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
-    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
-  // Arithmetic shifting an all-sign-bit value is a no-op.
-  unsigned NumSignBits = ComputeNumSignBits(Op0);
-  if (NumSignBits == Op0->getType()->getScalarSizeInBits())
-    return ReplaceInstUsesWith(I, Op0);
-
-  return 0;
-}
-
-Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
-  assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  // shl X, 0 == X and shr X, 0 == X
-  // shl 0, X == 0 and shr 0, X == 0
-  if (Op1 == Constant::getNullValue(Op1->getType()) ||
-      Op0 == Constant::getNullValue(Op0->getType()))
-    return ReplaceInstUsesWith(I, Op0);
-
-  if (isa<UndefValue>(Op0)) {
-    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
-      return ReplaceInstUsesWith(I, Op0);
-    else                                    // undef << X -> 0, undef >>u X -> 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-  }
-  if (isa<UndefValue>(Op1)) {
-    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
-      return ReplaceInstUsesWith(I, Op0);
-    else                                     // X << undef, X >>u undef -> 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-  }
-
-  // See if we can fold away this shift.
-  if (SimplifyDemandedInstructionBits(I))
-    return &I;
-
-  // Try to fold constant and into select arguments.
-  if (isa<Constant>(Op0))
-    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
-      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
-        return R;
-
-  if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
-    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
-      return Res;
-  return 0;
-}
-
-Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
-                                               BinaryOperator &I) {
-  bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
-  // See if we can simplify any instructions used by the instruction whose sole
-  // purpose is to compute bits we don't care about.
-  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
-
-  // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
-  // a signed shift.
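The two visitAShr facts above (ashr of all-ones is the identity, and ashr agrees with lshr once the sign bit is known zero) can be checked at 8 bits. The `ashr8` helper below is a hypothetical stand-in written with unsigned operations only, since `>>` on a negative value is implementation-defined before C++20:

    #include <cassert>
    #include <cstdint>

    // Portable 8-bit arithmetic shift right (manual sign extension).
    static uint8_t ashr8(uint8_t x, unsigned s) {
      unsigned ext = (x & 0x80u) ? (0xFF00u | x) : x;
      return (uint8_t)((ext >> s) & 0xFFu);
    }

    int main() {
      for (unsigned s = 0; s < 8; ++s) {
        assert(ashr8(0xFFu, s) == 0xFFu);            // ashr -1, X == -1
        for (unsigned x = 0; x < 128; ++x)           // sign bit known zero
          assert(ashr8((uint8_t)x, s) == (uint8_t)(x >> s));  // ashr == lshr
      }
      return 0;
    }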
-  //
-  if (Op1->uge(TypeBits)) {
-    if (I.getOpcode() != Instruction::AShr)
-      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
-    else {
-      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
-      return &I;
-    }
-  }
-
-  // ((X*C1) << C2) == (X * (C1 << C2))
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
-    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
-      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
-        return BinaryOperator::CreateMul(BO->getOperand(0),
-                                         ConstantExpr::getShl(BOOp, Op1));
-
-  // Try to fold constant and into select arguments.
-  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
-    if (Instruction *R = FoldOpIntoSelect(I, SI, this))
-      return R;
-  if (isa<PHINode>(Op0))
-    if (Instruction *NV = FoldOpIntoPhi(I))
-      return NV;
-
-  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
-  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
-    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
-    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
-    // place.  Don't try to do this transformation in this case.  Also, we
-    // require that the input operand is a shift-by-constant so that we have
-    // confidence that the shifts will get folded together.  We could do this
-    // xform in more cases, but it is unlikely to be profitable.
-    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
-        isa<ConstantInt>(TrOp->getOperand(1))) {
-      // Okay, we'll do this xform.  Make the shift of shift.
-      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
-      // (shift2 (shift1 & 0x00FF), c2)
-      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,
-                                        I.getName());
-
-      // For logical shifts, the truncation has the effect of making the high
-      // part of the register be zeros.  Emulate this by inserting an AND to
-      // clear the top bits as needed.  This 'and' will usually be zapped by
-      // other xforms later if dead.
-      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
-      unsigned DstSize = TI->getType()->getScalarSizeInBits();
-      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
-
-      // The mask we constructed says what the trunc would do if occurring
-      // between the shifts.  We want to know the effect *after* the second
-      // shift.  We know that it is a logical shift by a constant, so adjust the
-      // mask as appropriate.
-      if (I.getOpcode() == Instruction::Shl)
-        MaskV <<= Op1->getZExtValue();
-      else {
-        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
-        MaskV = MaskV.lshr(Op1->getZExtValue());
-      }
-
-      // shift1 & 0x00FF
-      Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV),
-                                      TI->getName());
-
-      // Return the value truncated to the interesting size.
-      return new TruncInst(And, I.getType());
-    }
-  }
-
-  if (Op0->hasOneUse()) {
-    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
-      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
-      Value *V1, *V2;
-      ConstantInt *CC;
-      switch (Op0BO->getOpcode()) {
-      default: break;
-      case Instruction::Add:
-      case Instruction::And:
-      case Instruction::Or:
-      case Instruction::Xor: {
-        // These operators commute.
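The `((X*C1) << C2) == (X * (C1 << C2))` reassociation above holds in modular arithmetic, which an exhaustive 8-bit check confirms (standalone, no LLVM dependencies):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned c1 = 0; c1 < 256; ++c1)
          for (unsigned c2 = 0; c2 < 8; ++c2)
            assert((uint8_t)((uint8_t)(x * c1) << c2) ==
                   (uint8_t)(x * (uint8_t)(c1 << c2)));
      return 0;
    }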
-        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
-        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
-            match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
-                                              m_Specific(Op1)))) {
-          Value *YS =         // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
-          // (X + (Y << C))
-          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
-                                          Op0BO->getOperand(1)->getName());
-          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-          return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
-                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
-        }
-
-        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
-        Value *Op0BOOp1 = Op0BO->getOperand(1);
-        if (isLeftShift && Op0BOOp1->hasOneUse() &&
-            match(Op0BOOp1,
-                  m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
-                        m_ConstantInt(CC))) &&
-            cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
-          Value *YS =   // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(0), Op1,
-                               Op0BO->getName());
-          // X & (CC << C)
-          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
-                                         V1->getName()+".mask");
-          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
-        }
-      }
-
-      // FALL THROUGH.
-      case Instruction::Sub: {
-        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
-        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
-            match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
-                                              m_Specific(Op1)))) {
-          Value *YS =  // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
-          // (X + (Y << C))
-          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
-                                          Op0BO->getOperand(0)->getName());
-          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-          return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
-                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
-        }
-
-        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
-        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
-            match(Op0BO->getOperand(0),
-                  m_And(m_Shr(m_Value(V1), m_Value(V2)),
-                        m_ConstantInt(CC))) && V2 == Op1 &&
-            cast<BinaryOperator>(Op0BO->getOperand(0))
-                ->getOperand(0)->hasOneUse()) {
-          Value *YS = // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
-          // X & (CC << C)
-          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
-                                         V1->getName()+".mask");
-
-          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
-        }
-
-        break;
-      }
-      }
-
-
-      // If the operand is a bitwise operator with a constant RHS, and the
-      // shift is the only use, we can pull it out of the shift.
-      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
-        bool isValid = true;     // Valid only for And, Or, Xor
-        bool highBitSet = false; // Transform if high bit of constant set?
-
-        switch (Op0BO->getOpcode()) {
-        default: isValid = false; break;   // Do not perform transform!
-        case Instruction::Add:
-          isValid = isLeftShift;
-          break;
-        case Instruction::Or:
-        case Instruction::Xor:
-          highBitSet = false;
-          break;
-        case Instruction::And:
-          highBitSet = true;
-          break;
-        }
-
-        // If this is a signed shift right, and the high bit is modified
-        // by the logical operation, do not perform the transformation.
-        // The highBitSet boolean indicates the value of the high bit of
-        // the constant which would cause it to be modified for this
-        // operation.
-        //
-        if (isValid && I.getOpcode() == Instruction::AShr)
-          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-
-        if (isValid) {
-          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
-
-          Value *NewShift =
-            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
-          NewShift->takeName(Op0BO);
-
-          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
-                                        NewRHS);
-        }
-      }
-    }
-  }
-
-  // Find out if this is a shift of a shift by a constant.
-  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
-  if (ShiftOp && !ShiftOp->isShift())
-    ShiftOp = 0;
-
-  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
-    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
-    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
-    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
-    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
-    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
-    Value *X = ShiftOp->getOperand(0);
-
-    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
-
-    const IntegerType *Ty = cast<IntegerType>(I.getType());
-
-    // Check for (X << c1) << c2  and  (X >> c1) >> c2
-    if (I.getOpcode() == ShiftOp->getOpcode()) {
-      // If this is an oversized composite shift, then unsigned shifts get 0
-      // and ashr saturates.
-      if (AmtSum >= TypeBits) {
-        if (I.getOpcode() != Instruction::AShr)
-          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
-      }
-
-      return BinaryOperator::Create(I.getOpcode(), X,
-                                    ConstantInt::get(Ty, AmtSum));
-    }
-
-    if (ShiftOp->getOpcode() == Instruction::LShr &&
-        I.getOpcode() == Instruction::AShr) {
-      if (AmtSum >= TypeBits)
-        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
-      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
-      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
-    }
-
-    if (ShiftOp->getOpcode() == Instruction::AShr &&
-        I.getOpcode() == Instruction::LShr) {
-      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask)  since C1 != 0.
-      if (AmtSum >= TypeBits)
-        AmtSum = TypeBits-1;
-
-      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
-      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask));
-    }
-
-    // Okay, if we get here, one shift must be left, and the other shift must
-    // be right.  See if the amounts are equal.
-    if (ShiftAmt1 == ShiftAmt2) {
-      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
-      if (I.getOpcode() == Instruction::Shl) {
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
-      }
-      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
-      if (I.getOpcode() == Instruction::LShr) {
-        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
-      }
-      // We can simplify ((X << C) >>s C) into a trunc + sext.
-      // NOTE: we could do this for any C, but that would make 'unusual'
-      // integer types.  For now, just stick to ones well-supported by the
-      // code generators.
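The equal-amount shift pairs above reduce to masks, and both directions can be verified exhaustively at 8 bits (standalone sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned c = 0; c < 8; ++c) {
          // (X >>u C) << C  ==  X & (-1 << C)
          assert((uint8_t)((x >> c) << c) == (x & (uint8_t)(0xFFu << c)));
          // (X << C) >>u C  ==  X & (-1 >>u C)
          assert((uint8_t)((uint8_t)(x << c) >> c) == (x & (0xFFu >> c)));
        }
      return 0;
    }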
-      const Type *SExtType = 0;
-      switch (Ty->getBitWidth() - ShiftAmt1) {
-      case 1  :
-      case 8  :
-      case 16 :
-      case 32 :
-      case 64 :
-      case 128:
-        SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1);
-        break;
-      default: break;
-      }
-      if (SExtType)
-        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
-      // Otherwise, we can't handle it yet.
-    } else if (ShiftAmt1 < ShiftAmt2) {
-      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
-
-      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
-      if (I.getOpcode() == Instruction::Shl) {
-        assert(ShiftOp->getOpcode() == Instruction::LShr ||
-               ShiftOp->getOpcode() == Instruction::AShr);
-        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
-
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(*Context, Mask));
-      }
-
-      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
-      if (I.getOpcode() == Instruction::LShr) {
-        assert(ShiftOp->getOpcode() == Instruction::Shl);
-        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
-
-        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(*Context, Mask));
-      }
-
-      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
-    } else {
-      assert(ShiftAmt2 < ShiftAmt1);
-      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
-
-      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
-      if (I.getOpcode() == Instruction::Shl) {
-        assert(ShiftOp->getOpcode() == Instruction::LShr ||
-               ShiftOp->getOpcode() == Instruction::AShr);
-        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
-                                            ConstantInt::get(Ty, ShiftDiff));
-
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(*Context, Mask));
-      }
-
-      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
-      if (I.getOpcode() == Instruction::LShr) {
-        assert(ShiftOp->getOpcode() == Instruction::Shl);
-        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
-
-        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(*Context, Mask));
-      }
-
-      // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
-    }
-  }
-  return 0;
-}
-
-
-/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
-/// expression.  If so, decompose it, returning some value X, such that Val is
-/// X*Scale+Offset.
-///
-static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        int &Offset, LLVMContext *Context) {
-  assert(Val->getType() == Type::getInt32Ty(*Context) &&
-         "Unexpected allocation size type!");
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
-    Offset = CI->getZExtValue();
-    Scale  = 0;
-    return ConstantInt::get(Type::getInt32Ty(*Context), 0);
-  } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      if (I->getOpcode() == Instruction::Shl) {
-        // This is a value scaled by '1 << the shift amt'.
-        Scale = 1U << RHS->getZExtValue();
-        Offset = 0;
-        return I->getOperand(0);
-      } else if (I->getOpcode() == Instruction::Mul) {
-        // This value is scaled by 'RHS'.
-        Scale = RHS->getZExtValue();
-        Offset = 0;
-        return I->getOperand(0);
-      } else if (I->getOpcode() == Instruction::Add) {
-        // We have X+C.  Check to see if we really have (X*C2)+C1,
-        // where C1 is divisible by C2.
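The trunc+sext equivalence for `((X << C) >>s C)` used above, instantiated for i32 with C = 24 (the arithmetic shift is emulated portably rather than applying `>>` to a negative value; two's-complement `int8_t`/`int32_t` is assumed, as the fixed-width types guarantee):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t v = -70000; v <= 70000; ++v) {
        uint32_t x = (uint32_t)v;
        uint32_t hi = x << 24;                       // (X << 24)
        uint32_t ashr = (hi >> 24) |                 // (... >>s 24), emulated
                        ((hi & 0x80000000u) ? 0xFFFFFF00u : 0u);
        uint32_t sext = (uint32_t)(int32_t)(int8_t)(uint8_t)x;  // sext(trunc X)
        assert(ashr == sext);
      }
      return 0;
    }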
-        unsigned SubScale;
-        Value *SubVal =
-          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale,
-                                    Offset, Context);
-        Offset += RHS->getZExtValue();
-        Scale = SubScale;
-        return SubVal;
-      }
-    }
-  }
-
-  // Otherwise, we can't look past this.
-  Scale = 1;
-  Offset = 0;
-  return Val;
-}
-
-
-/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
-/// try to eliminate the cast by moving the type information into the alloc.
-Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
-                                                   AllocaInst &AI) {
-  const PointerType *PTy = cast<PointerType>(CI.getType());
-
-  BuilderTy AllocaBuilder(*Builder);
-  AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
-
-  // Remove any uses of AI that are dead.
-  assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
-
-  for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) {
-    Instruction *User = cast<Instruction>(*UI++);
-    if (isInstructionTriviallyDead(User)) {
-      while (UI != E && *UI == User)
-        ++UI; // If this instruction uses AI more than once, don't break UI.
-
-      ++NumDeadInst;
-      DEBUG(errs() << "IC: DCE: " << *User << '\n');
-      EraseInstFromFunction(*User);
-    }
-  }
-
-  // This requires TargetData to get the alloca alignment and size information.
-  if (!TD) return 0;
-
-  // Get the type really allocated and the type casted to.
-  const Type *AllocElTy = AI.getAllocatedType();
-  const Type *CastElTy = PTy->getElementType();
-  if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
-
-  unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
-  unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
-  if (CastElTyAlign < AllocElTyAlign) return 0;
-
-  // If the allocation has multiple uses, only promote it if we are strictly
-  // increasing the alignment of the resultant allocation.  If we keep it the
-  // same, we open the door to infinite loops of various kinds.  (A reference
-  // from a dbg.declare doesn't count as a use for this purpose.)
-  if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
-      CastElTyAlign == AllocElTyAlign) return 0;
-
-  uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
-  uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
-  if (CastElTySize == 0 || AllocElTySize == 0) return 0;
-
-  // See if we can satisfy the modulus by pulling a scale out of the array
-  // size argument.
-  unsigned ArraySizeScale;
-  int ArrayOffset;
-  Value *NumElements = // See if the array size is a decomposable linear expr.
-    DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale,
-                              ArrayOffset, Context);
-
-  // If we can now satisfy the modulus, by using a non-1 scale, we really can
-  // do the xform.
-  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
-      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return 0;
-
-  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
-  Value *Amt = 0;
-  if (Scale == 1) {
-    Amt = NumElements;
-  } else {
-    Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale);
-    // Insert before the alloca, not before the cast.
-    Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
-  }
-
-  if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-    Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true);
-    Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
-  }
-
-  AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
-  New->setAlignment(AI.getAlignment());
-  New->takeName(&AI);
-
-  // If the allocation has one real use plus a dbg.declare, just remove the
-  // declare.
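A worked instance of the Scale/Offset arithmetic above, assuming an i32 alloca cast to i8* (AllocElTySize = 4, CastElTySize = 1) whose array size decomposed as X*2 + 2; all numbers here are illustrative, not taken from the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t AllocElTySize = 4, CastElTySize = 1;
      const unsigned ArraySizeScale = 2;   // array size was X*2 + 2
      const int ArrayOffset = 2;
      // Both modulus checks pass, so the transform fires.
      assert((AllocElTySize * ArraySizeScale) % CastElTySize == 0);
      assert((AllocElTySize * (uint64_t)ArrayOffset) % CastElTySize == 0);
      unsigned Scale = (unsigned)((AllocElTySize * ArraySizeScale) / CastElTySize);
      int Offset = (int)((AllocElTySize * ArrayOffset) / CastElTySize);
      assert(Scale == 8 && Offset == 8);   // new alloca: i8, (8 * X) + 8
      return 0;
    }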
-  if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
-    EraseInstFromFunction(*DI);
-  }
-  // If the allocation has multiple real uses, insert a cast and change all
-  // things that used it to use the new cast.  This will also hack on CI, but
-  // it will die soon.
-  else if (!AI.hasOneUse()) {
-    // New is the allocation instruction, pointer typed.  AI is the original
-    // allocation instruction, also pointer typed.  Thus, cast to use is
-    // BitCast.
-    Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
-    AI.replaceAllUsesWith(NewCast);
-  }
-  return ReplaceInstUsesWith(CI, New);
-}
-
-/// CanEvaluateInDifferentType - Return true if we can take the specified value
-/// and return it as type Ty without inserting any new casts and without
-/// changing the computed value.  This is used by code that tries to decide
-/// whether promoting or shrinking integer operations to wider or smaller types
-/// will allow us to eliminate a truncate or extend.
-///
-/// This is a truncation operation if Ty is smaller than V->getType(), or an
-/// extension operation if Ty is larger.
-///
-/// If CastOpc is a truncation, then Ty will be a type smaller than V.  We
-/// should return true if trunc(V) can be computed by computing V in the
-/// smaller type.  If V is an instruction, then trunc(inst(x,y)) can be
-/// computed as inst(trunc(x),trunc(y)), which only makes sense if x and y can
-/// be efficiently truncated.
-///
-/// If CastOpc is a sext or zext, we are asking if the low bits of the value
-/// can be computed in a larger type, which is then and'd or sext_in_reg'd to
-/// get the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
-                                              unsigned CastOpc,
-                                              int &NumCastsRemoved) {
-  // We can always evaluate constants in another type.
-  if (isa<Constant>(V))
-    return true;
-
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return false;
-
-  const Type *OrigTy = V->getType();
-
-  // If this is an extension or truncate, we can often eliminate it.
-  if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
-    // If this is a cast from the destination type, we can trivially eliminate
-    // it, and this will remove a cast overall.
-    if (I->getOperand(0)->getType() == Ty) {
-      // If the first operand is itself a cast, and is eliminable, do not count
-      // this as an eliminable cast.  We would prefer to eliminate those two
-      // casts first.
-      if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
-        ++NumCastsRemoved;
-      return true;
-    }
-  }
-
-  // We can't extend or shrink something that has multiple uses: doing so would
-  // require duplicating the instruction in general, which isn't profitable.
-  if (!I->hasOneUse()) return false;
-
-  unsigned Opc = I->getOpcode();
-  switch (Opc) {
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor:
-    // These operators can all arbitrarily be extended or truncated.
-    return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
-                                      NumCastsRemoved) &&
-           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
-                                      NumCastsRemoved);
-
-  case Instruction::UDiv:
-  case Instruction::URem: {
-    // UDiv and URem can be truncated if all the truncated bits are zero.
-    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
-    uint32_t BitWidth = Ty->getScalarSizeInBits();
-    if (BitWidth < OrigBitWidth) {
-      APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
-      if (MaskedValueIsZero(I->getOperand(0), Mask) &&
-          MaskedValueIsZero(I->getOperand(1), Mask)) {
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
-                                          NumCastsRemoved) &&
-               CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
-                                          NumCastsRemoved);
-      }
-    }
-    break;
-  }
-  case Instruction::Shl:
-    // If we are truncating the result of this SHL, and if it's a shift of a
-    // constant amount, we can always perform a SHL in a smaller type.
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (BitWidth < OrigTy->getScalarSizeInBits() &&
-          CI->getLimitedValue(BitWidth) < BitWidth)
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
-                                          NumCastsRemoved);
-    }
-    break;
-  case Instruction::LShr:
-    // If this is a truncate of a logical shr, we can truncate it to a smaller
-    // lshr iff we know that the bits we would otherwise be shifting in are
-    // already zeros.
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
-      uint32_t BitWidth = Ty->getScalarSizeInBits();
-      if (BitWidth < OrigBitWidth &&
-          MaskedValueIsZero(I->getOperand(0),
-            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
-          CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
-                                          NumCastsRemoved);
-      }
-    }
-    break;
-  case Instruction::ZExt:
-  case Instruction::SExt:
-  case Instruction::Trunc:
-    // If this is the same kind of case as our original (e.g. zext+zext), we
-    // can safely replace it.  Note that replacing it does not reduce the
-    // number of casts in the input.
-    if (Opc == CastOpc)
-      return true;
-
-    // sext (zext ty1), ty2 -> zext ty2
-    if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt)
-      return true;
-    break;
-  case Instruction::Select: {
-    SelectInst *SI = cast<SelectInst>(I);
-    return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
-                                      NumCastsRemoved) &&
-           CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
-                                      NumCastsRemoved);
-  }
-  case Instruction::PHI: {
-    // We can change a phi if we can change all operands.
-    PHINode *PN = cast<PHINode>(I);
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
-                                      NumCastsRemoved))
-        return false;
-    return true;
-  }
-  default:
-    // TODO: Can handle more cases here.
-    break;
-  }
-
-  return false;
-}
-
-/// EvaluateInDifferentType - Given an expression that
-/// CanEvaluateInDifferentType returns true for, actually insert the code to
-/// evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
-                                             bool isSigned) {
-  if (Constant *C = dyn_cast<Constant>(V))
-    return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
-
-  // Otherwise, it must be an instruction.
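The UDiv narrowing condition above (truncate only when the dropped high bits of both operands are known zero) can be checked exhaustively for an i32-to-i8 truncation; a standalone sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      // High 24 bits of both operands are zero by construction.
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 1; b < 256; ++b) {
          uint32_t wide   = (uint32_t)a / (uint32_t)b;          // divide wide
          uint8_t  narrow = (uint8_t)((uint8_t)a / (uint8_t)b); // divide narrow
          assert((uint8_t)wide == narrow);   // trunc(udiv) == udiv(trunc)
        }
      return 0;
    }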
-  Instruction *I = cast<Instruction>(V);
-  Instruction *Res = 0;
-  unsigned Opc = I->getOpcode();
-  switch (Opc) {
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor:
-  case Instruction::AShr:
-  case Instruction::LShr:
-  case Instruction::Shl:
-  case Instruction::UDiv:
-  case Instruction::URem: {
-    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
-    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
-    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
-    break;
-  }
-  case Instruction::Trunc:
-  case Instruction::ZExt:
-  case Instruction::SExt:
-    // If the source type of the cast is the type we're trying for then we can
-    // just return the source.  There's no need to insert it because it is not
-    // new.
-    if (I->getOperand(0)->getType() == Ty)
-      return I->getOperand(0);
-
-    // Otherwise, must be the same type of cast, so just reinsert a new one.
-    Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
-                           Ty);
-    break;
-  case Instruction::Select: {
-    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
-    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
-    Res = SelectInst::Create(I->getOperand(0), True, False);
-    break;
-  }
-  case Instruction::PHI: {
-    PHINode *OPN = cast<PHINode>(I);
-    PHINode *NPN = PHINode::Create(Ty);
-    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
-      Value *V = EvaluateInDifferentType(OPN->getIncomingValue(i), Ty,
-                                         isSigned);
-      NPN->addIncoming(V, OPN->getIncomingBlock(i));
-    }
-    Res = NPN;
-    break;
-  }
-  default:
-    // TODO: Can handle more cases here.
-    llvm_unreachable("Unreachable!");
-    break;
-  }
-
-  Res->takeName(I);
-  return InsertNewInstBefore(Res, *I);
-}
-
-/// @brief Implement the transforms common to all CastInst visitors.
-Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
-  Value *Src = CI.getOperand(0);
-
-  // Many cases of "cast of a cast" are eliminable.  If it's eliminable we just
-  // eliminate it now.
-  if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
-    if (Instruction::CastOps opc =
-        isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
-      // The first cast (CSrc) is eliminable so we need to fix up or replace
-      // the second cast (CI).  CSrc will then have a good chance of being
-      // dead.
-      return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
-    }
-  }
-
-  // If we are casting a select then fold the cast into the select.
-  if (SelectInst *SI = dyn_cast<SelectInst>(Src))
-    if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
-      return NV;
-
-  // If we are casting a PHI then fold the cast into the PHI.
-  if (isa<PHINode>(Src)) {
-    // We don't do this if this would create a PHI node with an illegal type if
-    // it is currently legal.
-    if (!isa<IntegerType>(Src->getType()) ||
-        !isa<IntegerType>(CI.getType()) ||
-        ShouldChangeType(CI.getType(), Src->getType(), TD))
-      if (Instruction *NV = FoldOpIntoPhi(CI))
-        return NV;
-  }
-
-  return 0;
-}
-
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us
-/// at the specified offset.  If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
-                                       SmallVectorImpl<Value*> &NewIndices,
-                                       const TargetData *TD,
-                                       LLVMContext *Context) {
-  if (!TD) return 0;
-  if (!Ty->isSized()) return 0;
-
-  // Start with the index over the outer type.  Note that the type size
-  // might be zero (even if the offset isn't zero) if the indexed type
-  // is something like [0 x {int, int}].
-  const Type *IntPtrTy = TD->getIntPtrType(*Context);
-  int64_t FirstIdx = 0;
-  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
-    FirstIdx = Offset/TySize;
-    Offset -= FirstIdx*TySize;
-
-    // Handle hosts where % returns negative instead of values [0..TySize).
-    if (Offset < 0) {
-      --FirstIdx;
-      Offset += TySize;
-      assert(Offset >= 0);
-    }
-    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
-  }
-
-  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
-
-  // Index into the types.  If we fail, set OrigBase to null.
-  while (Offset) {
-    // Indexing into tail padding between struct/array elements.
-    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
-      return 0;
-
-    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-      const StructLayout *SL = TD->getStructLayout(STy);
-      assert(Offset < (int64_t)SL->getSizeInBytes() &&
-             "Offset must stay within the indexed type");
-
-      unsigned Elt = SL->getElementContainingOffset(Offset);
-      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt));
-
-      Offset -= SL->getElementOffset(Elt);
-      Ty = STy->getElementType(Elt);
-    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
-      assert(EltSize && "Cannot index into a zero-sized array");
-      NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
-      Offset %= EltSize;
-      Ty = AT->getElementType();
-    } else {
-      // Otherwise, we can't index into the middle of this atomic type, bail.
-      return 0;
-    }
-  }
-
-  return Ty;
-}
-
-/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
-Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
-  Value *Src = CI.getOperand(0);
-
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
-    // If casting the result of a getelementptr instruction with no offset,
-    // turn this into a cast of the original pointer!
-    if (GEP->hasAllZeroIndices()) {
-      // Changing the cast operand is usually not a good idea but it is safe
-      // here because the pointer operand is being replaced with another
-      // pointer operand so the opcode doesn't need to change.
-      Worklist.Add(GEP);
-      CI.setOperand(0, GEP->getOperand(0));
-      return &CI;
-    }
-
-    // If the GEP has a single use, and the base pointer is a bitcast, and the
-    // GEP computes a constant offset, see if we can convert these three
-    // instructions into fewer.  This typically happens with unions and other
-    // non-type-safe code.
-    if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
-      if (GEP->hasAllConstantIndices()) {
-        // We are guaranteed to get a constant from EmitGEPOffset.
-        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, *this));
-        int64_t Offset = OffsetV->getSExtValue();
-
-        // Get the base pointer input of the bitcast, and the type it points
-        // to.
-        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
-        const Type *GEPIdxTy =
-          cast<PointerType>(OrigBase->getType())->getElementType();
-        SmallVector<Value*, 8> NewIndices;
-        if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD, Context)) {
-          // If we were able to index down into an element, create the GEP
-          // and bitcast the result.  This eliminates one bitcast, potentially
-          // two.
-          Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
-            Builder->CreateInBoundsGEP(OrigBase,
-                                       NewIndices.begin(), NewIndices.end()) :
-            Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
-          NGEP->takeName(GEP);
-
-          if (isa<BitCastInst>(CI))
-            return new BitCastInst(NGEP, CI.getType());
-          assert(isa<PtrToIntInst>(CI));
-          return new PtrToIntInst(NGEP, CI.getType());
-        }
-      }
-    }
-  }
-
-  return commonCastTransforms(CI);
-}
-
-/// commonIntCastTransforms - This function implements the common transforms
-/// for trunc, zext, and sext.
-Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
-  if (Instruction *Result = commonCastTransforms(CI))
-    return Result;
-
-  Value *Src = CI.getOperand(0);
-  const Type *SrcTy = Src->getType();
-  const Type *DestTy = CI.getType();
-  uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
-  uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
-  // See if we can simplify any instructions used by the LHS whose sole
-  // purpose is to compute bits we don't care about.
-  if (SimplifyDemandedInstructionBits(CI))
-    return &CI;
-
-  // If the source isn't an instruction or has more than one use then we
-  // can't do anything more.
-  Instruction *SrcI = dyn_cast<Instruction>(Src);
-  if (!SrcI || !Src->hasOneUse())
-    return 0;
-
-  // Attempt to propagate the cast into the instruction for int->int casts.
-  int NumCastsRemoved = 0;
-  // Only do this if the dest type is a simple type, don't convert the
-  // expression tree to something weird like i93 unless the source is also
-  // strange.
-  if ((isa<VectorType>(DestTy) ||
-       ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
-      CanEvaluateInDifferentType(SrcI, DestTy,
-                                 CI.getOpcode(), NumCastsRemoved)) {
-    // If this cast is a truncate, evaluating in a different type always
-    // eliminates the cast, so it is always a win.  If this is a zero-extension,
-    // we need to do an AND to maintain the clear top-part of the computation,
-    // so we require that the input have eliminated at least one cast.  If this
-    // is a sign extension, we insert two new casts (to do the extension) so we
-    // require that two casts have been eliminated.
-    bool DoXForm = false;
-    bool JustReplace = false;
-    switch (CI.getOpcode()) {
-    default:
-      // All the others use floating point so we shouldn't actually
-      // get here because of the check above.
-      llvm_unreachable("Unknown cast type");
-    case Instruction::Trunc:
-      DoXForm = true;
-      break;
-    case Instruction::ZExt: {
-      DoXForm = NumCastsRemoved >= 1;
-
-      if (!DoXForm && 0) {
-        // If it's unnecessary to issue an AND to clear the high bits, it's
-        // always profitable to do this xform.
-        Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false);
-        APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
-        if (MaskedValueIsZero(TryRes, Mask))
-          return ReplaceInstUsesWith(CI, TryRes);
-
-        if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
-          if (TryI->use_empty())
-            EraseInstFromFunction(*TryI);
-      }
-      break;
-    }
-    case Instruction::SExt: {
-      DoXForm = NumCastsRemoved >= 2;
-      if (!DoXForm && !isa<TruncInst>(SrcI) && 0) {
-        // If we do not have to emit the truncate + sext pair, then it's always
-        // profitable to do this xform.
-        //
-        // It's not safe to eliminate the trunc + sext pair if one of the
-        // eliminated casts is a truncate. e.g.
-        //   t2 = trunc i32 t1 to i16
-        //   t3 = sext i16 t2 to i32
-        // !=
-        //   i32 t1
-        Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true);
-        unsigned NumSignBits = ComputeNumSignBits(TryRes);
-        if (NumSignBits > (DestBitSize - SrcBitSize))
-          return ReplaceInstUsesWith(CI, TryRes);
-
-        if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
-          if (TryI->use_empty())
-            EraseInstFromFunction(*TryI);
-      }
-      break;
-    }
-    }
-
-    if (DoXForm) {
-      DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type"
-                      " to avoid cast: " << CI);
-      Value *Res = EvaluateInDifferentType(SrcI, DestTy,
-                                        CI.getOpcode() == Instruction::SExt);
-      if (JustReplace)
-        // Just replace this cast with the result.
-        return ReplaceInstUsesWith(CI, Res);
-
-      assert(Res->getType() == DestTy);
-      switch (CI.getOpcode()) {
-      default: llvm_unreachable("Unknown cast type!");
-      case Instruction::Trunc:
-        // Just replace this cast with the result.
-        return ReplaceInstUsesWith(CI, Res);
-      case Instruction::ZExt: {
-        assert(SrcBitSize < DestBitSize && "Not a zext?");
-
-        // If the high bits are already zero, just replace this cast with the
-        // result.
-        APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
-        if (MaskedValueIsZero(Res, Mask))
-          return ReplaceInstUsesWith(CI, Res);
-
-        // We need to emit an AND to clear the high bits.
-        Constant *C = ConstantInt::get(*Context,
-                                 APInt::getLowBitsSet(DestBitSize, SrcBitSize));
-        return BinaryOperator::CreateAnd(Res, C);
-      }
-      case Instruction::SExt: {
-        // If the high bits are already filled with sign bit, just replace this
-        // cast with the result.
-        unsigned NumSignBits = ComputeNumSignBits(Res);
-        if (NumSignBits > (DestBitSize - SrcBitSize))
-          return ReplaceInstUsesWith(CI, Res);
-
-        // We need to emit a cast to truncate, then a cast to sext.
-        return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);
-      }
-      }
-    }
-  }
-
-  Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0;
-  Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0;
-
-  switch (SrcI->getOpcode()) {
-  case Instruction::Add:
-  case Instruction::Mul:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor:
-    // If we are discarding information, rewrite.
-    if (DestBitSize < SrcBitSize && DestBitSize != 1) {
-      // Don't insert two casts unless at least one can be eliminated.
-      if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
-          !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
-        Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
-        Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
-        return BinaryOperator::Create(
-            cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
-      }
-    }
-
-    // cast (xor bool X, true) to int  --> xor (cast bool X to int), 1
-    if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&
-        SrcI->getOpcode() == Instruction::Xor &&
-        Op1 == ConstantInt::getTrue(*Context) &&
-        (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
-      Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());
-      return BinaryOperator::CreateXor(New,
-                                       ConstantInt::get(CI.getType(), 1));
-    }
-    break;
-
-  case Instruction::Shl: {
-    // Canonicalize trunc inside shl, if we can.
-    ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
-    if (CI && DestBitSize < SrcBitSize &&
-        CI->getLimitedValue(DestBitSize) < DestBitSize) {
-      Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
-      Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
-      return BinaryOperator::CreateShl(Op0c, Op1c);
-    }
-    break;
-  }
-  }
-  return 0;
-}
-
-Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
-  if (Instruction *Result = commonIntCastTransforms(CI))
-    return Result;
-
-  Value *Src = CI.getOperand(0);
-  const Type *Ty = CI.getType();
-  uint32_t DestBitWidth = Ty->getScalarSizeInBits();
-  uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
-
-  // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
-  if (DestBitWidth == 1) {
-    Constant *One = ConstantInt::get(Src->getType(), 1);
-    Src = Builder->CreateAnd(Src, One, "tmp");
-    Value *Zero = Constant::getNullValue(Src->getType());
-    return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
-  }
-
-  // Optimize trunc(lshr(), c) to pull the shift through the truncate.
-  ConstantInt *ShAmtV = 0;
-  Value *ShiftOp = 0;
-  if (Src->hasOneUse() &&
-      match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) {
-    uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth);
-
-    // Get a mask for the bits shifting in.
-    APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
-    if (MaskedValueIsZero(ShiftOp, Mask)) {
-      if (ShAmt >= DestBitWidth)        // All zeros.
-        return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));
-
-      // Okay, we can shrink this.  Truncate the input, then return a new
-      // shift.
-      Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName());
-      Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
-      return BinaryOperator::CreateLShr(V1, V2);
-    }
-  }
-
-  return 0;
-}
-
-/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
-                                             bool DoXform) {
-  // If we are just checking for a icmp eq of a single bit and zext'ing it
-  // to an integer, then shift the bit to the appropriate place and then
-  // cast to integer to avoid the comparison.
-  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
-    const APInt &Op1CV = Op1C->getValue();
-
-    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
-    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
-        (ICI->getPredicate() == ICmpInst::ICMP_SGT &&
-         Op1CV.isAllOnesValue())) {
-      if (!DoXform) return ICI;
-
-      Value *In = ICI->getOperand(0);
-      Value *Sh = ConstantInt::get(In->getType(),
-                                   In->getType()->getScalarSizeInBits()-1);
-      In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
-      if (In->getType() != CI.getType())
-        In = Builder->CreateIntCast(In, CI.getType(), false /*ZExt*/, "tmp");
-
-      if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
-        Constant *One = ConstantInt::get(In->getType(), 1);
-        In = Builder->CreateXor(In, One, In->getName()+".not");
-      }
-
-      return ReplaceInstUsesWith(CI, In);
-    }
-
-    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
-    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
-    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
-    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
-    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
-    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
-    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
-        // This only works for EQ and NE
-        ICI->isEquality()) {
-      // If Op1C is some other power of two, convert:
-      uint32_t BitWidth = Op1C->getType()->getBitWidth();
-      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
-      ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
-
-      APInt KnownZeroMask(~KnownZero);
-      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
-        if (!DoXform) return ICI;
-
-        bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
-        if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
-          // (X&4) == 2 --> false
-          // (X&4) != 2 --> true
-          Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE);
-          Res = ConstantExpr::getZExt(Res, CI.getType());
-          return ReplaceInstUsesWith(CI, Res);
-        }
-
-        uint32_t ShiftAmt = KnownZeroMask.logBase2();
-        Value *In = ICI->getOperand(0);
-        if (ShiftAmt) {
-          // Perform a logical shr by shiftamt.
-          // Insert the shift to put the result in the low bit.
-          In = Builder->CreateLShr(In,
-                                   ConstantInt::get(In->getType(), ShiftAmt),
-                                   In->getName()+".lobit");
-        }
-
-        if ((Op1CV != 0) == isNE) { // Toggle the low bit.
-          Constant *One = ConstantInt::get(In->getType(), 1);
-          In = Builder->CreateXor(In, One, "tmp");
-        }
-
-        if (CI.getType() == In->getType())
-          return ReplaceInstUsesWith(CI, In);
-        else
-          return CastInst::CreateIntegerCast(In, CI.getType(), false /*ZExt*/);
-      }
-    }
-  }
-
-  // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
-  // It is also profitable to transform icmp eq into not(xor(A, B)) because
-  // that may lead to additional simplifications.
-  if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
-    if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
-      uint32_t BitWidth = ITy->getBitWidth();
-      Value *LHS = ICI->getOperand(0);
-      Value *RHS = ICI->getOperand(1);
-
-      APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
-      APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
-      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
-      ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
-      ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
-
-      if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
-        APInt KnownBits = KnownZeroLHS | KnownOneLHS;
-        APInt UnknownBit = ~KnownBits;
-        if (UnknownBit.countPopulation() == 1) {
-          if (!DoXform) return ICI;
-
-          Value *Result = Builder->CreateXor(LHS, RHS);
-
-          // Mask off any bits that are set and won't be shifted away.
-          if (KnownOneLHS.uge(UnknownBit))
-            Result = Builder->CreateAnd(Result,
-                                        ConstantInt::get(ITy, UnknownBit));
-
-          // Shift the bit we're testing down to the lsb.
-          Result = Builder->CreateLShr(
-               Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
-
-          if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
-            Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
-          Result->takeName(ICI);
-          return ReplaceInstUsesWith(CI, Result);
-        }
-      }
-    }
-  }
-
-  return 0;
-}
-
-Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
-  // If one of the common conversions will work, do it.
-  if (Instruction *Result = commonIntCastTransforms(CI))
-    return Result;
-
-  Value *Src = CI.getOperand(0);
-
-  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
-  // types and if the sizes are just right we can convert this into a logical
-  // 'and' which will be much cheaper than the pair of casts.
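The signbit folds at the top of transformZExtICmp, plus the zext(trunc) masking that visitZExt performs next, can both be checked at small widths. A standalone sketch (two's-complement `int8_t` assumed, as the fixed-width types guarantee):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned v = 0; v < 256; ++v) {
        uint8_t x = (uint8_t)v;
        // zext (x <s 0)  ==  x >>u 7
        assert((uint8_t)((int8_t)x < 0) == (uint8_t)(x >> 7));
        // zext (x >s -1) ==  (x >>u 7) ^ 1
        assert((uint8_t)((int8_t)x > -1) == (uint8_t)((x >> 7) ^ 1u));
      }
      // zext i8 (trunc i32 X to i8) to i32  ==  X & 0xFF  (SrcSize == DstSize)
      for (uint32_t x = 0; x < 1000000u; x += 997u)
        assert((uint32_t)(uint8_t)x == (x & 0xFFu));
      return 0;
    }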
-  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) {   // A->B->C cast
-    // Get the sizes of the types involved.  We know that the intermediate
-    // type will be smaller than A or C, but don't know the relation between
-    // A and C.
-    Value *A = CSrc->getOperand(0);
-    unsigned SrcSize = A->getType()->getScalarSizeInBits();
-    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
-    unsigned DstSize = CI.getType()->getScalarSizeInBits();
-    // If we're actually extending zero bits, then if
-    //   SrcSize <  DstSize: zext(a & mask)
-    //   SrcSize == DstSize: a & mask
-    //   SrcSize  > DstSize: trunc(a) & mask
-    if (SrcSize < DstSize) {
-      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
-      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
-      Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
-      return new ZExtInst(And, CI.getType());
-    }
-
-    if (SrcSize == DstSize) {
-      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
-      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
-                                                           AndValue));
-    }
-    if (SrcSize > DstSize) {
-      Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
-      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
-      return BinaryOperator::CreateAnd(Trunc,
-                                       ConstantInt::get(Trunc->getType(),
-                                                        AndValue));
-    }
-  }
-
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
-    return transformZExtICmp(ICI, CI);
-
-  BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
-  if (SrcI && SrcI->getOpcode() == Instruction::Or) {
-    // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
-    // of the (zext icmp) will be transformed.
-    ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
-    ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
-    if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
-        (transformZExtICmp(LHS, CI, false) ||
-         transformZExtICmp(RHS, CI, false))) {
-      Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
-      Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
-      return BinaryOperator::Create(Instruction::Or, LCast, RCast);
-    }
-  }
-
-  // zext(trunc(t) & C) -> (t & zext(C)).
-  if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
-    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
-      if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
-        Value *TI0 = TI->getOperand(0);
-        if (TI0->getType() == CI.getType())
-          return
-            BinaryOperator::CreateAnd(TI0,
-                                      ConstantExpr::getZExt(C, CI.getType()));
-      }
-
-  // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
-  if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
-    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
-      if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
-        if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
-            And->getOperand(1) == C)
-          if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
-            Value *TI0 = TI->getOperand(0);
-            if (TI0->getType() == CI.getType()) {
-              Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
-              Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
-              return BinaryOperator::CreateXor(NewAnd, ZC);
-            }
-          }
-
-  return 0;
-}
-
-Instruction *InstCombiner::visitSExt(SExtInst &CI) {
-  if (Instruction *I = commonIntCastTransforms(CI))
-    return I;
-
-  Value *Src = CI.getOperand(0);
-
-  // Canonicalize sign-extend from i1 to a select.
-  if (Src->getType() == Type::getInt1Ty(*Context))
-    return SelectInst::Create(Src,
-                              Constant::getAllOnesValue(CI.getType()),
-                              Constant::getNullValue(CI.getType()));
-
-  // See if the value being truncated is already sign extended.  If so, just
-  // eliminate the trunc/sext pair.
-  if (Operator::getOpcode(Src) == Instruction::Trunc) {
-    Value *Op = cast<Operator>(Src)->getOperand(0);
-    unsigned OpBits   = Op->getType()->getScalarSizeInBits();
-    unsigned MidBits  = Src->getType()->getScalarSizeInBits();
-    unsigned DestBits = CI.getType()->getScalarSizeInBits();
-    unsigned NumSignBits = ComputeNumSignBits(Op);
-
-    if (OpBits == DestBits) {
-      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
-      // bits, it is already sign extended.
-      if (NumSignBits > DestBits-MidBits)
-        return ReplaceInstUsesWith(CI, Op);
-    } else if (OpBits < DestBits) {
-      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
-      // bits, just sext from i32.
-      if (NumSignBits > OpBits-MidBits)
-        return new SExtInst(Op, CI.getType(), "tmp");
-    } else {
-      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
-      // bits, just truncate to i32.
-      if (NumSignBits > OpBits-MidBits)
-        return new TruncInst(Op, CI.getType(), "tmp");
-    }
-  }
-
-  // If the input is a shl/ashr pair by the same constant, then this is a sign
-  // extension from a smaller value.  If we could trust arbitrary bitwidth
-  // integers, we could turn this into a truncate to the smaller bit and then
-  // use a sext for the whole extension.  Since we don't, look deeper and check
-  // for a truncate.  If the source and dest are the same type, eliminate the
-  // trunc and extend and just do shifts.  For example, turn:
-  //   %a = trunc i32 %i to i8
-  //   %b = shl i8 %a, 6
-  //   %c = ashr i8 %b, 6
-  //   %d = sext i8 %c to i32
-  // into:
-  //   %a = shl i32 %i, 30
-  //   %d = ashr i32 %a, 30
-  Value *A = 0;
-  ConstantInt *BA = 0, *CA = 0;
-  if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)),
-                        m_ConstantInt(CA))) &&
-      BA == CA && isa<TruncInst>(A)) {
-    Value *I = cast<TruncInst>(A)->getOperand(0);
-    if (I->getType() == CI.getType()) {
-      unsigned MidSize = Src->getType()->getScalarSizeInBits();
-      unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
-      unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
-      Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
-      I = Builder->CreateShl(I, ShAmtV, CI.getName());
-      return BinaryOperator::CreateAShr(I, ShAmtV);
-    }
-  }
-
-  return 0;
-}
-
-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
-/// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem,
-                              LLVMContext *Context) {
-  bool losesInfo;
-  APFloat F = CFP->getValueAPF();
-  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
-  if (!losesInfo)
-    return ConstantFP::get(*Context, F);
-  return 0;
-}
-
-/// LookThroughFPExtensions - If this is an fp extension instruction, look
-/// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) {
-  if (Instruction *I = dyn_cast<Instruction>(V))
-    if (I->getOpcode() == Instruction::FPExt)
-      return LookThroughFPExtensions(I->getOperand(0), Context);
-
-  // If this value is a constant, return the constant in the smallest FP type
-  // that can accurately represent it.  This allows us to turn
-  // (float)((double)X+2.0) into x+2.0f.
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
-    if (CFP->getType() == Type::getPPC_FP128Ty(*Context))
-      return V;  // No constant folding of this.
-    // See if the value can be truncated to float and then re-extended.
-    if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context))
-      return V;
-    if (CFP->getType() == Type::getDoubleTy(*Context))
-      return V;  // Won't shrink.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context)) - return V; - // Don't try to shrink to various long double types. - } - - return V; -} - -Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { - if (Instruction *I = commonCastTransforms(CI)) - return I; - - // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are - // smaller than the destination type, we can eliminate the truncate by doing - // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as - // many builtins (sqrt, etc). - BinaryOperator *OpI = dyn_cast(CI.getOperand(0)); - if (OpI && OpI->hasOneUse()) { - switch (OpI->getOpcode()) { - default: break; - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - const Type *SrcTy = OpI->getType(); - Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0), Context); - Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1), Context); - if (LHSTrunc->getType() != SrcTy && - RHSTrunc->getType() != SrcTy) { - unsigned DstSize = CI.getType()->getScalarSizeInBits(); - // If the source types were both smaller than the destination type of - // the cast, do this xform. - if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && - RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { - LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); - RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); - return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); - } - } - break; - } - } - return 0; -} - -Instruction *InstCombiner::visitFPExt(CastInst &CI) { - return commonCastTransforms(CI); -} - -Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { - Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) - return commonCastTransforms(FI); - - // fptoui(uitofp(X)) --> X - // fptoui(sitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa(OpI) || isa(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - - return commonCastTransforms(FI); -} - -Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { - Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) - return commonCastTransforms(FI); - - // fptosi(sitofp(X)) --> X - // fptosi(uitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. 
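As a concrete instance of the mantissa-width rule described above (names are
illustrative only):

    define i16 @roundtrip(i16 %x) {
      %f = sitofp i16 %x to float    ; float's 24-bit mantissa holds any i16
      %r = fptosi float %f to i16
      ret i16 %r                     ; folds to: ret i16 %x
    }

An i64 -> float -> i64 trip would not fold, since 64 exceeds the 24 bits of
mantissa.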
- if ((isa(OpI) || isa(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() <= - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - - return commonCastTransforms(FI); -} - -Instruction *InstCombiner::visitUIToFP(CastInst &CI) { - return commonCastTransforms(CI); -} - -Instruction *InstCombiner::visitSIToFP(CastInst &CI) { - return commonCastTransforms(CI); -} - -Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { - // If the destination integer type is smaller than the intptr_t type for - // this target, do a ptrtoint to intptr_t then do a trunc. This allows the - // trunc to be exposed to other transforms. Don't do this for extending - // ptrtoint's, because we don't know if the target sign or zero extends its - // pointers. - if (TD && - CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); - return new TruncInst(P, CI.getType()); - } - - return commonPointerCastTransforms(CI); -} - -Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { - // If the source integer type is larger than the intptr_t type for - // this target, do a trunc to the intptr_t type, then inttoptr of it. This - // allows the trunc to be exposed to other transforms. Don't do this for - // extending inttoptr's, because we don't know if the target sign or zero - // extends to pointers. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); - return new IntToPtrInst(P, CI.getType()); - } - - if (Instruction *I = commonCastTransforms(CI)) - return I; - - return 0; -} - -Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { - // If the operands are integer typed then apply the integer transforms, - // otherwise just apply the common ones. - Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); - - if (isa(SrcTy)) { - if (Instruction *I = commonPointerCastTransforms(CI)) - return I; - } else { - if (Instruction *Result = commonCastTransforms(CI)) - return Result; - } - - - // Get rid of casts from one type to the same type. These are useless and can - // be replaced by the operand. - if (DestTy == Src->getType()) - return ReplaceInstUsesWith(CI, Src); - - if (const PointerType *DstPTy = dyn_cast(DestTy)) { - const PointerType *SrcPTy = cast(SrcTy); - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); - - // If the address spaces don't match, don't eliminate the bitcast, which is - // required for changing types. - if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) - return 0; - - // If we are casting a alloca to a pointer to a type of the same - // size, rewrite the allocation instruction to allocate the "right" type. - // There is no need to modify malloc calls because it is their bitcast that - // needs to be cleaned up. - if (AllocaInst *AI = dyn_cast(Src)) - if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) - return V; - - // If the source and destination are pointers, and this cast is equivalent - // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. - // This can enhance SROA and other transforms that want type-safe pointers. 
-    Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));
-    unsigned NumZeros = 0;
-    while (SrcElTy != DstElTy &&
-           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
-           SrcElTy->getNumContainedTypes() /* not "{}" */) {
-      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
-      ++NumZeros;
-    }
-
-    // If we found a path from the src to dest, create the getelementptr now.
-    if (SrcElTy == DstElTy) {
-      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "",
-                                               ((Instruction*) NULL));
-    }
-  }
-
-  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
-    if (DestVTy->getNumElements() == 1) {
-      if (!isa<VectorType>(SrcTy)) {
-        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
-        return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
-                           Constant::getNullValue(Type::getInt32Ty(*Context)));
-      }
-      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
-    }
-  }
-
-  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
-    if (SrcVTy->getNumElements() == 1) {
-      if (!isa<VectorType>(DestTy)) {
-        Value *Elem =
-          Builder->CreateExtractElement(Src,
-                           Constant::getNullValue(Type::getInt32Ty(*Context)));
-        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
-      }
-    }
-  }
-
-  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
-    if (SVI->hasOneUse()) {
-      // Okay, we have (bitconvert (shuffle ..)).  Check to see if this is
-      // a bitconvert to a vector with the same # elts.
-      if (isa<VectorType>(DestTy) &&
-          cast<VectorType>(DestTy)->getNumElements() ==
-                SVI->getType()->getNumElements() &&
-          SVI->getType()->getNumElements() ==
-            cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
-        CastInst *Tmp;
-        // If either of the operands is a cast from CI.getType(), then
-        // evaluating the shuffle in the casted destination's type will allow
-        // us to eliminate at least one cast.
-        if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) &&
-             Tmp->getOperand(0)->getType() == DestTy) ||
-            ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&
-             Tmp->getOperand(0)->getType() == DestTy)) {
-          Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
-          Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
-          // Return a new shuffle vector.  Use the same element ID's, as we
-          // know the vector types match #elts.
-          return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
-        }
-      }
-    }
-  }
-  return 0;
-}
-
-/// GetSelectFoldableOperands - We want to turn code that looks like this:
-///   %C = or %A, %B
-///   %D = select %cond, %C, %A
-/// into:
-///   %C = select %cond, %B, 0
-///   %D = or %A, %C
-///
-/// Assuming that the specified instruction is an operand to the select, return
-/// a bitmask indicating which operands of this instruction are foldable if they
-/// equal the other incoming value of the select.
-///
-static unsigned GetSelectFoldableOperands(Instruction *I) {
-  switch (I->getOpcode()) {
-  case Instruction::Add:
-  case Instruction::Mul:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor:
-    return 3;              // Can fold through either operand.
-  case Instruction::Sub:   // Can only fold on the amount subtracted.
-  case Instruction::Shl:   // Can only fold on the shift amount.
-  case Instruction::LShr:
-  case Instruction::AShr:
-    return 1;
-  default:
-    return 0;              // Cannot fold
-  }
-}
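The zero-index gep fold in visitBitCast above, shown in 2010-era IR syntax
(type and value names are illustrative only):

    %pair = type { i32, i32 }

    define i32* @first_field(%pair* %p) {
      %q = bitcast %pair* %p to i32*
      ret i32* %q    ; becomes: getelementptr inbounds %pair* %p, i32 0, i32 0
    }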
-/// GetSelectFoldableConstant - For the same transformation as the previous
-/// function, return the identity constant that goes into the select.
-static Constant *GetSelectFoldableConstant(Instruction *I,
-                                           LLVMContext *Context) {
-  switch (I->getOpcode()) {
-  default: llvm_unreachable("This cannot happen!");
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Or:
-  case Instruction::Xor:
-  case Instruction::Shl:
-  case Instruction::LShr:
-  case Instruction::AShr:
-    return Constant::getNullValue(I->getType());
-  case Instruction::And:
-    return Constant::getAllOnesValue(I->getType());
-  case Instruction::Mul:
-    return ConstantInt::get(I->getType(), 1);
-  }
-}
-
-/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
-/// have the same opcode and only one use each.  Try to simplify this.
-Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
-                                          Instruction *FI) {
-  if (TI->getNumOperands() == 1) {
-    // If this is a non-volatile load or a cast from the same type,
-    // merge.
-    if (TI->isCast()) {
-      if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
-        return 0;
-    } else {
-      return 0;  // unknown unary op.
-    }
-
-    // Fold this by inserting a select from the input values.
-    SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
-                                           FI->getOperand(0), SI.getName()+".v");
-    InsertNewInstBefore(NewSI, SI);
-    return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
-                            TI->getType());
-  }
-
-  // Only handle binary operators here.
-  if (!isa<BinaryOperator>(TI))
-    return 0;
-
-  // Figure out if the operations have any operands in common.
-  Value *MatchOp, *OtherOpT, *OtherOpF;
-  bool MatchIsOpZero;
-  if (TI->getOperand(0) == FI->getOperand(0)) {
-    MatchOp  = TI->getOperand(0);
-    OtherOpT = TI->getOperand(1);
-    OtherOpF = FI->getOperand(1);
-    MatchIsOpZero = true;
-  } else if (TI->getOperand(1) == FI->getOperand(1)) {
-    MatchOp  = TI->getOperand(1);
-    OtherOpT = TI->getOperand(0);
-    OtherOpF = FI->getOperand(0);
-    MatchIsOpZero = false;
-  } else if (!TI->isCommutative()) {
-    return 0;
-  } else if (TI->getOperand(0) == FI->getOperand(1)) {
-    MatchOp  = TI->getOperand(0);
-    OtherOpT = TI->getOperand(1);
-    OtherOpF = FI->getOperand(0);
-    MatchIsOpZero = true;
-  } else if (TI->getOperand(1) == FI->getOperand(0)) {
-    MatchOp  = TI->getOperand(1);
-    OtherOpT = TI->getOperand(0);
-    OtherOpF = FI->getOperand(1);
-    MatchIsOpZero = true;
-  } else {
-    return 0;
-  }
-
-  // If we reach here, they do have operations in common.
-  SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
-                                         OtherOpF, SI.getName()+".v");
-  InsertNewInstBefore(NewSI, SI);
-
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
-    if (MatchIsOpZero)
-      return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
-    else
-      return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
-  }
-  llvm_unreachable("Shouldn't get here");
-  return 0;
-}
-
-static bool isSelect01(Constant *C1, Constant *C2) {
-  ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
-  if (!C1I)
-    return false;
-  ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
-  if (!C2I)
-    return false;
-  return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
-}
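FoldSelectIntoOp below performs the transformation sketched in the
GetSelectFoldableOperands doc comment; in IR it looks like this (names are
illustrative only, and %c must have a single use):

    define i32 @fold_or(i1 %cond, i32 %a, i32 %b) {
      %c = or i32 %a, %b
      %d = select i1 %cond, i32 %c, i32 %a
      ret i32 %d       ; becomes: %c = select i1 %cond, i32 %b, i32 0
    }                  ;          %d = or i32 %a, %c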
-/// FoldSelectIntoOp - Try to fold the select into one of the operands to
-/// facilitate further optimization.
-Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
-                                            Value *FalseVal) {
-  // See the comment above GetSelectFoldableOperands for a description of the
-  // transformation we are doing here.
-  if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
-    if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
-        !isa<Constant>(FalseVal)) {
-      if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
-        unsigned OpToFold = 0;
-        if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
-          OpToFold = 1;
-        } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
-          OpToFold = 2;
-        }
-
-        if (OpToFold) {
-          Constant *C = GetSelectFoldableConstant(TVI, Context);
-          Value *OOp = TVI->getOperand(2-OpToFold);
-          // Avoid creating select between 2 constants unless it's selecting
-          // between 0 and 1.
-          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-            Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
-            InsertNewInstBefore(NewSel, SI);
-            NewSel->takeName(TVI);
-            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
-              return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
-            llvm_unreachable("Unknown instruction!!");
-          }
-        }
-      }
-    }
-  }
-
-  if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
-    if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
-        !isa<Constant>(TrueVal)) {
-      if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
-        unsigned OpToFold = 0;
-        if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
-          OpToFold = 1;
-        } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
-          OpToFold = 2;
-        }
-
-        if (OpToFold) {
-          Constant *C = GetSelectFoldableConstant(FVI, Context);
-          Value *OOp = FVI->getOperand(2-OpToFold);
-          // Avoid creating select between 2 constants unless it's selecting
-          // between 0 and 1.
-          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-            Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
-            InsertNewInstBefore(NewSel, SI);
-            NewSel->takeName(FVI);
-            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
-              return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
-            llvm_unreachable("Unknown instruction!!");
-          }
-        }
-      }
-    }
-  }
-
-  return 0;
-}
-
-/// visitSelectInstWithICmp - Visit a SelectInst that has an
-/// ICmpInst as its first operand.
-///
-Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
-                                                   ICmpInst *ICI) {
-  bool Changed = false;
-  ICmpInst::Predicate Pred = ICI->getPredicate();
-  Value *CmpLHS = ICI->getOperand(0);
-  Value *CmpRHS = ICI->getOperand(1);
-  Value *TrueVal = SI.getTrueValue();
-  Value *FalseVal = SI.getFalseValue();
-
-  // Check cases where the comparison is with a constant that
-  // can be adjusted to fit the min/max idiom. We may edit ICI in
-  // place here, so make sure the select is the only user.
-  if (ICI->hasOneUse())
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
-      switch (Pred) {
-      default: break;
-      case ICmpInst::ICMP_ULT:
-      case ICmpInst::ICMP_SLT: {
-        // X < MIN ? T : F  -->  F
-        if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
-          return ReplaceInstUsesWith(SI, FalseVal);
-        // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
-        Constant *AdjustedRHS = SubOne(CI);
-        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
-            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
-          Pred = ICmpInst::getSwappedPredicate(Pred);
-          CmpRHS = AdjustedRHS;
-          std::swap(FalseVal, TrueVal);
-          ICI->setPredicate(Pred);
-          ICI->setOperand(1, CmpRHS);
-          SI.setOperand(1, TrueVal);
-          SI.setOperand(2, FalseVal);
-          Changed = true;
-        }
-        break;
-      }
-      case ICmpInst::ICMP_UGT:
-      case ICmpInst::ICMP_SGT: {
-        // X > MAX ? T : F  -->  F
-        if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
-          return ReplaceInstUsesWith(SI, FalseVal);
-        // X > C ? X : C+1  -->  X < C+1 ? C+1 : X
-        Constant *AdjustedRHS = AddOne(CI);
-        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
-            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
-          Pred = ICmpInst::getSwappedPredicate(Pred);
-          CmpRHS = AdjustedRHS;
-          std::swap(FalseVal, TrueVal);
-          ICI->setPredicate(Pred);
-          ICI->setOperand(1, CmpRHS);
-          SI.setOperand(1, TrueVal);
-          SI.setOperand(2, FalseVal);
-          Changed = true;
-        }
-        break;
-      }
-      }
-
-      // (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed
-      // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
-      CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
-      if (match(TrueVal, m_ConstantInt<-1>()) &&
-          match(FalseVal, m_ConstantInt<0>()))
-        Pred = ICI->getPredicate();
-      else if (match(TrueVal, m_ConstantInt<0>()) &&
-               match(FalseVal, m_ConstantInt<-1>()))
-        Pred = CmpInst::getInversePredicate(ICI->getPredicate());
-
-      if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
-        // If we are just checking for a icmp eq of a single bit and zext'ing it
-        // to an integer, then shift the bit to the appropriate place and then
-        // cast to integer to avoid the comparison.
-        const APInt &Op1CV = CI->getValue();
-
-        // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
-        // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
-        if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
-            (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
-          Value *In = ICI->getOperand(0);
-          Value *Sh = ConstantInt::get(In->getType(),
-                                       In->getType()->getScalarSizeInBits()-1);
-          In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
-                                                      In->getName()+".lobit"),
-                                   *ICI);
-          if (In->getType() != SI.getType())
-            In = CastInst::CreateIntegerCast(In, SI.getType(),
-                                             true/*SExt*/, "tmp", ICI);
-
-          if (Pred == ICmpInst::ICMP_SGT)
-            In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
-                                                     In->getName()+".not"),
-                                     *ICI);
-
-          return ReplaceInstUsesWith(SI, In);
-        }
-      }
-    }
-
-  if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
-    // Transform (X == Y) ? X : Y  -> Y
-    if (Pred == ICmpInst::ICMP_EQ)
-      return ReplaceInstUsesWith(SI, FalseVal);
-    // Transform (X != Y) ? X : Y  -> X
-    if (Pred == ICmpInst::ICMP_NE)
-      return ReplaceInstUsesWith(SI, TrueVal);
-    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
-
-  } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
-    // Transform (X == Y) ? Y : X  -> X
-    if (Pred == ICmpInst::ICMP_EQ)
-      return ReplaceInstUsesWith(SI, FalseVal);
-    // Transform (X != Y) ? Y : X  -> Y
-    if (Pred == ICmpInst::ICMP_NE)
-      return ReplaceInstUsesWith(SI, TrueVal);
-    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
-  }
-  return Changed ? &SI : 0;
-}
-
-
-/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
-/// PHI node (but the two may be in different blocks).  See if the true/false
-/// values (V) are live in all of the predecessor blocks of the PHI.  For
-/// example, cases like this cannot be mapped:
-///
-///   X = phi [ C1, BB1], [C2, BB2]
-///   Y = add
-///   Z = select X, Y, 0
-///
-/// because Y is not live in BB1/BB2.
-///
-static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
-                                                   const SelectInst &SI) {
-  // If the value is a non-instruction value like a constant or argument, it
-  // can always be mapped.
-  const Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0) return true;
-
-  // If V is a PHI node defined in the same block as the condition PHI, we can
-  // map the arguments.
- const PHINode *CondPHI = cast(SI.getCondition()); - - if (const PHINode *VP = dyn_cast(I)) - if (VP->getParent() == CondPHI->getParent()) - return true; - - // Otherwise, if the PHI and select are defined in the same block and if V is - // defined in a different block, then we can transform it. - if (SI.getParent() == CondPHI->getParent() && - I->getParent() != CondPHI->getParent()) - return true; - - // Otherwise we have a 'hard' case and we can't tell without doing more - // detailed dominator based analysis, punt. - return false; -} - -/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: -/// SPF2(SPF1(A, B), C) -Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, - SelectPatternFlavor SPF1, - Value *A, Value *B, - Instruction &Outer, - SelectPatternFlavor SPF2, Value *C) { - if (C == A || C == B) { - // MAX(MAX(A, B), B) -> MAX(A, B) - // MIN(MIN(a, b), a) -> MIN(a, b) - if (SPF1 == SPF2) - return ReplaceInstUsesWith(Outer, Inner); - - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || - (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || - (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || - (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) - return ReplaceInstUsesWith(Outer, C); - } - - // TODO: MIN(MIN(A, 23), 97) - return 0; -} - - - - -Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { - Value *CondVal = SI.getCondition(); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - - // select true, X, Y -> X - // select false, X, Y -> Y - if (ConstantInt *C = dyn_cast(CondVal)) - return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); - - // select C, X, X -> X - if (TrueVal == FalseVal) - return ReplaceInstUsesWith(SI, TrueVal); - - if (isa(TrueVal)) // select C, undef, X -> X - return ReplaceInstUsesWith(SI, FalseVal); - if (isa(FalseVal)) // select C, X, undef -> X - return ReplaceInstUsesWith(SI, TrueVal); - if (isa(CondVal)) { // select undef, X, Y -> X or Y - if (isa(TrueVal)) - return ReplaceInstUsesWith(SI, TrueVal); - else - return ReplaceInstUsesWith(SI, FalseVal); - } - - if (SI.getType() == Type::getInt1Ty(*Context)) { - if (ConstantInt *C = dyn_cast(TrueVal)) { - if (C->getZExtValue()) { - // Change: A = select B, true, C --> A = or B, C - return BinaryOperator::CreateOr(CondVal, FalseVal); - } else { - // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateAnd(NotCond, FalseVal); - } - } else if (ConstantInt *C = dyn_cast(FalseVal)) { - if (C->getZExtValue() == false) { - // Change: A = select B, C, false --> A = and B, C - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } else { - // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateOr(NotCond, TrueVal); - } - } - - // select a, b, a -> a&b - // select a, a, b -> a|b - if (CondVal == TrueVal) - return BinaryOperator::CreateOr(CondVal, FalseVal); - else if (CondVal == FalseVal) - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } - - // Selecting between two integer constants? 
- if (ConstantInt *TrueValC = dyn_cast(TrueVal)) - if (ConstantInt *FalseValC = dyn_cast(FalseVal)) { - // select C, 1, 0 -> zext C to int - if (FalseValC->isZero() && TrueValC->getValue() == 1) { - return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); - } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { - // select C, 0, 1 -> zext !C to int - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); - } - - if (ICmpInst *IC = dyn_cast(SI.getCondition())) { - // If one of the constants is zero (we know they can't both be) and we - // have an icmp instruction with zero, and we have an 'and' with the - // non-constant value, eliminate this whole mess. This corresponds to - // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isZero() || FalseValC->isZero()) - if (IC->isEquality() && isa(IC->getOperand(1)) && - cast(IC->getOperand(1))->isNullValue()) - if (Instruction *ICA = dyn_cast(IC->getOperand(0))) - if (ICA->getOpcode() == Instruction::And && - isa(ICA->getOperand(1)) && - (ICA->getOperand(1) == TrueValC || - ICA->getOperand(1) == FalseValC) && - isOneBitSet(cast(ICA->getOperand(1)))) { - // Okay, now we know that everything is set up, we just don't - // know whether we have a icmp_ne or icmp_eq and whether the - // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isZero(); - ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; - Value *V = ICA; - if (ShouldNotVal) - V = InsertNewInstBefore(BinaryOperator::Create( - Instruction::Xor, V, ICA->getOperand(1)), SI); - return ReplaceInstUsesWith(SI, V); - } - } - } - - // See if we are selecting two values based on a comparison of the two values. - if (FCmpInst *FCI = dyn_cast(CondVal)) { - if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: - // consider X== -0, Y== +0. - // It becomes safe if either operand is a nonzero constant. - ConstantFP *CFPt, *CFPf; - if (((CFPt = dyn_cast(TrueVal)) && - !CFPt->getValueAPF().isZero()) || - ((CFPf = dyn_cast(FalseVal)) && - !CFPf->getValueAPF().isZero())) - return ReplaceInstUsesWith(SI, FalseVal); - } - // Transform (X != Y) ? X : Y -> X - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); - // NOTE: if we wanted to, this is where to detect MIN/MAX - - } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ - // Transform (X == Y) ? Y : X -> X - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: - // consider X== -0, Y== +0. - // It becomes safe if either operand is a nonzero constant. - ConstantFP *CFPt, *CFPf; - if (((CFPt = dyn_cast(TrueVal)) && - !CFPt->getValueAPF().isZero()) || - ((CFPf = dyn_cast(FalseVal)) && - !CFPf->getValueAPF().isZero())) - return ReplaceInstUsesWith(SI, FalseVal); - } - // Transform (X != Y) ? Y : X -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); - // NOTE: if we wanted to, this is where to detect MIN/MAX - } - // NOTE: if we wanted to, this is where to detect ABS - } - - // See if we are selecting two values based on a comparison of the two values. 
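The two-constant select folds above reduce boolean materialization to a cast;
for example (names illustrative only):

    define i32 @bool_to_int(i1 %c) {
      %r = select i1 %c, i32 1, i32 0
      ret i32 %r       ; becomes: zext i1 %c to i32
    }

select %c, i32 0, i32 1 is likewise turned into a zext of the negated
condition.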
- if (ICmpInst *ICI = dyn_cast(CondVal)) - if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) - return Result; - - if (Instruction *TI = dyn_cast(TrueVal)) - if (Instruction *FI = dyn_cast(FalseVal)) - if (TI->hasOneUse() && FI->hasOneUse()) { - Instruction *AddOp = 0, *SubOp = 0; - - // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) - if (TI->getOpcode() == FI->getOpcode()) - if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) - return IV; - - // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is - // even legal for FP. - if ((TI->getOpcode() == Instruction::Sub && - FI->getOpcode() == Instruction::Add) || - (TI->getOpcode() == Instruction::FSub && - FI->getOpcode() == Instruction::FAdd)) { - AddOp = FI; SubOp = TI; - } else if ((FI->getOpcode() == Instruction::Sub && - TI->getOpcode() == Instruction::Add) || - (FI->getOpcode() == Instruction::FSub && - TI->getOpcode() == Instruction::FAdd)) { - AddOp = TI; SubOp = FI; - } - - if (AddOp) { - Value *OtherAddOp = 0; - if (SubOp->getOperand(0) == AddOp->getOperand(0)) { - OtherAddOp = AddOp->getOperand(1); - } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { - OtherAddOp = AddOp->getOperand(0); - } - - if (OtherAddOp) { - // So at this point we know we have (Y -> OtherAddOp): - // select C, (add X, Y), (sub X, Z) - Value *NegVal; // Compute -Z - if (Constant *C = dyn_cast(SubOp->getOperand(1))) { - NegVal = ConstantExpr::getNeg(C); - } else { - NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1), - "tmp"), SI); - } - - Value *NewTrueOp = OtherAddOp; - Value *NewFalseOp = NegVal; - if (AddOp != TI) - std::swap(NewTrueOp, NewFalseOp); - Instruction *NewSel = - SelectInst::Create(CondVal, NewTrueOp, - NewFalseOp, SI.getName() + ".p"); - - NewSel = InsertNewInstBefore(NewSel, SI); - return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); - } - } - } - - // See if we can fold the select into one of our operands. - if (SI.getType()->isInteger()) { - if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) - return FoldI; - - // MAX(MAX(a, b), a) -> MAX(a, b) - // MIN(MIN(a, b), a) -> MIN(a, b) - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - Value *LHS, *RHS, *LHS2, *RHS2; - if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { - if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, - SI, SPF, RHS)) - return R; - if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, - SI, SPF, LHS)) - return R; - } - - // TODO. - // ABS(-X) -> ABS(X) - // ABS(ABS(X)) -> ABS(X) - } - - // See if we can fold the select into a phi node if the condition is a select. - if (isa(SI.getCondition())) - // The true/false values have to be live in the PHI predecessor's blocks. - if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && - CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) - if (Instruction *NV = FoldOpIntoPhi(SI)) - return NV; - - if (BinaryOperator::isNot(CondVal)) { - SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); - SI.setOperand(1, FalseVal); - SI.setOperand(2, TrueVal); - return &SI; - } - - return 0; -} - -/// EnforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. 
If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. -/// -static unsigned EnforceKnownAlignment(Value *V, - unsigned Align, unsigned PrefAlign) { - - User *U = dyn_cast(V); - if (!U) return Align; - - switch (Operator::getOpcode(U)) { - default: break; - case Instruction::BitCast: - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - case Instruction::GetElementPtr: { - // If all indexes are zero, it is just the alignment of the base pointer. - bool AllZeroOperands = true; - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) - if (!isa(*i) || - !cast(*i)->isNullValue()) { - AllZeroOperands = false; - break; - } - - if (AllZeroOperands) { - // Treat this like a bitcast. - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - } - break; - } - } - - if (GlobalValue *GV = dyn_cast(V)) { - // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (!GV->isDeclaration()) { - if (GV->getAlignment() >= PrefAlign) - Align = GV->getAlignment(); - else { - GV->setAlignment(PrefAlign); - Align = PrefAlign; - } - } - } else if (AllocaInst *AI = dyn_cast(V)) { - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - Align = AI->getAlignment(); - else { - AI->setAlignment(PrefAlign); - Align = PrefAlign; - } - } - - return Align; -} - -/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that -/// we can determine, return it, otherwise return 0. If PrefAlign is specified, -/// and it is more than the alignment of the ultimate object, see if we can -/// increase the alignment of the ultimate object, making this check succeed. -unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign) { - unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : - sizeof(PrefAlign) * CHAR_BIT; - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne); - unsigned TrailZ = KnownZero.countTrailingOnes(); - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - - if (PrefAlign > Align) - Align = EnforceKnownAlignment(V, Align, PrefAlign); - - // We don't need to make any adjustment. - return Align; -} - -Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); - unsigned MinAlign = std::min(DstAlign, SrcAlign); - unsigned CopyAlign = MI->getAlignment(); - - if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - MinAlign, false)); - return MI; - } - - // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with - // load/store. - ConstantInt *MemOpLength = dyn_cast(MI->getOperand(3)); - if (MemOpLength == 0) return 0; - - // Source and destination pointer types are always "i8*" for intrinsic. See - // if the size is something we can handle with a single primitive load/store. - // A single load+store correctly handles overlapping memory in the memmove - // case. - unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0) return MI; // Delete this mem transfer. - - if (Size > 8 || (Size&(Size-1))) - return 0; // If not 1/2/4/8 bytes, exit. - - // Use an integer load+store unless we can find something better. 
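SimplifyMemTransfer above replaces small fixed-size copies with a single
load/store pair. A sketch using the 2010-era non-overloaded memcpy intrinsic
(the exact intrinsic name is era-dependent; names illustrative):

    declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)

    define void @copy8(i8* %d, i8* %s) {
      call void @llvm.memcpy.i32(i8* %d, i8* %s, i32 8, i32 8)
      ret void         ; becomes an i64 load of %s and an i64 store to %d;
    }                  ; the call's length is then set to 0 and it is erased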
- Type *NewPtrTy = - PointerType::getUnqual(IntegerType::get(*Context, Size<<3)); - - // Memcpy forces the use of i8* for the source and destination. That means - // that if you're using memcpy to move one double around, you'll get a cast - // from double* to i8*. We'd much rather use a double load+store rather than - // an i64 load+store, here because this improves the odds that the source or - // dest address will be promotable. See if we can find a better type than the - // integer datatype. - if (Value *Op = getBitCastOperand(MI->getOperand(1))) { - const Type *SrcETy = cast(Op->getType())->getElementType(); - if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { - // The SrcETy might be something like {{{double}}} or [1 x double]. Rip - // down through these levels if so. - while (!SrcETy->isSingleValueType()) { - if (const StructType *STy = dyn_cast(SrcETy)) { - if (STy->getNumElements() == 1) - SrcETy = STy->getElementType(0); - else - break; - } else if (const ArrayType *ATy = dyn_cast(SrcETy)) { - if (ATy->getNumElements() == 1) - SrcETy = ATy->getElementType(); - else - break; - } else - break; - } - - if (SrcETy->isSingleValueType()) - NewPtrTy = PointerType::getUnqual(SrcETy); - } - } - - - // If the memcpy/memmove provides better alignment info than we can - // infer, use it. - SrcAlign = std::max(SrcAlign, CopyAlign); - DstAlign = std::max(DstAlign, CopyAlign); - - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); - Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); - InsertNewInstBefore(L, *MI); - InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); - - // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); - return MI; -} - -Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); - if (MI->getAlignment() < Alignment) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - Alignment, false)); - return MI; - } - - // Extract the length and alignment and fill if they are constant. - ConstantInt *LenC = dyn_cast(MI->getLength()); - ConstantInt *FillC = dyn_cast(MI->getValue()); - if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context)) - return 0; - uint64_t Len = LenC->getZExtValue(); - Alignment = MI->getAlignment(); - - // If the length is zero, this is a no-op - if (Len == 0) return MI; // memset(d,c,0,a) -> noop - - // memset(s,c,n) -> store s, c (for n=1,2,4,8) - if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(*Context, Len*8); // n=1 -> i8. - - Value *Dest = MI->getDest(); - Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); - - // Alignment 0 is identity for alignment 1 for memset, but not store. - if (Alignment == 0) Alignment = 1; - - // Extract the fill value and store. - uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), - Dest, false, Alignment), *MI); - - // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setLength(Constant::getNullValue(LenC->getType())); - return MI; - } - - return 0; -} - - -/// visitCallInst - CallInst simplification. This mostly only handles folding -/// of intrinsic instructions. For normal calls, it allows visitCallSite to do -/// the heavy lifting. 
-///
-Instruction *InstCombiner::visitCallInst(CallInst &CI) {
-  if (isFreeCall(&CI))
-    return visitFree(CI);
-
-  // If the caller function is nounwind, mark the call as nounwind, even if the
-  // callee isn't.
-  if (CI.getParent()->getParent()->doesNotThrow() &&
-      !CI.doesNotThrow()) {
-    CI.setDoesNotThrow();
-    return &CI;
-  }
-
-  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
-  if (!II) return visitCallSite(&CI);
-
-  // Intrinsics cannot occur in an invoke, so handle them here instead of in
-  // visitCallSite.
-  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
-    bool Changed = false;
-
-    // memmove/cpy/set of zero bytes is a noop.
-    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
-      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
-
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
-        if (CI->getZExtValue() == 1) {
-          // Replace the instruction with just byte operations.  We would
-          // transform other cases to loads/stores, but we don't know if
-          // alignment is sufficient.
-        }
-    }
-
-    // If we have a memmove and the source operation is a constant global,
-    // then the source and dest pointers can't alias, so we can change this
-    // into a call to memcpy.
-    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
-      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
-        if (GVSrc->isConstant()) {
-          Module *M = CI.getParent()->getParent()->getParent();
-          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          const Type *Tys[1];
-          Tys[0] = CI.getOperand(3)->getType();
-          CI.setOperand(0,
-                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
-          Changed = true;
-        }
-    }
-
-    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
-      // memmove(x,x,size) -> noop.
-      if (MTI->getSource() == MTI->getDest())
-        return EraseInstFromFunction(CI);
-    }
-
-    // If we can determine a pointer alignment that is bigger than currently
-    // set, update the alignment.
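Two of the memintrinsic cleanups above in IR form (era-dependent intrinsic
names; otherwise illustrative):

    declare void @llvm.memmove.i32(i8*, i8*, i32, i32)

    define void @self_move(i8* %p) {
      call void @llvm.memmove.i32(i8* %p, i8* %p, i32 16, i32 1)
      ret void         ; source == dest, so the call is erased outright
    }

A memmove whose source is a constant global is rewritten into a memcpy
instead, since a constant source cannot alias the destination being written.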
- if (isa(MI)) { - if (Instruction *I = SimplifyMemTransfer(MI)) - return I; - } else if (MemSetInst *MSI = dyn_cast(MI)) { - if (Instruction *I = SimplifyMemSet(MSI)) - return I; - } - - if (Changed) return II; - } - - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: - // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast(II->getOperand(1))) - if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getOperand(1)); - break; - case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast(II->getOperand(2))) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); - // powi(x, 1) -> x - if (Power->isOne()) - return ReplaceInstUsesWith(CI, II->getOperand(1)); - // powi(x, -1) -> 1/x - if (Power->isAllOnesValue()) - return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), - II->getOperand(1)); - } - break; - - case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - const IntegerType *IT = cast(II->getOperand(1)->getType()); - uint32_t BitWidth = IT->getBitWidth(); - APInt Mask = APInt::getSignBit(BitWidth); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); - bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; - bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; - - if (LHSKnownNegative || LHSKnownPositive) { - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); - bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; - bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; - if (LHSKnownNegative && RHSKnownNegative) { - // The sign bit is set in both cases: this MUST overflow. - // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); - Worklist.Add(Add); - Constant *V[] = { - UndefValue::get(LHS->getType()), ConstantInt::getTrue(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, Add, 0); - } - - if (LHSKnownPositive && RHSKnownPositive) { - // The sign bit is clear in both cases: this CANNOT overflow. - // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); - Worklist.Add(Add); - Constant *V[] = { - UndefValue::get(LHS->getType()), ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, Add, 0); - } - } - } - // FALL THROUGH uadd into sadd - case Intrinsic::sadd_with_overflow: - // Canonicalize constants into the RHS. 
- if (isa(II->getOperand(1)) && - !isa(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); - return II; - } - - // X + undef -> undef - if (isa(II->getOperand(2))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast(II->getOperand(2))) { - // X + 0 -> {X, false} - if (RHS->isZero()) { - Constant *V[] = { - UndefValue::get(II->getOperand(0)->getType()), - ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); - } - } - break; - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: - // undef - X -> undef - // X - undef -> undef - if (isa(II->getOperand(1)) || - isa(II->getOperand(2))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast(II->getOperand(2))) { - // X - 0 -> {X, false} - if (RHS->isZero()) { - Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), - ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); - } - } - break; - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: - // Canonicalize constants into the RHS. - if (isa(II->getOperand(1)) && - !isa(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); - return II; - } - - // X * undef -> undef - if (isa(II->getOperand(2))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHSI = dyn_cast(II->getOperand(2))) { - // X*0 -> {0, false} - if (RHSI->isZero()) - return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); - - // X * 1 -> {X, false} - if (RHSI->equalsInt(1)) { - Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), - ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); - } - } - break; - case Intrinsic::ppc_altivec_lvx: - case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), - PointerType::getUnqual(II->getType())); - return new LoadInst(Ptr); - } - break; - case Intrinsic::ppc_altivec_stvx: - case Intrinsic::ppc_altivec_stvxl: - // Turn stvx -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { - const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(1)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); - return new StoreInst(II->getOperand(1), Ptr); - } - break; - case Intrinsic::x86_sse_storeu_ps: - case Intrinsic::x86_sse2_storeu_pd: - case Intrinsic::x86_sse2_storeu_dq: - // Turn X86 storeu -> store if the pointer is known aligned. 
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(2)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); - return new StoreInst(II->getOperand(2), Ptr); - } - break; - - case Intrinsic::x86_sse_cvttss2si: { - // These intrinsics only demands the 0th element of its input vector. If - // we can simplify the input based on that, do so now. - unsigned VWidth = - cast(II->getOperand(1)->getType())->getNumElements(); - APInt DemandedElts(VWidth, 1); - APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, - UndefElts)) { - II->setOperand(1, V); - return II; - } - break; - } - - case Intrinsic::ppc_altivec_vperm: - // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast(II->getOperand(3))) { - assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); - - // Check that all of the elements are integer constants or undefs. - bool AllEltsOk = true; - for (unsigned i = 0; i != 16; ++i) { - if (!isa(Mask->getOperand(i)) && - !isa(Mask->getOperand(i))) { - AllEltsOk = false; - break; - } - } - - if (AllEltsOk) { - // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); - Value *Result = UndefValue::get(Op0->getType()); - - // Only extract each element once. - Value *ExtractedElts[32]; - memset(ExtractedElts, 0, sizeof(ExtractedElts)); - - for (unsigned i = 0; i != 16; ++i) { - if (isa(Mask->getOperand(i))) - continue; - unsigned Idx=cast(Mask->getOperand(i))->getZExtValue(); - Idx &= 31; // Match the hardware behavior. - - if (ExtractedElts[Idx] == 0) { - ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, - ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false), - "tmp"); - } - - // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - ConstantInt::get(Type::getInt32Ty(*Context), i, false), - "tmp"); - } - return CastInst::Create(Instruction::BitCast, Result, CI.getType()); - } - } - break; - - case Intrinsic::stackrestore: { - // If the save is right next to the restore, remove the restore. This can - // happen when variable allocas are DCE'd. - if (IntrinsicInst *SS = dyn_cast(II->getOperand(1))) { - if (SS->getIntrinsicID() == Intrinsic::stacksave) { - BasicBlock::iterator BI = SS; - if (&*++BI == II) - return EraseInstFromFunction(CI); - } - } - - // Scan down this block to see if there is another stack restore in the - // same block without an intervening call/alloca. - BasicBlock::iterator BI = II; - TerminatorInst *TI = II->getParent()->getTerminator(); - bool CannotRemove = false; - for (++BI; &*BI != TI; ++BI) { - if (isa(BI) || isMalloc(BI)) { - CannotRemove = true; - break; - } - if (CallInst *BCI = dyn_cast(BI)) { - if (IntrinsicInst *II = dyn_cast(BCI)) { - // If there is a stackrestore below this one, remove this one. - if (II->getIntrinsicID() == Intrinsic::stackrestore) - return EraseInstFromFunction(CI); - // Otherwise, ignore the intrinsic. - } else { - // If we found a non-intrinsic call, we can't remove the stack - // restore. - CannotRemove = true; - break; - } - } - } - - // If the stack restore is in a return/unwind block and if there are no - // allocas or calls between the restore and the return, nuke the restore. 
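The stackrestore case above deletes save/restore pairs that become adjacent
once dynamic allocas are DCE'd (names illustrative only):

    declare i8* @llvm.stacksave()
    declare void @llvm.stackrestore(i8*)

    define void @no_dynamic_allocas() {
      %sp = call i8* @llvm.stacksave()
      call void @llvm.stackrestore(i8* %sp)
      ret void       ; the restore is erased; the now-unused save is
    }                ; cleaned up as dead code afterwards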
-    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
-      return EraseInstFromFunction(CI);
-    break;
-  }
-  }
-
-  return visitCallSite(II);
-}
-
-// InvokeInst simplification
-//
-Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
-  return visitCallSite(&II);
-}
-
-/// isSafeToEliminateVarargsCast - If this cast does not affect the value
-/// passed through the varargs area, we can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallSite CS,
-                                         const CastInst * const CI,
-                                         const TargetData * const TD,
-                                         const int ix) {
-  if (!CI->isLosslessCast())
-    return false;
-
-  // The size of ByVal arguments is derived from the type, so we
-  // can't change to a type with a different size.  If the size were
-  // passed explicitly we could avoid this check.
-  if (!CS.paramHasAttr(ix, Attribute::ByVal))
-    return true;
-
-  const Type* SrcTy =
-            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
-  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
-  if (!SrcTy->isSized() || !DstTy->isSized())
-    return false;
-  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
-    return false;
-  return true;
-}
-
-// visitCallSite - Improvements for call and invoke instructions.
-//
-Instruction *InstCombiner::visitCallSite(CallSite CS) {
-  bool Changed = false;
-
-  // If the callee is a constexpr cast of a function, attempt to move the cast
-  // to the arguments of the call/invoke.
-  if (transformConstExprCastCall(CS)) return 0;
-
-  Value *Callee = CS.getCalledValue();
-
-  if (Function *CalleeF = dyn_cast<Function>(Callee))
-    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
-      Instruction *OldCall = CS.getInstruction();
-      // If the call and callee calling conventions don't match, this call must
-      // be unreachable, as the call is undefined.
-      new StoreInst(ConstantInt::getTrue(*Context),
-                    UndefValue::get(Type::getInt1PtrTy(*Context)),
-                    OldCall);
-      // If OldCall does not return void then replaceAllUsesWith undef.
-      // This allows ValueHandlers and custom metadata to adjust itself.
-      if (!OldCall->getType()->isVoidTy())
-        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
-      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
-        return EraseInstFromFunction(*OldCall);
-      return 0;
-    }
-
-  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
-    // This instruction is not reachable, just remove it.  We insert a store to
-    // undef so that we know that this code is not reachable, despite the fact
-    // that we can't modify the CFG here.
-    new StoreInst(ConstantInt::getTrue(*Context),
-                  UndefValue::get(Type::getInt1PtrTy(*Context)),
-                  CS.getInstruction());
-
-    // If CS does not return void then replaceAllUsesWith undef.
-    // This allows ValueHandlers and custom metadata to adjust itself.
-    if (!CS.getInstruction()->getType()->isVoidTy())
-      CS.getInstruction()->
-        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
-
-    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
-      // Don't break the CFG, insert a dummy cond branch.
- BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - ConstantInt::getTrue(*Context), II); - } - return EraseInstFromFunction(*CS.getInstruction()); - } - - if (BitCastInst *BC = dyn_cast(Callee)) - if (IntrinsicInst *In = dyn_cast(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); - - const PointerType *PTy = cast(Callee->getType()); - const FunctionType *FTy = cast(PTy->getElementType()); - if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (isa(Callee) ? 3 : 1); - // See if we can optimize any arguments passed through the varargs area of - // the call. - for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), - E = CS.arg_end(); I != E; ++I, ++ix) { - CastInst *CI = dyn_cast(*I); - if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) { - *I = CI->getOperand(0); - Changed = true; - } - } - } - - if (isa(Callee) && !CS.doesNotThrow()) { - // Inline asm calls cannot throw - mark them 'nounwind'. - CS.setDoesNotThrow(); - Changed = true; - } - - return Changed ? CS.getInstruction() : 0; -} - -// transformConstExprCastCall - If the callee is a constexpr cast of a function, -// attempt to move the cast to the arguments of the call/invoke. -// -bool InstCombiner::transformConstExprCastCall(CallSite CS) { - if (!isa(CS.getCalledValue())) return false; - ConstantExpr *CE = cast(CS.getCalledValue()); - if (CE->getOpcode() != Instruction::BitCast || - !isa(CE->getOperand(0))) - return false; - Function *Callee = cast(CE->getOperand(0)); - Instruction *Caller = CS.getInstruction(); - const AttrListPtr &CallerPAL = CS.getAttributes(); - - // Okay, this is a cast from a function to a different type. Unless doing so - // would cause a type conversion of one of our arguments, change this call to - // be a direct call with arguments casted to the appropriate types. - // - const FunctionType *FT = Callee->getFunctionType(); - const Type *OldRetTy = Caller->getType(); - const Type *NewRetTy = FT->getReturnType(); - - if (isa(NewRetTy)) - return false; // TODO: Handle multiple return values. - - // Check to see if we are changing the return type... - if (OldRetTy != NewRetTy) { - if (Callee->isDeclaration() && - // Conversion is ok if changing from one pointer type to another or from - // a pointer to an integer of the same size. - !((isa(OldRetTy) || !TD || - OldRetTy == TD->getIntPtrType(Caller->getContext())) && - (isa(NewRetTy) || !TD || - NewRetTy == TD->getIntPtrType(Caller->getContext())))) - return false; // Cannot transform this return value. - - if (!Caller->use_empty() && - // void -> non-void is handled specially - !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) - return false; // Cannot transform this return value. - - if (!CallerPAL.isEmpty() && !Caller->use_empty()) { - Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) - return false; // Attribute not compatible with transformed value. - } - - // If the callsite is an invoke instruction, and the return value is used by - // a PHI node in a successor, we cannot change the return type of the call - // because there is no place to put the cast instruction (without breaking - // the critical edge). Bail out in this case. 
-    if (!Caller->use_empty())
-      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
-        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
-             UI != E; ++UI)
-          if (PHINode *PN = dyn_cast<PHINode>(*UI))
-            if (PN->getParent() == II->getNormalDest() ||
-                PN->getParent() == II->getUnwindDest())
-              return false;
-  }
-
-  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
-  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
-
-  CallSite::arg_iterator AI = CS.arg_begin();
-  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
-    const Type *ParamTy = FT->getParamType(i);
-    const Type *ActTy = (*AI)->getType();
-
-    if (!CastInst::isCastable(ActTy, ParamTy))
-      return false;   // Cannot transform this parameter value.
-
-    if (CallerPAL.getParamAttributes(i + 1)
-        & Attribute::typeIncompatible(ParamTy))
-      return false;   // Attribute not compatible with transformed value.
-
-    // Converting from one pointer type to another or between a pointer and an
-    // integer of the same size is safe even if we do not have a body.
-    bool isConvertible = ActTy == ParamTy ||
-      (TD && ((isa<PointerType>(ParamTy) ||
-               ParamTy == TD->getIntPtrType(Caller->getContext())) &&
-              (isa<PointerType>(ActTy) ||
-               ActTy == TD->getIntPtrType(Caller->getContext()))));
-    if (Callee->isDeclaration() && !isConvertible) return false;
-  }
-
-  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
-      Callee->isDeclaration())
-    return false;   // Do not delete arguments unless we have a function body.
-
-  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
-      !CallerPAL.isEmpty())
-    // In this case we have more arguments than the new function type, but we
-    // won't be dropping them.  Check that these extra arguments have attributes
-    // that are compatible with being a vararg call argument.
-    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
-      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
-        break;
-      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
-      if (PAttrs & Attribute::VarArgsIncompatible)
-        return false;
-    }
-
-  // Okay, we decided that this is a safe thing to do: go ahead and start
-  // inserting cast instructions as necessary...
-  std::vector<Value*> Args;
-  Args.reserve(NumActualArgs);
-  SmallVector<AttributeWithIndex, 8> attrVec;
-  attrVec.reserve(NumCommonArgs);
-
-  // Get any return attributes.
-  Attributes RAttrs = CallerPAL.getRetAttributes();
-
-  // If the return value is not being used, the type may not be compatible
-  // with the existing attributes.  Wipe out any problematic attributes.
-  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
-
-  // Add the new return attributes.
-  if (RAttrs)
-    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
-
-  AI = CS.arg_begin();
-  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
-    const Type *ParamTy = FT->getParamType(i);
-    if ((*AI)->getType() == ParamTy) {
-      Args.push_back(*AI);
-    } else {
-      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
-          false, ParamTy, false);
-      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
-    }
-
-    // Add any parameter attributes.
-    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
-      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
-  }
-
-  // If the function takes more arguments than the call was taking, add them
-  // now.
-  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
-    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
-
-  // If we are removing arguments to the function, emit an obnoxious warning.
- if (FT->getNumParams() < NumActualArgs) { - if (!FT->isVarArg()) { - errs() << "WARNING: While resolving call to function '" - << Callee->getName() << "' arguments were dropped!\n"; - } else { - // Add all of the arguments in their promoted form to the arg list. - for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { - const Type *PTy = getPromotedType((*AI)->getType()); - if (PTy != (*AI)->getType()) { - // Must promote to pass through va_arg area! - Instruction::CastOps opcode = - CastInst::getCastOpcode(*AI, false, PTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); - } else { - Args.push_back(*AI); - } - - // Add any parameter attributes. - if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) - attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); - } - } - } - - if (Attributes FnAttrs = CallerPAL.getFnAttributes()) - attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); - - if (NewRetTy->isVoidTy()) - Caller->setName(""); // Void type should not have a name. - - const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), - attrVec.end()); - - Instruction *NC; - if (InvokeInst *II = dyn_cast(Caller)) { - NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), - Caller->getName(), Caller); - cast(NC)->setCallingConv(II->getCallingConv()); - cast(NC)->setAttributes(NewCallerPAL); - } else { - NC = CallInst::Create(Callee, Args.begin(), Args.end(), - Caller->getName(), Caller); - CallInst *CI = cast(Caller); - if (CI->isTailCall()) - cast(NC)->setTailCall(); - cast(NC)->setCallingConv(CI->getCallingConv()); - cast(NC)->setAttributes(NewCallerPAL); - } - - // Insert a cast of the return type as necessary. - Value *NV = NC; - if (OldRetTy != NV->getType() && !Caller->use_empty()) { - if (!NV->getType()->isVoidTy()) { - Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, - OldRetTy, false); - NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); - - // If this is an invoke instruction, we should insert it after the first - // non-phi, instruction in the normal successor block. - if (InvokeInst *II = dyn_cast(Caller)) { - BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); - InsertNewInstBefore(NC, *I); - } else { - // Otherwise, it's a call, just insert cast right after the call instr - InsertNewInstBefore(NC, *Caller); - } - Worklist.AddUsersToWorkList(*Caller); - } else { - NV = UndefValue::get(Caller->getType()); - } - } - - - if (!Caller->use_empty()) - Caller->replaceAllUsesWith(NV); - - EraseInstFromFunction(*Caller); - return true; -} - -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. -// -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { - Value *Callee = CS.getCalledValue(); - const PointerType *PTy = cast(Callee->getType()); - const FunctionType *FTy = cast(PTy->getElementType()); - const AttrListPtr &Attrs = CS.getAttributes(); - - // If the call already has the 'nest' attribute somewhere then give up - - // otherwise 'nest' would occur twice after splicing in the chain. 
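// [Editor's aside - illustration, not part of the patch.]  The vararg branch
// above re-casts each extra argument to getPromotedType(...) before pushing
// it.  Assuming that helper applies the usual C-style default promotion
// (i1/i8/i16 widen to i32, wider types pass through), the rule is just:
#include <cassert>
static unsigned promotedIntWidth(unsigned bits) {
  return bits < 32 ? 32u : bits;  // small integers ride through va_arg as i32
}
int main() {
  assert(promotedIntWidth(8) == 32);   // i8  -> i32
  assert(promotedIntWidth(64) == 64);  // i64 -> unchanged
}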
- if (Attrs.hasAttrSomewhere(Attribute::Nest)) - return 0; - - IntrinsicInst *Tramp = - cast(cast(Callee)->getOperand(0)); - - Function *NestF = cast(Tramp->getOperand(2)->stripPointerCasts()); - const PointerType *NestFPTy = cast(NestF->getType()); - const FunctionType *NestFTy = cast(NestFPTy->getElementType()); - - const AttrListPtr &NestAttrs = NestF->getAttributes(); - if (!NestAttrs.isEmpty()) { - unsigned NestIdx = 1; - const Type *NestTy = 0; - Attributes NestAttr = Attribute::None; - - // Look for a parameter marked with the 'nest' attribute. - for (FunctionType::param_iterator I = NestFTy->param_begin(), - E = NestFTy->param_end(); I != E; ++NestIdx, ++I) - if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) { - // Record the parameter type and any other attributes. - NestTy = *I; - NestAttr = NestAttrs.getParamAttributes(NestIdx); - break; - } - - if (NestTy) { - Instruction *Caller = CS.getInstruction(); - std::vector NewArgs; - NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); - - SmallVector NewAttrs; - NewAttrs.reserve(Attrs.getNumSlots() + 1); - - // Insert the nest argument into the call argument list, which may - // mean appending it. Likewise for attributes. - - // Add any result attributes. - if (Attributes Attr = Attrs.getRetAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); - - { - unsigned Idx = 1; - CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - do { - if (Idx == NestIdx) { - // Add the chain argument and attributes. - Value *NestVal = Tramp->getOperand(3); - if (NestVal->getType() != NestTy) - NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); - NewArgs.push_back(NestVal); - NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); - } - - if (I == E) - break; - - // Add the original argument and attributes. - NewArgs.push_back(*I); - if (Attributes Attr = Attrs.getParamAttributes(Idx)) - NewAttrs.push_back - (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); - - ++Idx, ++I; - } while (1); - } - - // Add any function attributes. - if (Attributes Attr = Attrs.getFnAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); - - // The trampoline may have been bitcast to a bogus type (FTy). - // Handle this by synthesizing a new function type, equal to FTy - // with the chain parameter inserted. - - std::vector NewTypes; - NewTypes.reserve(FTy->getNumParams()+1); - - // Insert the chain's type into the list of parameter types, which may - // mean appending it. - { - unsigned Idx = 1; - FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); - - do { - if (Idx == NestIdx) - // Add the chain's type. - NewTypes.push_back(NestTy); - - if (I == E) - break; - - // Add the original type. - NewTypes.push_back(*I); - - ++Idx, ++I; - } while (1); - } - - // Replace the trampoline call with a direct call. Let the generic - // code sort out any function type mismatches. - FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, - FTy->isVarArg()); - Constant *NewCallee = - NestF->getType() == PointerType::getUnqual(NewFTy) ? 
- NestF : ConstantExpr::getBitCast(NestF, - PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), - NewAttrs.end()); - - Instruction *NewCaller; - if (InvokeInst *II = dyn_cast(Caller)) { - NewCaller = InvokeInst::Create(NewCallee, - II->getNormalDest(), II->getUnwindDest(), - NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - cast(NewCaller)->setCallingConv(II->getCallingConv()); - cast(NewCaller)->setAttributes(NewPAL); - } else { - NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - if (cast(Caller)->isTailCall()) - cast(NewCaller)->setTailCall(); - cast(NewCaller)-> - setCallingConv(cast(Caller)->getCallingConv()); - cast(NewCaller)->setAttributes(NewPAL); - } - if (!Caller->getType()->isVoidTy()) - Caller->replaceAllUsesWith(NewCaller); - Caller->eraseFromParent(); - Worklist.Remove(Caller); - return 0; - } - } - - // Replace the trampoline call with a direct call. Since there is no 'nest' - // parameter, there is no need to adjust the argument list. Let the generic - // code sort out any function type mismatches. - Constant *NewCallee = - NestF->getType() == PTy ? NestF : - ConstantExpr::getBitCast(NestF, PTy); - CS.setCalledFunction(NewCallee); - return CS.getInstruction(); -} - -/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] -/// and if a/b/c and the add's all have a single use, turn this into a phi -/// and a single binop. -Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { - Instruction *FirstInst = cast(PN.getIncomingValue(0)); - assert(isa(FirstInst) || isa(FirstInst)); - unsigned Opc = FirstInst->getOpcode(); - Value *LHSVal = FirstInst->getOperand(0); - Value *RHSVal = FirstInst->getOperand(1); - - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); - - // Scan to see if all operands are the same opcode, and all have one use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (!I || I->getOpcode() != Opc || !I->hasOneUse() || - // Verify type of the LHS matches so we don't fold cmp's of different - // types or GEP's with different index types. - I->getOperand(0)->getType() != LHSType || - I->getOperand(1)->getType() != RHSType) - return 0; - - // If they are CmpInst instructions, check their predicates - if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) - if (cast(I)->getPredicate() != - cast(FirstInst)->getPredicate()) - return 0; - - // Keep track of which operand needs a phi node. - if (I->getOperand(0) != LHSVal) LHSVal = 0; - if (I->getOperand(1) != RHSVal) RHSVal = 0; - } - - // If both LHS and RHS would need a PHI, don't do this transformation, - // because it would increase the number of PHIs entering the block, - // which leads to higher register pressure. This is especially - // bad when the PHIs are in the header of a loop. - if (!LHSVal && !RHSVal) - return 0; - - // Otherwise, this is safe to transform! 
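// [Editor's aside - illustration, not part of the patch.]  The scan above
// clears LHSVal/RHSVal as soon as any incoming binop disagrees on that
// operand; the fold is then allowed only if at least one side stayed common.
// A compilable restatement over plain operand pairs (names hypothetical):
#include <cassert>
#include <utility>
#include <vector>
typedef std::pair<int, int> Operands;  // stand-ins for the two Value* operands
static bool worthFolding(const std::vector<Operands> &Incoming) {
  bool lhsSame = true, rhsSame = true;
  for (unsigned i = 1; i < Incoming.size(); ++i) {
    lhsSame = lhsSame && Incoming[i].first == Incoming[0].first;
    rhsSame = rhsSame && Incoming[i].second == Incoming[0].second;
  }
  // phi(add(a,b), add(a,c)) -> add(a, phi(b,c)) needs one new PHI; if both
  // operands vary we would need two, raising register pressure, so refuse.
  return lhsSame || rhsSame;
}
int main() {
  assert(worthFolding(std::vector<Operands>{{1, 2}, {1, 3}}));   // shared LHS
  assert(!worthFolding(std::vector<Operands>{{1, 2}, {3, 4}}));  // both vary
}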
- - Value *InLHS = FirstInst->getOperand(0); - Value *InRHS = FirstInst->getOperand(1); - PHINode *NewLHS = 0, *NewRHS = 0; - if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType, - FirstInst->getOperand(0)->getName() + ".pn"); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); - NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewLHS, PN); - LHSVal = NewLHS; - } - - if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType, - FirstInst->getOperand(1)->getName() + ".pn"); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); - NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewRHS, PN); - RHSVal = NewRHS; - } - - // Add all operands to the new PHIs. - if (NewLHS || NewRHS) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *InInst = cast(PN.getIncomingValue(i)); - if (NewLHS) { - Value *NewInLHS = InInst->getOperand(0); - NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); - } - if (NewRHS) { - Value *NewInRHS = InInst->getOperand(1); - NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); - } - } - } - - if (BinaryOperator *BinOp = dyn_cast(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); - CmpInst *CIOp = cast(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - LHSVal, RHSVal); -} - -Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { - GetElementPtrInst *FirstInst =cast(PN.getIncomingValue(0)); - - SmallVector FixedOperands(FirstInst->op_begin(), - FirstInst->op_end()); - // This is true if all GEP bases are allocas and if all indices into them are - // constants. - bool AllBasePointersAreAllocas = true; - - // We don't want to replace this phi if the replacement would require - // more than one phi, which leads to higher register pressure. This is - // especially bad when the PHIs are in the header of a loop. - bool NeededPhi = false; - - // Scan to see if all operands are the same opcode, and all have one use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - GetElementPtrInst *GEP= dyn_cast(PN.getIncomingValue(i)); - if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || - GEP->getNumOperands() != FirstInst->getNumOperands()) - return 0; - - // Keep track of whether or not all GEPs are of alloca pointers. - if (AllBasePointersAreAllocas && - (!isa(GEP->getOperand(0)) || - !GEP->hasAllConstantIndices())) - AllBasePointersAreAllocas = false; - - // Compare the operand lists. - for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { - if (FirstInst->getOperand(op) == GEP->getOperand(op)) - continue; - - // Don't merge two GEPs when two operands differ (introducing phi nodes) - // if one of the PHIs has a constant for the index. The index may be - // substantially cheaper to compute for the constants, so making it a - // variable index could pessimize the path. This also handles the case - // for struct indices, which must always be constant. - if (isa(FirstInst->getOperand(op)) || - isa(GEP->getOperand(op))) - return 0; - - if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) - return 0; - - // If we already needed a PHI for an earlier operand, and another operand - // also requires a PHI, we'd be introducing more PHIs than we're - // eliminating, which increases register pressure on entry to the PHI's - // block. - if (NeededPhi) - return 0; - - FixedOperands[op] = 0; // Needs a PHI. 
-      NeededPhi = true;
-    }
-  }
-
-  // If all of the base pointers of the PHI'd GEPs are from allocas, don't
-  // bother doing this transformation.  At best, this will just save a bit of
-  // offset calculation, but all the predecessors will have to materialize the
-  // stack address into a register anyway.  We'd actually rather *clone* the
-  // load up into the predecessors so that we have a load of a gep of an alloca,
-  // which can usually all be folded into the load.
-  if (AllBasePointersAreAllocas)
-    return 0;
-
-  // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
-  // that is variable.
-  SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
-  bool HasAnyPHIs = false;
-  for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
-    if (FixedOperands[i]) continue;  // operand doesn't need a phi.
-    Value *FirstOp = FirstInst->getOperand(i);
-    PHINode *NewPN = PHINode::Create(FirstOp->getType(),
-                                     FirstOp->getName()+".pn");
-    InsertNewInstBefore(NewPN, PN);
-
-    NewPN->reserveOperandSpace(e);
-    NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
-    OperandPhis[i] = NewPN;
-    FixedOperands[i] = NewPN;
-    HasAnyPHIs = true;
-  }
-
-
-  // Add all operands to the new PHIs.
-  if (HasAnyPHIs) {
-    for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
-      GetElementPtrInst *InGEP = cast<GetElementPtrInst>(PN.getIncomingValue(i));
-      BasicBlock *InBB = PN.getIncomingBlock(i);
-
-      for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
-        if (PHINode *OpPhi = OperandPhis[op])
-          OpPhi->addIncoming(InGEP->getOperand(op), InBB);
-    }
-  }
-
-  Value *Base = FixedOperands[0];
-  return cast<GEPOperator>(FirstInst)->isInBounds() ?
-    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
-                                      FixedOperands.end()) :
-    GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
-                              FixedOperands.end());
-}
-
-
-/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
-/// sink the load out of the block that defines it.  This means that it must be
-/// obvious the value of the load is not changed from the point of the load to
-/// the end of the block it is in.
-///
-/// Finally, it is safe, but not profitable, to sink a load targeting a
-/// non-address-taken alloca.  Doing so will cause us to not promote the alloca
-/// to a register.
-static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
-  BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
-  for (++BBI; BBI != E; ++BBI)
-    if (BBI->mayWriteToMemory())
-      return false;
-
-  // Check for non-address taken alloca.  If not address-taken already, it isn't
-  // profitable to do this xform.
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
-    bool isAddressTaken = false;
-    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
-         UI != E; ++UI) {
-      if (isa<LoadInst>(UI)) continue;
-      if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
-        // If storing TO the alloca, then the address isn't taken.
-        if (SI->getOperand(1) == AI) continue;
-      }
-      isAddressTaken = true;
-      break;
-    }
-
-    if (!isAddressTaken && AI->isStaticAlloca())
-      return false;
-  }
-
-  // If this load is a load from a GEP with a constant offset from an alloca,
-  // then we don't want to sink it.  In its present form, it will be
-  // load [constant stack offset].  Sinking it will cause us to have to
-  // materialize the stack addresses in each predecessor in a register only to
-  // do a shared load from register in the successor.
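// [Editor's aside - illustration, not part of the patch.]  The safety half of
// isSafeAndProfitableToSinkLoad is just "nothing after the load in its block
// may write memory".  Sketched over a list of mayWriteToMemory() answers for
// the instructions that follow the load (hypothetical stand-in):
#include <cassert>
#include <vector>
static bool safeToSinkPast(const std::vector<bool> &mayWriteAfterLoad) {
  for (unsigned i = 0; i < mayWriteAfterLoad.size(); ++i)
    if (mayWriteAfterLoad[i])
      return false;  // a later store/call could change the loaded value
  return true;
}
int main() {
  assert(safeToSinkPast(std::vector<bool>{false, false}));
  assert(!safeToSinkPast(std::vector<bool>{false, true}));
}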
- if (GetElementPtrInst *GEP = dyn_cast(L->getOperand(0))) - if (AllocaInst *AI = dyn_cast(GEP->getOperand(0))) - if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) - return false; - - return true; -} - -Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { - LoadInst *FirstLI = cast(PN.getIncomingValue(0)); - - // When processing loads, we need to propagate two bits of information to the - // sunk load: whether it is volatile, and what its alignment is. We currently - // don't sink loads when some have their alignment specified and some don't. - // visitLoadInst will propagate an alignment onto the load when TD is around, - // and if TD isn't around, we can't handle the mixed case. - bool isVolatile = FirstLI->isVolatile(); - unsigned LoadAlignment = FirstLI->getAlignment(); - - // We can't sink the load if the loaded value could be modified between the - // load and the PHI. - if (FirstLI->getParent() != PN.getIncomingBlock(0) || - !isSafeAndProfitableToSinkLoad(FirstLI)) - return 0; - - // If the PHI is of volatile loads and the load block has multiple - // successors, sinking it would remove a load of the volatile value from - // the path through the other successor. - if (isVolatile && - FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; - - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = dyn_cast(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUse()) - return 0; - - // We can't sink the load if the loaded value could be modified between - // the load and the PHI. - if (LI->isVolatile() != isVolatile || - LI->getParent() != PN.getIncomingBlock(i) || - !isSafeAndProfitableToSinkLoad(LI)) - return 0; - - // If some of the loads have an alignment specified but not all of them, - // we can't do the transformation. - if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) - return 0; - - LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); - - // If the PHI is of volatile loads and the load block has multiple - // successors, sinking it would remove a load of the volatile value from - // the path through the other successor. - if (isVolatile && - LI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; - } - - // Okay, they are all the same operation. Create a new PHI node of the - // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), - PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); - - Value *InVal = FirstLI->getOperand(0); - NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - - // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); - if (NewInVal != InVal) - InVal = 0; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); - } - - Value *PhiVal; - if (InVal) { - // The new PHI unions all of the same values together. This is really - // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; - delete NewPN; - } else { - InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; - } - - // If this was a volatile load that we are merging, make sure to loop through - // and mark all the input loads as non-volatile. If we don't do this, we will - // insert a new volatile load and the old ones will not be deletable. 
- if (isVolatile) - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - cast(PN.getIncomingValue(i))->setVolatile(false); - - return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); -} - - - -/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" -/// operator and they all are only used by the PHI, PHI together their -/// inputs, and do the operation once, to the result of the PHI. -Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { - Instruction *FirstInst = cast(PN.getIncomingValue(0)); - - if (isa(FirstInst)) - return FoldPHIArgGEPIntoPHI(PN); - if (isa(FirstInst)) - return FoldPHIArgLoadIntoPHI(PN); - - // Scan the instruction, looking for input operations that can be folded away. - // If all input operands to the phi are the same instruction (e.g. a cast from - // the same type or "+42") we can pull the operation through the PHI, reducing - // code size and simplifying code. - Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; - - if (isa(FirstInst)) { - CastSrcTy = FirstInst->getOperand(0)->getType(); - - // Be careful about transforming integer PHIs. We don't want to pessimize - // the code by turning an i32 into an i1293. - if (isa(PN.getType()) && isa(CastSrcTy)) { - if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) - return 0; - } - } else if (isa(FirstInst) || isa(FirstInst)) { - // Can fold binop, compare or shift here if the RHS is a constant, - // otherwise call FoldPHIArgBinOpIntoPHI. - ConstantOp = dyn_cast(FirstInst->getOperand(1)); - if (ConstantOp == 0) - return FoldPHIArgBinOpIntoPHI(PN); - } else { - return 0; // Cannot fold this operation. - } - - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) - return 0; - if (CastSrcTy) { - if (I->getOperand(0)->getType() != CastSrcTy) - return 0; // Cast operation must match. - } else if (I->getOperand(1) != ConstantOp) { - return 0; - } - } - - // Okay, they are all the same operation. Create a new PHI node of the - // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), - PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); - - Value *InVal = FirstInst->getOperand(0); - NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - - // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); - if (NewInVal != InVal) - InVal = 0; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); - } - - Value *PhiVal; - if (InVal) { - // The new PHI unions all of the same values together. This is really - // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; - delete NewPN; - } else { - InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; - } - - // Insert and return the new operation. 
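// [Editor's aside - illustration, not part of the patch.]  The load-PHI fold
// completed above keeps the weakest guarantee when it merges loads: explicit
// alignments combine with min(), and a mix of specified (non-zero) and
// unspecified (zero) alignments blocks the transform.  As plain arithmetic:
#include <algorithm>
#include <cassert>
#include <vector>
static bool mergeAlignments(const std::vector<unsigned> &aligns,
                            unsigned &merged) {
  merged = aligns[0];
  for (unsigned i = 1; i < aligns.size(); ++i) {
    if ((merged != 0) != (aligns[i] != 0))
      return false;  // some loads annotated, some not: give up
    merged = std::min(merged, aligns[i]);
  }
  return true;
}
int main() {
  unsigned m;
  assert(mergeAlignments(std::vector<unsigned>{8, 4}, m) && m == 4);
  assert(!mergeAlignments(std::vector<unsigned>{8, 0}, m));
}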
-  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
-    return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
-
-  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
-    return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
-
-  CmpInst *CIOp = cast<CmpInst>(FirstInst);
-  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
-                         PhiVal, ConstantOp);
-}
-
-/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
-/// that is dead.
-static bool DeadPHICycle(PHINode *PN,
-                         SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
-  if (PN->use_empty()) return true;
-  if (!PN->hasOneUse()) return false;
-
-  // Remember this node, and if we find the cycle, return.
-  if (!PotentiallyDeadPHIs.insert(PN))
-    return true;
-
-  // Don't scan crazily complex things.
-  if (PotentiallyDeadPHIs.size() == 16)
-    return false;
-
-  if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
-    return DeadPHICycle(PU, PotentiallyDeadPHIs);
-
-  return false;
-}
-
-/// PHIsEqualValue - Return true if this phi node is always equal to
-/// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
-///   z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
-                           SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
-  // See if we already saw this PHI node.
-  if (!ValueEqualPHIs.insert(PN))
-    return true;
-
-  // Don't scan crazily complex things.
-  if (ValueEqualPHIs.size() == 16)
-    return false;
-
-  // Scan the operands to see if they are either phi nodes or are equal to
-  // the value.
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-    Value *Op = PN->getIncomingValue(i);
-    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
-      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
-        return false;
-    } else if (Op != NonPhiInVal)
-      return false;
-  }
-
-  return true;
-}
-
-
-namespace {
-struct PHIUsageRecord {
-  unsigned PHIId;     // The ID # of the PHI (something deterministic to sort on)
-  unsigned Shift;     // The amount shifted.
-  Instruction *Inst;  // The trunc instruction.
-
-  PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
-    : PHIId(pn), Shift(Sh), Inst(User) {}
-
-  bool operator<(const PHIUsageRecord &RHS) const {
-    if (PHIId < RHS.PHIId) return true;
-    if (PHIId > RHS.PHIId) return false;
-    if (Shift < RHS.Shift) return true;
-    if (Shift > RHS.Shift) return false;
-    return Inst->getType()->getPrimitiveSizeInBits() <
-           RHS.Inst->getType()->getPrimitiveSizeInBits();
-  }
-};
-
-struct LoweredPHIRecord {
-  PHINode *PN;        // The PHI that was lowered.
-  unsigned Shift;     // The amount shifted.
-  unsigned Width;     // The width extracted.
-
-  LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
-    : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
-  // Ctor form used by DenseMap.
-  LoweredPHIRecord(PHINode *pn, unsigned Sh)
-    : PN(pn), Shift(Sh), Width(0) {}
-};
-}
-
-namespace llvm {
-  template<>
-  struct DenseMapInfo<LoweredPHIRecord> {
-    static inline LoweredPHIRecord getEmptyKey() {
-      return LoweredPHIRecord(0, 0);
-    }
-    static inline LoweredPHIRecord getTombstoneKey() {
-      return LoweredPHIRecord(0, 1);
-    }
-    static unsigned getHashValue(const LoweredPHIRecord &Val) {
-      return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
-             (Val.Width>>3);
-    }
-    static bool isEqual(const LoweredPHIRecord &LHS,
-                        const LoweredPHIRecord &RHS) {
-      return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
-             LHS.Width == RHS.Width;
-    }
-  };
-  template <>
-  struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
-}
-
-
-/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
-/// illegal type: see if it is only used by trunc or trunc(lshr) operations.  If
-/// so, we split the PHI into the various pieces being extracted.  This sort of
-/// thing is introduced when SROA promotes an aggregate to large integer values.
-///
-/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
-/// inttoptr.  We should produce new PHIs in the right type.
-///
-Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
-  // PHIUsers - Keep track of all of the truncated values extracted from a set
-  // of PHIs, along with their offset.  These are the things we want to rewrite.
-  SmallVector<PHIUsageRecord, 16> PHIUsers;
-
-  // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
-  // nodes which are extracted from.  PHIsInspected is a set we use to avoid
-  // revisiting PHIs; PHIsToSlice is an ordered list of the PHIs whose uses we
-  // still need to check (to ensure they are all extracts).
-  SmallVector<PHINode*, 8> PHIsToSlice;
-  SmallPtrSet<PHINode*, 8> PHIsInspected;
-
-  PHIsToSlice.push_back(&FirstPhi);
-  PHIsInspected.insert(&FirstPhi);
-
-  for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
-    PHINode *PN = PHIsToSlice[PHIId];
-
-    // Scan the input list of the PHI.  If any input is an invoke, and if the
-    // input is defined in the predecessor, then we can't split the critical
-    // edge that is required to insert a truncate.  Because of this, we have to
-    // bail out.
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-      InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
-      if (II == 0) continue;
-      if (II->getParent() != PN->getIncomingBlock(i))
-        continue;
-
-      // If we have a phi, and if it's directly in the predecessor, then we have
-      // a critical edge where we need to put the truncate.  Since we can't
-      // split the edge in instcombine, we have to bail out.
-      return 0;
-    }
-
-
-    for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
-         UI != E; ++UI) {
-      Instruction *User = cast<Instruction>(*UI);
-
-      // If the user is a PHI, inspect its uses recursively.
-      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-        if (PHIsInspected.insert(UserPN))
-          PHIsToSlice.push_back(UserPN);
-        continue;
-      }
-
-      // Truncates are always ok.
-      if (isa<TruncInst>(User)) {
-        PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
-        continue;
-      }
-
-      // Otherwise it must be a lshr which can only be used by one trunc.
-      if (User->getOpcode() != Instruction::LShr ||
-          !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
-          !isa<ConstantInt>(User->getOperand(1)))
-        return 0;
-
-      unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
-      PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
-    }
-  }
-
-  // If we have no users, they must be all self uses, just nuke the PHI.
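// [Editor's aside - illustration, not part of the patch.]  The DenseMapInfo
// specialization above is the boilerplate that lets (PN, Shift, Width)
// triples key a DenseMap; DenseMap additionally needs the empty/tombstone
// sentinels.  The same idea with the standard library, for comparison:
#include <cassert>
#include <cstddef>
#include <functional>
#include <unordered_map>
struct Key {
  const void *PN;  // stand-in for the PHINode*
  unsigned Shift, Width;
  bool operator==(const Key &o) const {
    return PN == o.PN && Shift == o.Shift && Width == o.Width;
  }
};
struct KeyHash {
  std::size_t operator()(const Key &k) const {
    // Same spirit as getHashValue: fold shift and width into the pointer hash.
    return std::hash<const void *>()(k.PN) ^ (k.Shift >> 3) ^ (k.Width >> 3);
  }
};
int main() {
  std::unordered_map<Key, int, KeyHash> m;
  int anchor = 0;
  m[Key{&anchor, 8, 16}] = 42;
  assert(m.at(Key{&anchor, 8, 16}) == 42);
}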
- if (PHIUsers.empty()) - return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); - - // If this phi node is transformable, create new PHIs for all the pieces - // extracted out of it. First, sort the users by their offset and size. - array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - - DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; - ); - - // PredValues - This is a temporary used when rewriting PHI nodes. It is - // hoisted out here to avoid construction/destruction thrashing. - DenseMap PredValues; - - // ExtractedVals - Each new PHI we introduce is saved here so we don't - // introduce redundant PHIs. - DenseMap ExtractedVals; - - for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { - unsigned PHIId = PHIUsers[UserI].PHIId; - PHINode *PN = PHIsToSlice[PHIId]; - unsigned Offset = PHIUsers[UserI].Shift; - const Type *Ty = PHIUsers[UserI].Inst->getType(); - - PHINode *EltPHI; - - // If we've already lowered a user like this, reuse the previously lowered - // value. - if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { - - // Otherwise, Create the new PHI node for this user. - EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); - assert(EltPHI->getType() != PN->getType() && - "Truncate didn't shrink phi?"); - - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *Pred = PN->getIncomingBlock(i); - Value *&PredVal = PredValues[Pred]; - - // If we already have a value for this predecessor, reuse it. - if (PredVal) { - EltPHI->addIncoming(PredVal, Pred); - continue; - } - - // Handle the PHI self-reuse case. - Value *InVal = PN->getIncomingValue(i); - if (InVal == PN) { - PredVal = EltPHI; - EltPHI->addIncoming(PredVal, Pred); - continue; - } - - if (PHINode *InPHI = dyn_cast(PN)) { - // If the incoming value was a PHI, and if it was one of the PHIs we - // already rewrote it, just use the lowered value. - if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { - PredVal = Res; - EltPHI->addIncoming(PredVal, Pred); - continue; - } - } - - // Otherwise, do an extract in the predecessor. - Builder->SetInsertPoint(Pred, Pred->getTerminator()); - Value *Res = InVal; - if (Offset) - Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), - Offset), "extract"); - Res = Builder->CreateTrunc(Res, Ty, "extract.t"); - PredVal = Res; - EltPHI->addIncoming(Res, Pred); - - // If the incoming value was a PHI, and if it was one of the PHIs we are - // rewriting, we will ultimately delete the code we inserted. This - // means we need to revisit that PHI to make sure we extract out the - // needed piece. - if (PHINode *OldInVal = dyn_cast(PN->getIncomingValue(i))) - if (PHIsInspected.count(OldInVal)) { - unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), - OldInVal)-PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, - cast(Res))); - ++UserE; - } - } - PredValues.clear(); - - DEBUG(errs() << " Made element PHI for offset " << Offset << ": " - << *EltPHI << '\n'); - ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; - } - - // Replace the use of this piece with the PHI node. - ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); - } - - // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) - // with undefs. 
- Value *Undef = UndefValue::get(FirstPhi.getType()); - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - ReplaceInstUsesWith(*PHIsToSlice[i], Undef); - return ReplaceInstUsesWith(FirstPhi, Undef); -} - -// PHINode simplification -// -Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - - if (Value *V = PN.hasConstantValue()) - return ReplaceInstUsesWith(PN, V); - - // If all PHI operands are the same operation, pull them through the PHI, - // reducing code size. - if (isa(PN.getIncomingValue(0)) && - isa(PN.getIncomingValue(1)) && - cast(PN.getIncomingValue(0))->getOpcode() == - cast(PN.getIncomingValue(1))->getOpcode() && - // FIXME: The hasOneUse check will fail for PHIs that use the value more - // than themselves more than once. - PN.getIncomingValue(0)->hasOneUse()) - if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) - return Result; - - // If this is a trivial cycle in the PHI node graph, remove it. Basically, if - // this PHI only has a single use (a PHI), and if that PHI only has one use (a - // PHI)... break the cycle. - if (PN.hasOneUse()) { - Instruction *PHIUser = cast(PN.use_back()); - if (PHINode *PU = dyn_cast(PHIUser)) { - SmallPtrSet PotentiallyDeadPHIs; - PotentiallyDeadPHIs.insert(&PN); - if (DeadPHICycle(PU, PotentiallyDeadPHIs)) - return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); - } - - // If this phi has a single use, and if that use just computes a value for - // the next iteration of a loop, delete the phi. This occurs with unused - // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this - // common case here is good because the only other things that catch this - // are induction variable analysis (sometimes) and ADCE, which is only run - // late. - if (PHIUser->hasOneUse() && - (isa(PHIUser) || isa(PHIUser)) && - PHIUser->use_back() == &PN) { - return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); - } - } - - // We sometimes end up with phi cycles that non-obviously end up being the - // same value, for example: - // z = some value; x = phi (y, z); y = phi (x, z) - // where the phi nodes don't necessarily need to be in the same block. Do a - // quick check to see if the PHI node only contains a single non-phi value, if - // so, scan to see if the phi cycle is actually equal to that value. - { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); - // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && - isa(PN.getIncomingValue(InValNo))) - ++InValNo; - - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); - - // Scan the rest of the operands to see if there are any conflicts, if so - // there is no need to recursively scan other phis. - for (++InValNo; InValNo != NumOperandVals; ++InValNo) { - Value *OpVal = PN.getIncomingValue(InValNo); - if (OpVal != NonPhiInVal && !isa(OpVal)) - break; - } - - // If we scanned over all operands, then we have one unique value plus - // phi values. Scan PHI nodes to see if they all merge in each other or - // the value. - if (InValNo == NumOperandVals) { - SmallPtrSet ValueEqualPHIs; - if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) - return ReplaceInstUsesWith(PN, NonPhiInVal); - } - } - } - - // If there are multiple PHIs, sort their operands so that they all list - // the blocks in the same order. This will help identical PHIs be eliminated - // by other passes. 
Other passes shouldn't depend on this for correctness - // however. - PHINode *FirstPN = cast(PN.getParent()->begin()); - if (&PN != FirstPN) - for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *BBA = PN.getIncomingBlock(i); - BasicBlock *BBB = FirstPN->getIncomingBlock(i); - if (BBA != BBB) { - Value *VA = PN.getIncomingValue(i); - unsigned j = PN.getBasicBlockIndex(BBB); - Value *VB = PN.getIncomingValue(j); - PN.setIncomingBlock(i, BBB); - PN.setIncomingValue(i, VB); - PN.setIncomingBlock(j, BBA); - PN.setIncomingValue(j, VA); - // NOTE: Instcombine normally would want us to "return &PN" if we - // modified any of the operands of an instruction. However, since we - // aren't adding or removing uses (just rearranging them) we don't do - // this in this case. - } - } - - // If this is an integer PHI and we know that it has an illegal type, see if - // it is only used by trunc or trunc(lshr) operations. If so, we split the - // PHI into the various pieces being extracted. This sort of thing is - // introduced when SROA promotes an aggregate to a single large integer type. - if (isa(PN.getType()) && TD && - !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) - if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) - return Res; - - return 0; -} - -Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { - SmallVector Ops(GEP.op_begin(), GEP.op_end()); - - if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) - return ReplaceInstUsesWith(GEP, V); - - Value *PtrOp = GEP.getOperand(0); - - if (isa(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); - - // Eliminate unneeded casts for indices. - if (TD) { - bool MadeChange = false; - unsigned PtrSize = TD->getPointerSizeInBits(); - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); - I != E; ++I, ++GTI) { - if (!isa(*GTI)) continue; - - // If we are using a wider index than needed for this platform, shrink it - // to what we need. If narrower, sign-extend it to what we need. This - // explicit cast can make subsequent optimizations more obvious. - unsigned OpBits = cast((*I)->getType())->getBitWidth(); - if (OpBits == PtrSize) - continue; - - *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); - MadeChange = true; - } - if (MadeChange) return &GEP; - } - - // Combine Indices - If the source pointer to this getelementptr instruction - // is a getelementptr instruction, combine the indices of the two - // getelementptr instructions into a single instruction. - // - if (GEPOperator *Src = dyn_cast(PtrOp)) { - // Note that if our source is a gep chain itself that we wait for that - // chain to be resolved before we perform this transformation. This - // avoids us creating a TON of code in some cases. - // - if (GetElementPtrInst *SrcGEP = - dyn_cast(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2) - return 0; // Wait until our source is folded to completion. - - SmallVector Indices; - - // Find out whether the last index in the source GEP is a sequential idx. - bool EndsWithSequential = false; - for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); - I != E; ++I) - EndsWithSequential = !isa(*I); - - // Can we combine the two pointer arithmetics offsets? - if (EndsWithSequential) { - // Replace: gep (gep %P, long B), long A, ... - // With: T = long A+B; gep %P, T, ... 
- // - Value *Sum; - Value *SO1 = Src->getOperand(Src->getNumOperands()-1); - Value *GO1 = GEP.getOperand(1); - if (SO1 == Constant::getNullValue(SO1->getType())) { - Sum = GO1; - } else if (GO1 == Constant::getNullValue(GO1->getType())) { - Sum = SO1; - } else { - // If they aren't the same type, then the input hasn't been processed - // by the loop above yet (which canonicalizes sequential index types to - // intptr_t). Just avoid transforming this until the input has been - // normalized. - if (SO1->getType() != GO1->getType()) - return 0; - Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); - } - - // Update the GEP in place if possible. - if (Src->getNumOperands() == 2) { - GEP.setOperand(0, Src->getOperand(0)); - GEP.setOperand(1, Sum); - return &GEP; - } - Indices.append(Src->op_begin()+1, Src->op_end()-1); - Indices.push_back(Sum); - Indices.append(GEP.op_begin()+2, GEP.op_end()); - } else if (isa(*GEP.idx_begin()) && - cast(*GEP.idx_begin())->isNullValue() && - Src->getNumOperands() != 1) { - // Otherwise we can do the fold if the first index of the GEP is a zero - Indices.append(Src->op_begin()+1, Src->op_end()); - Indices.append(GEP.idx_begin()+1, GEP.idx_end()); - } - - if (!Indices.empty()) - return (cast(&GEP)->isInBounds() && - Src->isInBounds()) ? - GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), - Indices.end(), GEP.getName()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), - Indices.end(), GEP.getName()); - } - - // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). - if (Value *X = getBitCastOperand(PtrOp)) { - assert(isa(X->getType()) && "Must be cast from pointer"); - - // If the input bitcast is actually "bitcast(bitcast(x))", then we don't - // want to change the gep until the bitcasts are eliminated. - if (getBitCastOperand(X)) { - Worklist.AddValue(PtrOp); - return 0; - } - - bool HasZeroPointerIndex = false; - if (ConstantInt *C = dyn_cast(GEP.getOperand(1))) - HasZeroPointerIndex = C->isZero(); - - // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... - // into : GEP [10 x i8]* X, i32 0, ... - // - // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... - // into : GEP i8* X, ... - // - // This occurs when the program declares an array extern like "int X[];" - if (HasZeroPointerIndex) { - const PointerType *CPTy = cast(PtrOp->getType()); - const PointerType *XTy = cast(X->getType()); - if (const ArrayType *CATy = - dyn_cast(CPTy->getElementType())) { - // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? - if (CATy->getElementType() == XTy->getElementType()) { - // -> GEP i8* X, ... - SmallVector Indices(GEP.idx_begin()+1, GEP.idx_end()); - return cast(&GEP)->isInBounds() ? - GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), - GEP.getName()) : - GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), - GEP.getName()); - } - - if (const ArrayType *XATy = dyn_cast(XTy->getElementType())){ - // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? - if (CATy->getElementType() == XATy->getElementType()) { - // -> GEP [10 x i8]* X, i32 0, ... - // At this point, we know that the cast source type is a pointer - // to an array of the same type as the destination pointer - // array. Because the array type is never stepped over (there - // is a leading zero) we can fold the cast into this GEP. 
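// [Editor's aside - illustration, not part of the patch.]  Folding
// gep(gep %P, B), A, ... concatenates the index lists and adds the two
// indices that meet at the seam; the null-index checks above are just the
// algebraic identities B+0 = B and 0+A = A.  As plain arithmetic:
#include <cassert>
static long combineTrailingIndices(long SO1, long GO1) {
  if (SO1 == 0) return GO1;  // gep (gep P, 0), A, ...  ==> gep P, A, ...
  if (GO1 == 0) return SO1;  // gep (gep P, B), 0, ...  ==> gep P, B, ...
  return SO1 + GO1;          // otherwise emit an add, as CreateAdd does above
}
int main() { assert(combineTrailingIndices(3, 4) == 7); }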
- GEP.setOperand(0, X); - return &GEP; - } - } - } - } else if (GEP.getNumOperands() == 2) { - // Transform things like: - // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V - // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast - const Type *SrcElTy = cast(X->getType())->getElementType(); - const Type *ResElTy=cast(PtrOp->getType())->getElementType(); - if (TD && isa(SrcElTy) && - TD->getTypeAllocSize(cast(SrcElTy)->getElementType()) == - TD->getTypeAllocSize(ResElTy)) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); - Idx[1] = GEP.getOperand(1); - Value *NewGEP = cast(&GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : - Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); - // V and GEP are both pointer types --> BitCast - return new BitCastInst(NewGEP, GEP.getType()); - } - - // Transform things like: - // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp - // (where tmp = 8*tmp2) into: - // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - - if (TD && isa(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) { - uint64_t ArrayEltSize = - TD->getTypeAllocSize(cast(SrcElTy)->getElementType()); - - // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We - // allow either a mul, shift, or constant here. - Value *NewIdx = 0; - ConstantInt *Scale = 0; - if (ArrayEltSize == 1) { - NewIdx = GEP.getOperand(1); - Scale = ConstantInt::get(cast(NewIdx->getType()), 1); - } else if (ConstantInt *CI = dyn_cast(GEP.getOperand(1))) { - NewIdx = ConstantInt::get(CI->getType(), 1); - Scale = CI; - } else if (Instruction *Inst =dyn_cast(GEP.getOperand(1))){ - if (Inst->getOpcode() == Instruction::Shl && - isa(Inst->getOperand(1))) { - ConstantInt *ShAmt = cast(Inst->getOperand(1)); - uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = ConstantInt::get(cast(Inst->getType()), - 1ULL << ShAmtVal); - NewIdx = Inst->getOperand(0); - } else if (Inst->getOpcode() == Instruction::Mul && - isa(Inst->getOperand(1))) { - Scale = cast(Inst->getOperand(1)); - NewIdx = Inst->getOperand(0); - } - } - - // If the index will be to exactly the right offset with the scale taken - // out, perform the transformation. Note, we don't know whether Scale is - // signed or not. We'll use unsigned version of division/modulo - // operation after making sure Scale doesn't have the sign bit set. - if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && - Scale->getZExtValue() % ArrayEltSize == 0) { - Scale = ConstantInt::get(Scale->getType(), - Scale->getZExtValue() / ArrayEltSize); - if (Scale->getZExtValue() != 1) { - Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), - false /*ZExt*/); - NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); - } - - // Insert the new GEP instruction. - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); - Idx[1] = NewIdx; - Value *NewGEP = cast(&GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : - Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); - // The NewGEP must be pointer typed, so must the old one -> BitCast - return new BitCastInst(NewGEP, GEP.getType()); - } - } - } - } - - /// See if we can simplify: - /// X = bitcast A* to B* - /// Y = gep X, <...constant indices...> - /// into a gep of the original struct. This is important for SROA and alias - /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. 
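// [Editor's aside - illustration, not part of the patch.]  The gep-of-bitcast
// rewrite above only fires when the byte offset NewIdx*Scale covers a whole
// number of array elements; the divisibility bookkeeping as plain arithmetic:
#include <cassert>
static bool scaleToElements(long long scale, unsigned long long eltSize,
                            unsigned long long &mult) {
  // Mirrors the Scale->getSExtValue() >= 0 and % ArrayEltSize == 0 checks.
  if (eltSize == 0 || scale < 0 ||
      (unsigned long long)scale % eltSize != 0)
    return false;
  mult = (unsigned long long)scale / eltSize;  // new per-element multiplier
  return true;
}
int main() {
  unsigned long long m;
  assert(scaleToElements(16, 8, m) && m == 2);  // e.g. shl-by-4 index over i64s
  assert(!scaleToElements(12, 8, m));           // not element-aligned: give up
}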
- if (BitCastInst *BCI = dyn_cast(PtrOp)) { - if (TD && - !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { - // Determine how much the GEP moves the pointer. We are guaranteed to get - // a constant back from EmitGEPOffset. - ConstantInt *OffsetV = cast(EmitGEPOffset(&GEP, *this)); - int64_t Offset = OffsetV->getSExtValue(); - - // If this GEP instruction doesn't move the pointer, just replace the GEP - // with a bitcast of the real input to the dest type. - if (Offset == 0) { - // If the bitcast is of an allocation, and the allocation will be - // converted to match the type of the cast, don't touch this. - if (isa(BCI->getOperand(0)) || - isMalloc(BCI->getOperand(0))) { - // See if the bitcast simplifies, if so, don't nuke this GEP yet. - if (Instruction *I = visitBitCast(*BCI)) { - if (I != BCI) { - I->takeName(BCI); - BCI->getParent()->getInstList().insert(BCI, I); - ReplaceInstUsesWith(*BCI, I); - } - return &GEP; - } - } - return new BitCastInst(BCI->getOperand(0), GEP.getType()); - } - - // Otherwise, if the offset is non-zero, we need to find out if there is a - // field at Offset in 'A's type. If so, we can pull the cast through the - // GEP. - SmallVector NewIndices; - const Type *InTy = - cast(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { - Value *NGEP = cast(&GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); - - if (NGEP->getType() == GEP.getType()) - return ReplaceInstUsesWith(GEP, NGEP); - NGEP->takeName(&GEP); - return new BitCastInst(NGEP, GEP.getType()); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { - // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 - if (AI.isArrayAllocation()) { // Check C != 1 - if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { - const Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - assert(isa(AI) && "Unknown type of allocation inst!"); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - New->setAlignment(AI.getAlignment()); - - // Scan to the end of the allocation instructions, to skip over a block of - // allocas if possible...also skip interleaved debug info - // - BasicBlock::iterator It = New; - while (isa(*It) || isa(*It)) ++It; - - // Now that I is pointing to the first non-allocation-inst in the block, - // insert our getelementptr instruction... - // - Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context)); - Value *Idx[2]; - Idx[0] = NullIdx; - Idx[1] = NullIdx; - Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, - New->getName()+".sub", It); - - // Now make everything use the getelementptr instead of the original - // allocation. - return ReplaceInstUsesWith(AI, V); - } else if (isa(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - } - } - - if (TD && isa(AI) && AI.getAllocatedType()->isSized()) { - // If alloca'ing a zero byte object, replace the alloca with a null pointer. - // Note that we only do this for alloca's, because malloc should allocate - // and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - - // If the alignment is 0 (unspecified), assign it the preferred alignment. 
- if (AI.getAlignment() == 0) - AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); - } - - return 0; -} - -Instruction *InstCombiner::visitFree(Instruction &FI) { - Value *Op = FI.getOperand(1); - - // free undef -> unreachable. - if (isa(Op)) { - // Insert a new store to null because we cannot modify the CFG here. - new StoreInst(ConstantInt::getTrue(*Context), - UndefValue::get(Type::getInt1PtrTy(*Context)), &FI); - return EraseInstFromFunction(FI); - } - - // If we have 'free null' delete the instruction. This can happen in stl code - // when lots of inlining happens. - if (isa(Op)) - return EraseInstFromFunction(FI); - - // If we have a malloc call whose only use is a free call, delete both. - if (isMalloc(Op)) { - if (CallInst* CI = extractMallocCallFromBitCast(Op)) { - if (Op->hasOneUse() && CI->hasOneUse()) { - EraseInstFromFunction(FI); - EraseInstFromFunction(*CI); - return EraseInstFromFunction(*cast(Op)); - } - } else { - // Op is a call to malloc - if (Op->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*cast(Op)); - } - } - } - - return 0; -} - -/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. -static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, - const TargetData *TD) { - User *CI = cast(LI.getOperand(0)); - Value *CastOp = CI->getOperand(0); - LLVMContext *Context = IC.getContext(); - - const PointerType *DestTy = cast(CI->getType()); - const Type *DestPTy = DestTy->getElementType(); - if (const PointerType *SrcTy = dyn_cast(CastOp->getType())) { - - // If the address spaces don't match, don't eliminate the cast. - if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return 0; - - const Type *SrcPTy = SrcTy->getElementType(); - - if (DestPTy->isInteger() || isa(DestPTy) || - isa(DestPTy)) { - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (const ArrayType *ASrcTy = dyn_cast(SrcPTy)) - if (Constant *CSrc = dyn_cast(CastOp)) - if (ASrcTy->getNumElements() != 0) { - Value *Idxs[2]; - Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); - Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); - SrcTy = cast(CastOp->getType()); - SrcPTy = SrcTy->getElementType(); - } - - if (IC.getTargetData() && - (SrcPTy->isInteger() || isa(SrcPTy) || - isa(SrcPTy)) && - // Do not allow turning this into a load of an integer, which is then - // casted to a pointer, this pessimizes pointer analysis a lot. - (isa(SrcPTy) == isa(LI.getType())) && - IC.getTargetData()->getTypeSizeInBits(SrcPTy) == - IC.getTargetData()->getTypeSizeInBits(DestPTy)) { - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before the load, cast - // the result of the loaded value. - Value *NewLoad = - IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); - // Now cast the result of the load. - return new BitCastInst(NewLoad, LI.getType()); - } - } - } - return 0; -} - -Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { - Value *Op = LI.getOperand(0); - - // Attempt to improve the alignment. - if (TD) { - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); - if (KnownAlign > - (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : - LI.getAlignment())) - LI.setAlignment(KnownAlign); - } - - // load (cast X) --> cast (load X) iff safe. 
- if (isa(Op)) - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - - // None of the following transforms are legal for volatile loads. - if (LI.isVolatile()) return 0; - - // Do really simple store-to-load forwarding and load CSE, to catch cases - // where there are several consequtive memory accesses to the same location, - // separated by a few arithmetic operations. - BasicBlock::iterator BBI = &LI; - if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) - return ReplaceInstUsesWith(LI, AvailableVal); - - // load(gep null, ...) -> unreachable - if (GetElementPtrInst *GEPI = dyn_cast(Op)) { - const Value *GEPI0 = GEPI->getOperand(0); - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(GEPI0) && GEPI->getPointerAddressSpace() == 0){ - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - } - - // load null/undef -> unreachable - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(Op) || - (isa(Op) && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - // Instcombine load (constantexpr_cast global) -> cast (load global) - if (ConstantExpr *CE = dyn_cast(Op)) - if (CE->isCast()) - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - - if (Op->hasOneUse()) { - // Change select and PHI nodes to select values instead of addresses: this - // helps alias analysis out a lot, allows many others simplifications, and - // exposes redundancy in the code. - // - // Note that we cannot do the transformation unless we know that the - // introduced loads cannot trap! Something like this is valid as long as - // the condition is always false: load (select bool %C, int* null, int* %G), - // but it would not be valid if we transformed it to load from null - // unconditionally. - // - if (SelectInst *SI = dyn_cast(Op)) { - // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). - if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { - Value *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); - Value *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); - return SelectInst::Create(SI->getCondition(), V1, V2); - } - - // load (select (cond, null, P)) -> load P - if (Constant *C = dyn_cast(SI->getOperand(1))) - if (C->isNullValue()) { - LI.setOperand(0, SI->getOperand(2)); - return &LI; - } - - // load (select (cond, P, null)) -> load P - if (Constant *C = dyn_cast(SI->getOperand(2))) - if (C->isNullValue()) { - LI.setOperand(0, SI->getOperand(1)); - return &LI; - } - } - } - return 0; -} - -/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P -/// when possible. 
-/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
-/// when possible.  This makes it generally easy to do alias analysis and/or
-/// SROA/mem2reg of the memory object.
-static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
-  User *CI = cast<User>(SI.getOperand(1));
-  Value *CastOp = CI->getOperand(0);
-
-  const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
-  const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
-  if (SrcTy == 0) return 0;
-
-  const Type *SrcPTy = SrcTy->getElementType();
-
-  if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
-    return 0;
-
-  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
-  /// to its first element.  This allows us to handle things like:
-  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
-  /// on 32-bit hosts.
-  SmallVector<Value*, 4> NewGEPIndices;
-
-  // If the source is an array, the code below will not succeed.  Check to
-  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
-  // constants.
-  if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
-    // Index through pointer.
-    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext()));
-    NewGEPIndices.push_back(Zero);
-
-    while (1) {
-      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
-        if (!STy->getNumElements()) /* Struct can be empty {} */
-          break;
-        NewGEPIndices.push_back(Zero);
-        SrcPTy = STy->getElementType(0);
-      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
-        NewGEPIndices.push_back(Zero);
-        SrcPTy = ATy->getElementType();
-      } else {
-        break;
-      }
-    }
-
-    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
-  }
-
-  if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
-    return 0;
-
-  // If the pointers point into different address spaces or if they point to
-  // values with different sizes, we can't do the transformation.
-  if (!IC.getTargetData() ||
-      SrcTy->getAddressSpace() !=
-        cast<PointerType>(CI->getType())->getAddressSpace() ||
-      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
-      IC.getTargetData()->getTypeSizeInBits(DestPTy))
-    return 0;
-
-  // Okay, we are casting from one integer or pointer type to another of
-  // the same size.  Instead of casting the pointer before
-  // the store, cast the value to be stored.
-  Value *NewCast;
-  Value *SIOp0 = SI.getOperand(0);
-  Instruction::CastOps opcode = Instruction::BitCast;
-  const Type* CastSrcTy = SIOp0->getType();
-  const Type* CastDstTy = SrcPTy;
-  if (isa<PointerType>(CastDstTy)) {
-    if (CastSrcTy->isInteger())
-      opcode = Instruction::IntToPtr;
-  } else if (isa<IntegerType>(CastDstTy)) {
-    if (isa<PointerType>(SIOp0->getType()))
-      opcode = Instruction::PtrToInt;
-  }
-
-  // SIOp0 is a pointer to aggregate and this is a store to the first field,
-  // emit a GEP to index into its first field.
-  if (!NewGEPIndices.empty())
-    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
-                                           NewGEPIndices.end());
-
-  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
-                                   SIOp0->getName()+".c");
-  return new StoreInst(NewCast, CastOp);
-}
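The NewGEPIndices logic above builds a chain of zero indices that steps through the leading members of nested structs and arrays, so a store through a bitcast pointer can instead address the aggregate's first scalar field. A sketch of that index computation over a simplified type model (Ty and firstFieldPath are illustrative inventions, not LLVM's type system):

    #include <vector>

    struct Ty {
      enum Kind { Int, Struct, Array } kind;
      std::vector<Ty*> elems;  // members for Struct, {element} for Array
    };

    // One zero index per aggregate level, mirroring NewGEPIndices above.
    std::vector<unsigned> firstFieldPath(Ty *T) {
      std::vector<unsigned> idxs(1, 0);  // first index steps through the pointer
      while ((T->kind == Ty::Struct || T->kind == Ty::Array) &&
             !T->elems.empty()) {
        idxs.push_back(0);
        T = T->elems[0];
      }
      return idxs;
    }

For {foo*, float}* this yields the path 0,0: a GEP to the first field, which has the same address as the whole object.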
-/// equivalentAddressValues - Test if A and B will obviously have the same
-/// value. This includes recognizing that %t0 and %t1 will have the same
-/// value in code like this:
-///   %t0 = getelementptr @a, 0, 3
-///   store i32 0, i32* %t0
-///   %t1 = getelementptr @a, 0, 3
-///   %t2 = load i32* %t1
-///
-static bool equivalentAddressValues(Value *A, Value *B) {
-  // Test if the values are trivially equivalent.
-  if (A == B) return true;
-
-  // Test if the values come from identical arithmetic instructions.
-  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
-  // it's only used to compare two uses within the same basic block, which
-  // means that they'll always either have the same value or one of them
-  // will have an undefined value.
-  if (isa<BinaryOperator>(A) ||
-      isa<CastInst>(A) ||
-      isa<PHINode>(A) ||
-      isa<GetElementPtrInst>(A))
-    if (Instruction *BI = dyn_cast<Instruction>(B))
-      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
-        return true;
-
-  // Otherwise they may not be equivalent.
-  return false;
-}
-
-// If this instruction has two uses, one of which is a llvm.dbg.declare,
-// return the llvm.dbg.declare.
-DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
-  if (!V->hasNUses(2))
-    return 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
-      return DI;
-    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
-      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
-        return DI;
-    }
-  }
-  return 0;
-}
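hasOneUsePlusDeclare treats a value as effectively single-use when the only extra user is debug metadata, which must not change the generated code; the real routine also looks through one pointer bitcast to find the llvm.dbg.declare. A stripped-down model of the two-use check (Use and the function below are stand-ins for illustration only):

    #include <vector>

    struct Use { bool isDbgDeclare; };

    // True when a value has exactly two users and one of them is the
    // debug declare, i.e. it is "single-use" for codegen purposes.
    bool hasOneUsePlusDeclare(const std::vector<Use> &uses) {
      if (uses.size() != 2) return false;
      return uses[0].isDbgDeclare != uses[1].isDbgDeclare;  // exactly one declare
    }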
-Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
-  Value *Val = SI.getOperand(0);
-  Value *Ptr = SI.getOperand(1);
-
-  // If the RHS is an alloca with a single use, zapify the store, making the
-  // alloca dead.
-  // If the RHS is an alloca with two uses, the other one being a
-  // llvm.dbg.declare, zapify the store and the declare, making the
-  // alloca dead.  We must do this to prevent declares from affecting
-  // codegen.
-  if (!SI.isVolatile()) {
-    if (Ptr->hasOneUse()) {
-      if (isa<AllocaInst>(Ptr)) {
-        EraseInstFromFunction(SI);
-        ++NumCombined;
-        return 0;
-      }
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
-        if (isa<AllocaInst>(GEP->getOperand(0))) {
-          if (GEP->getOperand(0)->hasOneUse()) {
-            EraseInstFromFunction(SI);
-            ++NumCombined;
-            return 0;
-          }
-          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
-            EraseInstFromFunction(*DI);
-            EraseInstFromFunction(SI);
-            ++NumCombined;
-            return 0;
-          }
-        }
-      }
-    }
-    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
-      EraseInstFromFunction(*DI);
-      EraseInstFromFunction(SI);
-      ++NumCombined;
-      return 0;
-    }
-  }
-
-  // Attempt to improve the alignment.
-  if (TD) {
-    unsigned KnownAlign =
-      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
-    if (KnownAlign >
-        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
-                                  SI.getAlignment()))
-      SI.setAlignment(KnownAlign);
-  }
-
-  // Do really simple DSE, to catch cases where there are several consecutive
-  // stores to the same location, separated by a few arithmetic operations. This
-  // situation often occurs with bitfield accesses.
-  BasicBlock::iterator BBI = &SI;
-  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
-       --ScanInsts) {
-    --BBI;
-    // Don't count debug info directives, lest they affect codegen,
-    // and we skip pointer-to-pointer bitcasts, which are NOPs.
-    // It is necessary for correctness to skip those that feed into a
-    // llvm.dbg.declare, as these are not present when debugging is off.
-    if (isa<DbgInfoIntrinsic>(BBI) ||
-        (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
-      ScanInsts++;
-      continue;
-    }
-
-    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
-      // Prev store isn't volatile, and stores to the same location?
-      if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
-                                                           SI.getOperand(1))) {
-        ++NumDeadStore;
-        ++BBI;
-        EraseInstFromFunction(*PrevSI);
-        continue;
-      }
-      break;
-    }
-
-    // If this is a load, we have to stop.  However, if the loaded value is from
-    // the pointer we're loading and is producing the pointer we're storing,
-    // then *this* store is dead (X = load P; store X -> P).
-    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
-          !SI.isVolatile()) {
-        EraseInstFromFunction(SI);
-        ++NumCombined;
-        return 0;
-      }
-      // Otherwise, this is a load from some other location.  Stores before it
-      // may not be dead.
-      break;
-    }
-
-    // Don't skip over loads or things that can modify memory.
-    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
-      break;
-  }
-
-
-  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
-
-  // store X, null    -> turns into 'unreachable' in SimplifyCFG
-  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
-    if (!isa<UndefValue>(Val)) {
-      SI.setOperand(0, UndefValue::get(Val->getType()));
-      if (Instruction *U = dyn_cast<Instruction>(Val))
-        Worklist.Add(U);  // Dropped a use.
-      ++NumCombined;
-    }
-    return 0;  // Do not modify these!
-  }
-
-  // store undef, Ptr -> noop
-  if (isa<UndefValue>(Val)) {
-    EraseInstFromFunction(SI);
-    ++NumCombined;
-    return 0;
-  }
-
-  // If the pointer destination is a cast, see if we can fold the cast into the
-  // source instead.
-  if (isa<CastInst>(Ptr))
-    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
-      return Res;
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
-    if (CE->isCast())
-      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
-        return Res;
-
-
-  // If this store is the last instruction in the basic block (possibly
-  // excepting debug info instructions and the pointer bitcasts that feed
-  // into them), and if the block ends with an unconditional branch, try
-  // to move it to the successor block.
-  BBI = &SI;
-  do {
-    ++BBI;
-  } while (isa<DbgInfoIntrinsic>(BBI) ||
-           (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
-  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
-    if (BI->isUnconditional())
-      if (SimplifyStoreAtEndOfBlock(SI))
-        return 0;  // xform done!
-
-  return 0;
-}
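The dead-store scan above in miniature: starting at a store, walk up to six instructions backwards; an older store to a provably equivalent address that is neither read nor clobbered in between is dead. Hypothetical simplified types again, and this omits the debug-intrinsic and bitcast skipping that the real loop performs:

    #include <cstddef>
    #include <vector>

    struct MInst { bool isStore; int addr; bool mayRead, mayWrite; };

    // Returns the index of a provably dead earlier store, or -1.
    long findDeadStoreBefore(std::vector<MInst> &bb, size_t storePos) {
      unsigned budget = 6;
      for (size_t i = storePos; i > 0 && budget; --budget) {
        MInst &I = bb[--i];
        if (I.isStore && I.addr == bb[storePos].addr)
          return static_cast<long>(i);  // overwritten before being observed
        if (I.mayRead || I.mayWrite)
          break;                        // the old value may still be observed
      }
      return -1;
    }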
-/// SimplifyStoreAtEndOfBlock - Turn things like:
-///   if () { *P = v1; } else { *P = v2 }
-/// into a phi node with a store in the successor.
-///
-/// Simplify things like:
-///   *P = v1; if () { *P = v2; }
-/// into a phi node with a store in the successor.
-///
-bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
-  BasicBlock *StoreBB = SI.getParent();
-
-  // Check to see if the successor block has exactly two incoming edges.  If
-  // so, see if the other predecessor contains a store to the same location.
-  // If so, insert a PHI node (if needed) and move the stores down.
-  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
-  // Determine whether Dest has exactly two predecessors and, if so, compute
-  // the other predecessor.
-  pred_iterator PI = pred_begin(DestBB);
-  BasicBlock *OtherBB = 0;
-  if (*PI != StoreBB)
-    OtherBB = *PI;
-  ++PI;
-  if (PI == pred_end(DestBB))
-    return false;
-
-  if (*PI != StoreBB) {
-    if (OtherBB)
-      return false;
-    OtherBB = *PI;
-  }
-  if (++PI != pred_end(DestBB))
-    return false;
-
-  // Bail out if all the relevant blocks aren't distinct (this can happen,
-  // for example, if SI is in an infinite loop).
-  if (StoreBB == DestBB || OtherBB == DestBB)
-    return false;
-
-  // Verify that the other block ends in a branch and is not otherwise empty.
-  BasicBlock::iterator BBI = OtherBB->getTerminator();
-  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
-  if (!OtherBr || BBI == OtherBB->begin())
-    return false;
-
-  // If the other block ends in an unconditional branch, check for the 'if then
-  // else' case: there is an instruction before the branch.
-  StoreInst *OtherStore = 0;
-  if (OtherBr->isUnconditional()) {
-    --BBI;
-    // Skip over debugging info.
-    while (isa<DbgInfoIntrinsic>(BBI) ||
-           (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
-      if (BBI==OtherBB->begin())
-        return false;
-      --BBI;
-    }
-    // If this isn't a store, isn't a store to the same location, or if the
-    // alignments differ, bail out.
-    OtherStore = dyn_cast<StoreInst>(BBI);
-    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
-        OtherStore->getAlignment() != SI.getAlignment())
-      return false;
-  } else {
-    // Otherwise, the other block ended with a conditional branch. If one of the
-    // destinations is StoreBB, then we have the if/then case.
-    if (OtherBr->getSuccessor(0) != StoreBB &&
-        OtherBr->getSuccessor(1) != StoreBB)
-      return false;
-
-    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
-    // if/then triangle.  See if there is a store to the same ptr as SI that
-    // lives in OtherBB.
-    for (;; --BBI) {
-      // Check to see if we find the matching store.
-      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
-        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
-            OtherStore->getAlignment() != SI.getAlignment())
-          return false;
-        break;
-      }
-      // If we find something that may be using or overwriting the stored
-      // value, or if we run out of instructions, we can't do the xform.
-      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
-          BBI == OtherBB->begin())
-        return false;
-    }
-
-    // In order to eliminate the store in OtherBr, we have to
-    // make sure nothing reads or overwrites the stored value in
-    // StoreBB.
-    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
-      // FIXME: This should really be AA driven.
-      if (I->mayReadFromMemory() || I->mayWriteToMemory())
-        return false;
-    }
-  }
-
-  // Insert a PHI node now if we need it.
-  Value *MergedVal = OtherStore->getOperand(0);
-  if (MergedVal != SI.getOperand(0)) {
-    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
-    PN->reserveOperandSpace(2);
-    PN->addIncoming(SI.getOperand(0), SI.getParent());
-    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
-    MergedVal = InsertNewInstBefore(PN, DestBB->front());
-  }
-
-  // Advance to a place where it is safe to insert the new store and
-  // insert it.
-  BBI = DestBB->getFirstNonPHI();
-  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                    OtherStore->isVolatile(),
-                                    SI.getAlignment()), *BBI);
-
-  // Nuke the old stores.
-  EraseInstFromFunction(SI);
-  EraseInstFromFunction(*OtherStore);
-  ++NumCombined;
-  return true;
-}
-Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
-  // Change br (not X), label True, label False to: br X, label False, True
-  Value *X = 0;
-  BasicBlock *TrueDest;
-  BasicBlock *FalseDest;
-  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
-      !isa<Constant>(X)) {
-    // Swap Destinations and condition...
-    BI.setCondition(X);
-    BI.setSuccessor(0, FalseDest);
-    BI.setSuccessor(1, TrueDest);
-    return &BI;
-  }
-
-  // Canonicalize fcmp_one -> fcmp_oeq
-  FCmpInst::Predicate FPred; Value *Y;
-  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
-                      TrueDest, FalseDest)) &&
-      BI.getCondition()->hasOneUse())
-    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
-        FPred == FCmpInst::FCMP_OGE) {
-      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
-      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
-
-      // Swap Destinations and condition.
-      BI.setSuccessor(0, FalseDest);
-      BI.setSuccessor(1, TrueDest);
-      Worklist.Add(Cond);
-      return &BI;
-    }
-
-  // Canonicalize icmp_ne -> icmp_eq
-  ICmpInst::Predicate IPred;
-  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
-                      TrueDest, FalseDest)) &&
-      BI.getCondition()->hasOneUse())
-    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
-        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
-        IPred == ICmpInst::ICMP_SGE) {
-      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
-      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
-      // Swap Destinations and condition.
-      BI.setSuccessor(0, FalseDest);
-      BI.setSuccessor(1, TrueDest);
-      Worklist.Add(Cond);
-      return &BI;
-    }
-
-  return 0;
-}
-
-Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
-  Value *Cond = SI.getCondition();
-  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
-    if (I->getOpcode() == Instruction::Add)
-      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
-        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
-        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
-          SI.setOperand(i,
-                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
-                                        AddRHS));
-        SI.setOperand(0, I->getOperand(0));
-        Worklist.Add(I);
-        return &SI;
-      }
-  }
-  return 0;
-}
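The two canonicalizations above rely on one identity: inverting a branch condition while swapping its successors never changes which block runs. A tiny self-checking model of that invariant (Branch, canonicalize, and target are illustrative, not LLVM types):

    #include <cassert>

    struct Branch { bool cond; int taken, fallthrough; };

    // Invert the condition and swap destinations; semantics are preserved.
    Branch canonicalize(Branch b) {
      return Branch{!b.cond, b.fallthrough, b.taken};
    }

    int target(const Branch &b) { return b.cond ? b.taken : b.fallthrough; }

    int main() {
      for (int c = 0; c < 2; ++c) {
        Branch b{c != 0, 1, 2};
        assert(target(b) == target(canonicalize(b)));  // holds for both inputs
      }
    }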
-Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
-  Value *Agg = EV.getAggregateOperand();
-
-  if (!EV.hasIndices())
-    return ReplaceInstUsesWith(EV, Agg);
-
-  if (Constant *C = dyn_cast<Constant>(Agg)) {
-    if (isa<UndefValue>(C))
-      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
-
-    if (isa<ConstantAggregateZero>(C))
-      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
-
-    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
-      // Extract the element indexed by the first index out of the constant
-      Value *V = C->getOperand(*EV.idx_begin());
-      if (EV.getNumIndices() > 1)
-        // Extract the remaining indices out of the constant indexed by the
-        // first index
-        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
-      else
-        return ReplaceInstUsesWith(EV, V);
-    }
-    return 0; // Can't handle other constants
-  }
-  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
-    // We're extracting from an insertvalue instruction, compare the indices
-    const unsigned *exti, *exte, *insi, *inse;
-    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
-         exte = EV.idx_end(), inse = IV->idx_end();
-         exti != exte && insi != inse;
-         ++exti, ++insi) {
-      if (*insi != *exti)
-        // The insert and extract both reference distinctly different elements.
-        // This means the extract is not influenced by the insert, and we can
-        // replace the aggregate operand of the extract with the aggregate
-        // operand of the insert. i.e., replace
-        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
-        // %E = extractvalue { i32, { i32 } } %I, 0
-        // with
-        // %E = extractvalue { i32, { i32 } } %A, 0
-        return ExtractValueInst::Create(IV->getAggregateOperand(),
-                                        EV.idx_begin(), EV.idx_end());
-    }
-    if (exti == exte && insi == inse)
-      // Both iterators are at the end: Index lists are identical. Replace
-      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
-      // %C = extractvalue { i32, { i32 } } %B, 1, 0
-      // with "i32 42"
-      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
-    if (exti == exte) {
-      // The extract list is a prefix of the insert list, i.e. replace
-      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
-      // %E = extractvalue { i32, { i32 } } %I, 1
-      // with
-      // %X = extractvalue { i32, { i32 } } %A, 1
-      // %E = insertvalue { i32 } %X, i32 42, 0
-      // by switching the order of the insert and extract (though the
-      // insertvalue should be left in, since it may have other uses).
-      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
-                                                 EV.idx_begin(), EV.idx_end());
-      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
-                                     insi, inse);
-    }
-    if (insi == inse)
-      // The insert list is a prefix of the extract list.
-      // We can simply remove the common indices from the extract and make it
-      // operate on the inserted value instead of the insertvalue result.
-      // i.e., replace
-      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
-      // %E = extractvalue { i32, { i32 } } %I, 1, 0
-      // with
-      // %E = extractvalue { i32 } { i32 42 }, 0
-      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
-                                      exti, exte);
-  }
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
-    // We're extracting from an intrinsic, see if we're the only user, which
-    // allows us to simplify multiple result intrinsics to simpler things that
-    // just get one value.
-    if (II->hasOneUse()) {
-      // Check if we're grabbing the overflow bit or the result of a 'with
-      // overflow' intrinsic.  If it's the latter we can remove the intrinsic
-      // and replace it with a traditional binary instruction.
-      switch (II->getIntrinsicID()) {
-      case Intrinsic::uadd_with_overflow:
-      case Intrinsic::sadd_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
-          II->replaceAllUsesWith(UndefValue::get(II->getType()));
-          EraseInstFromFunction(*II);
-          return BinaryOperator::CreateAdd(LHS, RHS);
-        }
-        break;
-      case Intrinsic::usub_with_overflow:
-      case Intrinsic::ssub_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
-          II->replaceAllUsesWith(UndefValue::get(II->getType()));
-          EraseInstFromFunction(*II);
-          return BinaryOperator::CreateSub(LHS, RHS);
-        }
-        break;
-      case Intrinsic::umul_with_overflow:
-      case Intrinsic::smul_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
-          II->replaceAllUsesWith(UndefValue::get(II->getType()));
-          EraseInstFromFunction(*II);
-          return BinaryOperator::CreateMul(LHS, RHS);
-        }
-        break;
-      default:
-        break;
-      }
-    }
-  }
-  // Can't simplify extracts from other values. Note that nested extracts are
-  // already simplified implicitly by the above (extract ( extract (insert) )
-  // will be translated into extract ( insert ( extract ) ) first and then just
-  // the value inserted, if appropriate).
-  return 0;
-}
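The with-overflow rewrites above exploit that field 0 of the {result, overflow-bit} pair is just the wrapped arithmetic result, so when the overflow bit is never extracted the intrinsic degrades to an ordinary add/sub/mul. A small model of the pair llvm.uadd.with.overflow.i32 conceptually produces (uaddWithOverflow is a stand-in, not a compiler API):

    #include <cstdint>
    #include <utility>

    // What the intrinsic conceptually yields: {wrapped result, overflow bit}.
    std::pair<uint32_t, bool> uaddWithOverflow(uint32_t a, uint32_t b) {
      uint32_t r = a + b;   // wraps modulo 2^32
      return {r, r < a};    // overflow iff the sum wrapped
    }

    // With the overflow bit dead, the pair collapses to a plain add, which
    // is what the BinaryOperator::CreateAdd replacement materializes.
    uint32_t onlyResult(uint32_t a, uint32_t b) { return a + b; }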
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation.
-static bool CheapToScalarize(Value *V, bool isConstant) {
-  if (isa<ConstantAggregateZero>(V))
-    return true;
-  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
-    if (isConstant) return true;
-    // If all elts are the same, we can extract.
-    Constant *Op0 = C->getOperand(0);
-    for (unsigned i = 1; i < C->getNumOperands(); ++i)
-      if (C->getOperand(i) != Op0)
-        return false;
-    return true;
-  }
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return false;
-
-  // Insert element gets simplified to the inserted element or is deleted if
-  // this is a constant idx extract element and it's a constant idx insertelt.
-  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
-      isa<ConstantInt>(I->getOperand(2)))
-    return true;
-  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
-    return true;
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
-    if (BO->hasOneUse() &&
-        (CheapToScalarize(BO->getOperand(0), isConstant) ||
-         CheapToScalarize(BO->getOperand(1), isConstant)))
-      return true;
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (CI->hasOneUse() &&
-        (CheapToScalarize(CI->getOperand(0), isConstant) ||
-         CheapToScalarize(CI->getOperand(1), isConstant)))
-      return true;
-
-  return false;
-}
-
-/// Read and decode a shufflevector mask.
-///
-/// It turns undef elements into values that are larger than the number of
-/// elements in the input.
-static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
-  unsigned NElts = SVI->getType()->getNumElements();
-  if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
-    return std::vector<unsigned>(NElts, 0);
-  if (isa<UndefValue>(SVI->getOperand(2)))
-    return std::vector<unsigned>(NElts, 2*NElts);
-
-  std::vector<unsigned> Result;
-  const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
-  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
-    if (isa<UndefValue>(*i))
-      Result.push_back(NElts*2);  // undef -> 2*NElts (out of range)
-    else
-      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
-  return Result;
-}
-
-/// FindScalarElement - Given a vector and an element number, see if the scalar
-/// value is already around as a register, for example if it were inserted then
-/// extracted from the vector.
-static Value *FindScalarElement(Value *V, unsigned EltNo,
-                                LLVMContext *Context) {
-  assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
-  const VectorType *PTy = cast<VectorType>(V->getType());
-  unsigned Width = PTy->getNumElements();
-  if (EltNo >= Width)  // Out of range access.
-    return UndefValue::get(PTy->getElementType());
-
-  if (isa<UndefValue>(V))
-    return UndefValue::get(PTy->getElementType());
-  else if (isa<ConstantAggregateZero>(V))
-    return Constant::getNullValue(PTy->getElementType());
-  else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
-    return CP->getOperand(EltNo);
-  else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
-    // If this is an insert to a variable element, we don't know what it is.
-    if (!isa<ConstantInt>(III->getOperand(2)))
-      return 0;
-    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
-    // If this is an insert to the element we are looking for, return the
-    // inserted value.
-    if (EltNo == IIElt)
-      return III->getOperand(1);
-
-    // Otherwise, the insertelement doesn't modify the value, recurse on its
-    // vector input.
-    return FindScalarElement(III->getOperand(0), EltNo, Context);
-  } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
-    unsigned LHSWidth =
-      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-    unsigned InEl = getShuffleMask(SVI)[EltNo];
-    if (InEl < LHSWidth)
-      return FindScalarElement(SVI->getOperand(0), InEl, Context);
-    else if (InEl < LHSWidth*2)
-      return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context);
-    else
-      return UndefValue::get(PTy->getElementType());
-  }
-
-  // Otherwise, we don't know.
-  return 0;
-}
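The mask convention assumed by getShuffleMask and FindScalarElement above: indices below NElts pick from the first input vector, indices in [NElts, 2*NElts) pick from the second, and anything at or beyond 2*NElts encodes undef. A small decoder over that convention (resolve is an illustrative helper, not part of LLVM):

    #include <vector>

    // Resolve element 'i' of shuffle(lhs, rhs, mask); returns a pointer to
    // the chosen scalar, or nullptr for an undef lane.
    const int *resolve(const std::vector<int> &lhs, const std::vector<int> &rhs,
                       const std::vector<unsigned> &mask, unsigned i) {
      unsigned n = static_cast<unsigned>(lhs.size()), m = mask[i];
      if (m < n)     return &lhs[m];       // LHS lane
      if (m < 2 * n) return &rhs[m - n];   // RHS lane
      return nullptr;                      // encoded undef
    }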
-Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-  // If vector val is undef, replace extract with scalar undef.
-  if (isa<UndefValue>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
-  // If vector val is constant 0, replace extract with scalar 0.
-  if (isa<ConstantAggregateZero>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
-  if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
-    // If vector val is constant with all elements the same, replace EI with
-    // that element. When the elements are not identical, we cannot replace yet
-    // (we do that below, but only when the index is constant).
-    Constant *op0 = C->getOperand(0);
-    for (unsigned i = 1; i != C->getNumOperands(); ++i)
-      if (C->getOperand(i) != op0) {
-        op0 = 0;
-        break;
-      }
-    if (op0)
-      return ReplaceInstUsesWith(EI, op0);
-  }
-
-  // If extracting a specified index from the vector, see if we can recursively
-  // find a previously computed scalar that was inserted into the vector.
-  if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
-    unsigned IndexVal = IdxC->getZExtValue();
-    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
-
-    // If this is extracting an invalid index, turn this into undef, to avoid
-    // crashing the code below.
-    if (IndexVal >= VectorWidth)
-      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
-    // This instruction only demands the single element from the input vector.
-    // If the input vector has a single use, simplify it based on this use
-    // property.
-    if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
-      APInt UndefElts(VectorWidth, 0);
-      APInt DemandedMask(VectorWidth, 1 << IndexVal);
-      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
-                                                DemandedMask, UndefElts)) {
-        EI.setOperand(0, V);
-        return &EI;
-      }
-    }
-
-    if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal, Context))
-      return ReplaceInstUsesWith(EI, Elt);
-
-    // If this extractelement is directly using a bitcast from a vector of
-    // the same number of elements, see if we can find the source element from
-    // it.  In this case, we will end up needing to bitcast the scalars.
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
-      if (const VectorType *VT =
-              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
-        if (VT->getNumElements() == VectorWidth)
-          if (Value *Elt = FindScalarElement(BCI->getOperand(0),
-                                             IndexVal, Context))
-            return new BitCastInst(Elt, EI.getType());
-    }
-  }
-
-  if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
-    // Push extractelement into predecessor operation if legal and
-    // profitable to do so.
-    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-      if (I->hasOneUse() &&
-          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
-        Value *newEI0 =
-          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
-                                        EI.getName()+".lhs");
-        Value *newEI1 =
-          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
-                                        EI.getName()+".rhs");
-        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
-      }
-    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
-      // Extracting the inserted element?
-      if (IE->getOperand(2) == EI.getOperand(1))
-        return ReplaceInstUsesWith(EI, IE->getOperand(1));
-      // If the inserted and extracted elements are constants, they must not
-      // be the same value, extract from the pre-inserted value instead.
-      if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
-        Worklist.AddValue(EI.getOperand(0));
-        EI.setOperand(0, IE->getOperand(0));
-        return &EI;
-      }
-    } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
-      // If this is extracting an element from a shufflevector, figure out where
-      // it came from and extract from the appropriate input element instead.
-      if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
-        unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
-        Value *Src;
-        unsigned LHSWidth =
-          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-
-        if (SrcIdx < LHSWidth)
-          Src = SVI->getOperand(0);
-        else if (SrcIdx < LHSWidth*2) {
-          SrcIdx -= LHSWidth;
-          Src = SVI->getOperand(1);
-        } else {
-          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-        }
-        return ExtractElementInst::Create(Src,
-                         ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx,
-                                          false));
-      }
-    }
-    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
-  }
-  return 0;
-}
-
-/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
-/// Otherwise, return false.
-static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
-                                         std::vector<Constant*> &Mask,
-                                         LLVMContext *Context) {
-  assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
-         "Invalid CollectSingleShuffleElements");
-  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
-  if (isa<UndefValue>(V)) {
-    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
-    return true;
-  } else if (V == LHS) {
-    for (unsigned i = 0; i != NumElts; ++i)
-      Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
-    return true;
-  } else if (V == RHS) {
-    for (unsigned i = 0; i != NumElts; ++i)
-      Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts));
-    return true;
-  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
-    // If this is an insert of an extract from some other vector, include it.
-    Value *VecOp    = IEI->getOperand(0);
-    Value *ScalarOp = IEI->getOperand(1);
-    Value *IdxOp    = IEI->getOperand(2);
-
-    if (!isa<ConstantInt>(IdxOp))
-      return false;
-    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
-    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
-      // Okay, we can handle this if the vector we are inserting into is
-      // transitively ok.
-      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
-        // If so, update the mask to reflect the inserted undef.
-        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));
-        return true;
-      }
-    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
-      if (isa<ConstantInt>(EI->getOperand(1)) &&
-          EI->getOperand(0)->getType() == V->getType()) {
-        unsigned ExtractedIdx =
-          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-
-        // This must be extracting from either LHS or RHS.
-        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
-          // Okay, we can handle this if the vector we are inserting into is
-          // transitively ok.
-          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
-            // If so, update the mask to reflect the inserted value.
-            if (EI->getOperand(0) == LHS) {
-              Mask[InsertedIdx % NumElts] =
-                ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
-            } else {
-              assert(EI->getOperand(0) == RHS);
-              Mask[InsertedIdx % NumElts] =
-                ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);
-
-            }
-            return true;
-          }
-        }
-      }
-    }
-  }
-  // TODO: Handle shufflevector here!
-
-  return false;
-}
-/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
-/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
-/// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
-                                     Value *&RHS, LLVMContext *Context) {
-  assert(isa<VectorType>(V->getType()) &&
-         (RHS == 0 || V->getType() == RHS->getType()) &&
-         "Invalid shuffle!");
-  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
-  if (isa<UndefValue>(V)) {
-    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
-    return V;
-  } else if (isa<ConstantAggregateZero>(V)) {
-    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));
-    return V;
-  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
-    // If this is an insert of an extract from some other vector, include it.
-    Value *VecOp    = IEI->getOperand(0);
-    Value *ScalarOp = IEI->getOperand(1);
-    Value *IdxOp    = IEI->getOperand(2);
-
-    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
-      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
-          EI->getOperand(0)->getType() == V->getType()) {
-        unsigned ExtractedIdx =
-          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
-        // Either the extracted from or inserted into vector must be RHSVec,
-        // otherwise we'd end up with a shuffle of three inputs.
-        if (EI->getOperand(0) == RHS || RHS == 0) {
-          RHS = EI->getOperand(0);
-          Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);
-          Mask[InsertedIdx % NumElts] =
-            ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);
-          return V;
-        }
-
-        if (VecOp == RHS) {
-          Value *V = CollectShuffleElements(EI->getOperand(0), Mask,
-                                            RHS, Context);
-          // Everything but the extracted element is replaced with the RHS.
-          for (unsigned i = 0; i != NumElts; ++i) {
-            if (i != InsertedIdx)
-              Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i);
-          }
-          return V;
-        }
-
-        // If this insertelement is a chain that comes from exactly these two
-        // vectors, return the vector and the effective shuffle.
-        if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask,
-                                         Context))
-          return EI->getOperand(0);
-
-      }
-    }
-  }
-  // TODO: Handle shufflevector here!
-
-  // Otherwise, can't do anything fancy.  Return an identity vector.
-  for (unsigned i = 0; i != NumElts; ++i)
-    Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
-  return V;
-}
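What CollectShuffleElements computes, in miniature: a chain of insert-of-extract operations over two source vectors is one shuffle whose mask starts as the identity over the LHS and gets patched once per insert, with RHS lanes offset by the vector width. A simplified sketch (InsExt and buildMask are hypothetical helpers):

    #include <vector>

    struct InsExt { unsigned intoIdx, fromIdx; bool fromRHS; };

    std::vector<unsigned> buildMask(unsigned n, const std::vector<InsExt> &chain) {
      std::vector<unsigned> mask(n);
      for (unsigned i = 0; i != n; ++i) mask[i] = i;        // identity over LHS
      for (const InsExt &e : chain)
        mask[e.intoIdx] = e.fromIdx + (e.fromRHS ? n : 0);  // RHS lanes offset by n
      return mask;
    }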
-Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
-  Value *VecOp    = IE.getOperand(0);
-  Value *ScalarOp = IE.getOperand(1);
-  Value *IdxOp    = IE.getOperand(2);
-
-  // Inserting an undef value, or inserting into an undefined place; remove this.
-  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
-    ReplaceInstUsesWith(IE, VecOp);
-
-  // If the inserted element was extracted from some other vector, and if the
-  // indexes are constant, try to turn this into a shufflevector operation.
-  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
-    if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
-        EI->getOperand(0)->getType() == IE.getType()) {
-      unsigned NumVectorElts = IE.getType()->getNumElements();
-      unsigned ExtractedIdx =
-        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-      unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
-      if (ExtractedIdx >= NumVectorElts) // Out of range extract.
-        return ReplaceInstUsesWith(IE, VecOp);
-
-      if (InsertedIdx >= NumVectorElts)  // Out of range insert.
-        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
-      // If we are extracting a value from a vector, then inserting it right
-      // back into the same place, just use the input vector.
-      if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
-        return ReplaceInstUsesWith(IE, VecOp);
-
-      // If this insertelement isn't used by some other insertelement, turn it
-      // (and any insertelements it points to), into one big shuffle.
-      if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
-        std::vector<Constant*> Mask;
-        Value *RHS = 0;
-        Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context);
-        if (RHS == 0) RHS = UndefValue::get(LHS->getType());
-        // We now have a shuffle of LHS, RHS, Mask.
-        return new ShuffleVectorInst(LHS, RHS,
-                                     ConstantVector::get(Mask));
-      }
-    }
-  }
-
-  unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
-  APInt UndefElts(VWidth, 0);
-  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
-    return &IE;
-
-  return 0;
-}
-Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
-  Value *LHS = SVI.getOperand(0);
-  Value *RHS = SVI.getOperand(1);
-  std::vector<unsigned> Mask = getShuffleMask(&SVI);
-
-  bool MadeChange = false;
-
-  // Undefined shuffle mask -> undefined value.
-  if (isa<UndefValue>(SVI.getOperand(2)))
-    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-
-  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
-
-  if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
-    return 0;
-
-  APInt UndefElts(VWidth, 0);
-  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
-    LHS = SVI.getOperand(0);
-    RHS = SVI.getOperand(1);
-    MadeChange = true;
-  }
-
-  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
-  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
-  if (LHS == RHS || isa<UndefValue>(LHS)) {
-    if (isa<UndefValue>(LHS) && LHS == RHS) {
-      // shuffle(undef,undef,mask) -> undef.
-      return ReplaceInstUsesWith(SVI, LHS);
-    }
-
-    // Remap any references to RHS to use LHS.
-    std::vector<Constant*> Elts;
-    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-      if (Mask[i] >= 2*e)
-        Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
-      else {
-        if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
-            (Mask[i] <  e && isa<UndefValue>(LHS))) {
-          Mask[i] = 2*e;     // Turn into undef.
-          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
-        } else {
-          Mask[i] = Mask[i] % e;  // Force to LHS.
-          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));
-        }
-      }
-    }
-    SVI.setOperand(0, SVI.getOperand(1));
-    SVI.setOperand(1, UndefValue::get(RHS->getType()));
-    SVI.setOperand(2, ConstantVector::get(Elts));
-    LHS = SVI.getOperand(0);
-    RHS = SVI.getOperand(1);
-    MadeChange = true;
-  }
-
-  // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
-  bool isLHSID = true, isRHSID = true;
-
-  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-    if (Mask[i] >= e*2) continue;  // Ignore undef values.
-    // Is this an identity shuffle of the LHS value?
-    isLHSID &= (Mask[i] == i);
-
-    // Is this an identity shuffle of the RHS value?
-    isRHSID &= (Mask[i]-e == i);
-  }
-
-  // Eliminate identity shuffles.
-  if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
-  if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
-
-  // If the LHS is a shufflevector itself, see if we can combine it with this
-  // one without producing an unusual shuffle.  Here we are really conservative:
-  // we are absolutely afraid of producing a shuffle mask not in the input
-  // program, because the code gen may not be smart enough to turn a merged
-  // shuffle into two specific shuffles: it may produce worse code.  As such,
-  // we only merge two shuffles if the result is one of the two input shuffle
-  // masks.  In this case, merging the shuffles just removes one instruction,
-  // which we know is safe.  This is good for things like turning:
-  //   (splat(splat)) -> splat.
-  if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
-    if (isa<UndefValue>(RHS)) {
-      std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
-
-      if (LHSMask.size() == Mask.size()) {
-        std::vector<unsigned> NewMask;
-        for (unsigned i = 0, e = Mask.size(); i != e; ++i)
-          if (Mask[i] >= e)
-            NewMask.push_back(2*e);
-          else
-            NewMask.push_back(LHSMask[Mask[i]]);
-
-        // If the result mask is equal to the src shuffle or this
-        // shuffle mask, do the replacement.
-        if (NewMask == LHSMask || NewMask == Mask) {
-          unsigned LHSInNElts =
-            cast<VectorType>(LHSSVI->getOperand(0)->getType())->
-            getNumElements();
-          std::vector<Constant*> Elts;
-          for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
-            if (NewMask[i] >= LHSInNElts*2) {
-              Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
-            } else {
-              Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
-                                              NewMask[i]));
-            }
-          }
-          return new ShuffleVectorInst(LHSSVI->getOperand(0),
-                                       LHSSVI->getOperand(1),
-                                       ConstantVector::get(Elts));
-        }
-      }
-    }
-  }
-
-  return MadeChange ? &SVI : 0;
-}
-
-
-
-
-/// TryToSinkInstruction - Try to move the specified instruction from its
-/// current block into the beginning of DestBlock, which can only happen if it's
-/// safe to move the instruction past all of the instructions between it and the
-/// end of its block.
-static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
-  assert(I->hasOneUse() && "Invariants didn't hold!");
-
-  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
-  if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
-    return false;
-
-  // Do not sink alloca instructions out of the entry block.
-  if (isa<AllocaInst>(I) && I->getParent() ==
-        &DestBlock->getParent()->getEntryBlock())
-    return false;
-
-  // We can only sink load instructions if there is nothing between the load and
-  // the end of block that could change the value.
-  if (I->mayReadFromMemory()) {
-    for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
-         Scan != E; ++Scan)
-      if (Scan->mayWriteToMemory())
-        return false;
-  }
-
-  BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
-
-  CopyPrecedingStopPoint(I, InsertPos);
-  I->moveBefore(InsertPos);
-  ++NumSunkInst;
-  return true;
-}
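The sinking guard above, reduced to its essentials: side-effecting instructions never move, and an instruction that reads memory may only sink past the rest of its block if nothing later in the block can write memory. A compact model with stand-in types (SInst and okToSink are illustrative):

    #include <cstddef>
    #include <vector>

    struct SInst { bool mayRead, mayWrite, sideEffects; };

    bool okToSink(const std::vector<SInst> &bb, size_t pos) {
      if (bb[pos].sideEffects) return false;   // never move these
      if (!bb[pos].mayRead) return true;       // nothing to invalidate
      for (size_t i = pos + 1; i < bb.size(); ++i)
        if (bb[i].mayWrite) return false;      // the loaded value could change
      return true;
    }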
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
-///
-/// This has a couple of tricks to make the code faster and more powerful.  In
-/// particular, we constant fold and DCE instructions as we go, to avoid adding
-/// them to the worklist (this significantly speeds up instcombine on code where
-/// many instructions are dead or constant).  Additionally, if we find a branch
-/// whose condition is a known constant, we only visit the reachable successors.
-///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
-                                       SmallPtrSet<BasicBlock*, 64> &Visited,
-                                       InstCombiner &IC,
-                                       const TargetData *TD) {
-  bool MadeIRChange = false;
-  SmallVector<BasicBlock*, 256> Worklist;
-  Worklist.push_back(BB);
-
-  std::vector<Instruction*> InstrsForInstCombineWorklist;
-  InstrsForInstCombineWorklist.reserve(128);
-
-  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
-
-  while (!Worklist.empty()) {
-    BB = Worklist.back();
-    Worklist.pop_back();
-
-    // We have now visited this block!  If we've already been here, ignore it.
-    if (!Visited.insert(BB)) continue;
-
-    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
-      Instruction *Inst = BBI++;
-
-      // DCE instruction if trivially dead.
-      if (isInstructionTriviallyDead(Inst)) {
-        ++NumDeadInst;
-        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
-        Inst->eraseFromParent();
-        continue;
-      }
-
-      // ConstantProp instruction if trivially constant.
-      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
-        if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
-          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
-                       << *Inst << '\n');
-          Inst->replaceAllUsesWith(C);
-          ++NumConstProp;
-          Inst->eraseFromParent();
-          continue;
-        }
-
-
-
-      if (TD) {
-        // See if we can constant fold its operands.
-        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
-             i != e; ++i) {
-          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
-          if (CE == 0) continue;
-
-          // If we already folded this constant, don't try again.
-          if (!FoldedConstants.insert(CE))
-            continue;
-
-          Constant *NewC = ConstantFoldConstantExpression(CE, TD);
-          if (NewC && NewC != CE) {
-            *i = NewC;
-            MadeIRChange = true;
-          }
-        }
-      }
-
-
-      InstrsForInstCombineWorklist.push_back(Inst);
-    }
-
-    // Recursively visit successors.  If this is a branch or switch on a
-    // constant, only visit the reachable successor.
-    TerminatorInst *TI = BB->getTerminator();
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
-        bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
-        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
-        Worklist.push_back(ReachableBB);
-        continue;
-      }
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
-        // See if this is an explicit destination.
-        for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
-          if (SI->getCaseValue(i) == Cond) {
-            BasicBlock *ReachableBB = SI->getSuccessor(i);
-            Worklist.push_back(ReachableBB);
-            continue;
-          }
-
-        // Otherwise it is the default destination.
-        Worklist.push_back(SI->getSuccessor(0));
-        continue;
-      }
-    }
-
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
-      Worklist.push_back(TI->getSuccessor(i));
-  }
-
-  // Once we've found all of the instructions to add to instcombine's worklist,
-  // add them in reverse order.  This way instcombine will visit from the top
-  // of the function down.  This jives well with the way that it adds all uses
-  // of instructions to the worklist after doing a transformation, thus avoiding
-  // some N^2 behavior in pathological cases.
-  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
-                              InstrsForInstCombineWorklist.size());
-
-  return MadeIRChange;
-}
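The reachability walk above is a depth-first traversal that prunes dead arms: a conditional branch on a known constant contributes only its taken edge, so unreachable successors are never queued. The same shape over a toy CFG (Block and reachable are illustrative, not LLVM types):

    #include <set>
    #include <vector>

    struct Block {
      std::vector<int> succs;
      int constCond = -1;  // -1: unknown; 0/1: the branch folds that way
    };

    void reachable(const std::vector<Block> &cfg, int bb, std::set<int> &seen) {
      if (!seen.insert(bb).second) return;  // already visited
      const Block &B = cfg[bb];
      if (B.constCond >= 0 && B.succs.size() == 2) {
        // Mirrors BI->getSuccessor(!CondVal): successor 0 is the true edge.
        reachable(cfg, B.succs[B.constCond ? 0 : 1], seen);
        return;
      }
      for (int s : B.succs) reachable(cfg, s, seen);
    }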
-bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
-  MadeIRChange = false;
-
-  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
-        << F.getNameStr() << "\n");
-
-  {
-    // Do a depth-first traversal of the function, populate the worklist with
-    // the reachable instructions.  Ignore blocks that are not reachable.  Keep
-    // track of which blocks we visit.
-    SmallPtrSet<BasicBlock*, 64> Visited;
-    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
-
-    // Do a quick scan over the function.  If we find any blocks that are
-    // unreachable, remove any instructions inside of them.  This prevents
-    // the instcombine code from having to deal with some bad special cases.
-    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-      if (!Visited.count(BB)) {
-        Instruction *Term = BB->getTerminator();
-        while (Term != BB->begin()) {   // Remove instrs bottom-up
-          BasicBlock::iterator I = Term; --I;
-
-          DEBUG(errs() << "IC: DCE: " << *I << '\n');
-          // A debug intrinsic shouldn't force another iteration if we weren't
-          // going to do one without it.
-          if (!isa<DbgInfoIntrinsic>(I)) {
-            ++NumDeadInst;
-            MadeIRChange = true;
-          }
-
-          // If I is not void type then replaceAllUsesWith undef.
-          // This allows ValueHandlers and custom metadata to adjust themselves.
-          if (!I->getType()->isVoidTy())
-            I->replaceAllUsesWith(UndefValue::get(I->getType()));
-          I->eraseFromParent();
-        }
-      }
-  }
-
-  while (!Worklist.isEmpty()) {
-    Instruction *I = Worklist.RemoveOne();
-    if (I == 0) continue;  // skip null values.
-
-    // Check to see if we can DCE the instruction.
-    if (isInstructionTriviallyDead(I)) {
-      DEBUG(errs() << "IC: DCE: " << *I << '\n');
-      EraseInstFromFunction(*I);
-      ++NumDeadInst;
-      MadeIRChange = true;
-      continue;
-    }
-
-    // Instruction isn't dead, see if we can constant propagate it.
-    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
-      if (Constant *C = ConstantFoldInstruction(I, TD)) {
-        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
-
-        // Add operands to the worklist.
-        ReplaceInstUsesWith(*I, C);
-        ++NumConstProp;
-        EraseInstFromFunction(*I);
-        MadeIRChange = true;
-        continue;
-      }
-
-    // See if we can trivially sink this instruction to a successor basic block.
-    if (I->hasOneUse()) {
-      BasicBlock *BB = I->getParent();
-      Instruction *UserInst = cast<Instruction>(I->use_back());
-      BasicBlock *UserParent;
-
-      // Get the block the use occurs in.
-      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
-        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
-      else
-        UserParent = UserInst->getParent();
-
-      if (UserParent != BB) {
-        bool UserIsSuccessor = false;
-        // See if the user is one of our successors.
-        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
-          if (*SI == UserParent) {
-            UserIsSuccessor = true;
-            break;
-          }
-
-        // If the user is one of our immediate successors, and if that successor
-        // only has us as a predecessor (we'd have to split the critical edge
-        // otherwise), we can keep going.
-        if (UserIsSuccessor && UserParent->getSinglePredecessor())
-          // Okay, the CFG is simple enough, try to sink this instruction.
-          MadeIRChange |= TryToSinkInstruction(I, UserParent);
-      }
-    }
-
-    // Now that we have an instruction, try combining it to simplify it.
-    Builder->SetInsertPoint(I->getParent(), I);
-
-#ifndef NDEBUG
-    std::string OrigI;
-#endif
-    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
-    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
-
-    if (Instruction *Result = visit(*I)) {
-      ++NumCombined;
-      // Should we replace the old instruction with a new one?
-      if (Result != I) {
-        DEBUG(errs() << "IC: Old = " << *I << '\n'
-                     << "    New = " << *Result << '\n');
-
-        // Everything uses the new instruction now.
-        I->replaceAllUsesWith(Result);
-
-        // Push the new instruction and any users onto the worklist.
-        Worklist.Add(Result);
-        Worklist.AddUsersToWorkList(*Result);
-
-        // Move the name to the new instruction first.
-        Result->takeName(I);
-
-        // Insert the new instruction into the basic block...
-        BasicBlock *InstParent = I->getParent();
-        BasicBlock::iterator InsertPos = I;
-
-        if (!isa<PHINode>(Result))        // If combining a PHI, don't insert
-          while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
-            ++InsertPos;
-
-        InstParent->getInstList().insert(InsertPos, Result);
-
-        EraseInstFromFunction(*I);
-      } else {
-#ifndef NDEBUG
-        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
-                     << "    New = " << *I << '\n');
-#endif
-
-        // If the instruction was modified, it's possible that it is now dead.
-        // if so, remove it.
-        if (isInstructionTriviallyDead(I)) {
-          EraseInstFromFunction(*I);
-        } else {
-          Worklist.Add(I);
-          Worklist.AddUsersToWorkList(*I);
-        }
-      }
-      MadeIRChange = true;
-    }
-  }
-
-  Worklist.Zap();
-  return MadeIRChange;
-}
-
-
-bool InstCombiner::runOnFunction(Function &F) {
-  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-  Context = &F.getContext();
-  TD = getAnalysisIfAvailable<TargetData>();
-
-
-  /// Builder - This is an IRBuilder that automatically inserts new
-  /// instructions into the worklist when they are created.
-  IRBuilder<true, TargetFolder, InstCombineIRInserter>
-    TheBuilder(F.getContext(), TargetFolder(TD),
-               InstCombineIRInserter(Worklist));
-  Builder = &TheBuilder;
-
-  bool EverMadeChange = false;
-
-  // Iterate while there is work to do.
-  unsigned Iteration = 0;
-  while (DoOneIteration(F, Iteration++))
-    EverMadeChange = true;
-
-  Builder = 0;
-  return EverMadeChange;
-}
-
-FunctionPass *llvm::createInstructionCombiningPass() {
-  return new InstCombiner();
-}
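runOnFunction above follows a simple fixpoint pattern: rerun DoOneIteration until a full pass over the function changes nothing. The generic shape of that driver, as a sketch (the real pass has no iteration cap; maxIters here is an added safety valve for illustration only):

    #include <functional>

    // Run one iteration repeatedly until it reports "no change".
    bool iterateToFixpoint(const std::function<bool()> &doOneIteration,
                           unsigned maxIters = 1000u) {
      bool everChanged = false;
      for (unsigned i = 0; i != maxIters && doOneIteration(); ++i)
        everChanged = true;  // keep going while progress is made
      return everChanged;
    }

This converges because each iteration either strictly simplifies the IR or reports no change.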
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 7e6cf79d8cba..953131155181 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -89,7 +89,7 @@ namespace {
     bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
                     BasicBlock *SuccBB);
     bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
-                                          BasicBlock *PredBB);
+                                  const SmallVectorImpl<BasicBlock*> &PredBBs);
 
     typedef SmallVectorImpl<std::pair<ConstantInt*,
                                       BasicBlock*> > PredValueInfo;
@@ -102,7 +102,8 @@ namespace {
     bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
     bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
 
-    bool ProcessJumpOnPHI(PHINode *PN);
+    bool ProcessBranchOnPHI(PHINode *PN);
+    bool ProcessBranchOnXOR(BinaryOperator *BO);
 
     bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
   };
@@ -118,16 +119,15 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
 /// runOnFunction - Top level algorithm.
 ///
 bool JumpThreading::runOnFunction(Function &F) {
-  DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+  DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
   TD = getAnalysisIfAvailable<TargetData>();
   LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
 
   FindLoopHeaders(F);
 
-  bool AnotherIteration = true, EverChanged = false;
-  while (AnotherIteration) {
-    AnotherIteration = false;
-    bool Changed = false;
+  bool Changed, EverChanged = false;
+  do {
+    Changed = false;
     for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
       BasicBlock *BB = I;
       // Thread all of the branches we can over this block.
@@ -140,7 +140,7 @@ bool JumpThreading::runOnFunction(Function &F) {
       // edges which simplifies the CFG.
       if (pred_begin(BB) == pred_end(BB) &&
          BB != &BB->getParent()->getEntryBlock()) {
-        DEBUG(errs() << "  JT: Deleting dead block '" << BB->getName()
+        DEBUG(dbgs() << "  JT: Deleting dead block '" << BB->getName()
               << "' with terminator: " << *BB->getTerminator() << '\n');
         LoopHeaders.erase(BB);
         DeleteDeadBlock(BB);
@@ -176,9 +176,8 @@ bool JumpThreading::runOnFunction(Function &F) {
         }
       }
     }
-    AnotherIteration = Changed;
     EverChanged |= Changed;
-  }
+  } while (Changed);
 
   LoopHeaders.clear();
   return EverChanged;
@@ -490,7 +489,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   // terminator to an unconditional branch.  This can occur due to threading in
   // other blocks.
   if (isa<ConstantInt>(Condition)) {
-    DEBUG(errs() << "  In block '" << BB->getName()
+    DEBUG(dbgs() << "  In block '" << BB->getName()
           << "' folding terminator: " << *BB->getTerminator() << '\n');
     ++NumFolds;
     ConstantFoldTerminator(BB);
@@ -509,7 +508,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
       RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
     }
 
-    DEBUG(errs() << "  In block '" << BB->getName()
+    DEBUG(dbgs() << "  In block '" << BB->getName()
           << "' folding undef terminator: " << *BBTerm << '\n');
     BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
     BBTerm->eraseFromParent();
@@ -552,11 +551,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
     }
   }
 
-  // See if this is a phi node in the current block.
-  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
-    if (PN->getParent() == BB)
-      return ProcessJumpOnPHI(PN);
-
   if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
     if (!LVI &&
         (!isa<PHINode>(CondCmp->getOperand(0)) ||
@@ -585,8 +579,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   // we see one, check to see if it's partially redundant.  If so, insert a PHI
   // which can then be used to thread the values.
   //
-  // This is particularly important because reg2mem inserts loads and stores all
-  // over the place, and this blocks jump threading if we don't zap them.
   Value *SimplifyValue = CondInst;
   if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
     if (isa<Constant>(CondCmp->getOperand(1)))
@@ -606,9 +598,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   if (ProcessThreadableEdges(CondInst, BB))
     return true;
 
+  // If this is an otherwise-unfoldable branch on a phi node in the current
+  // block, see if we can simplify.
+  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+      return ProcessBranchOnPHI(PN);
+
+
+  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+  if (CondInst->getOpcode() == Instruction::Xor &&
+      CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+    return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
+
   // TODO: If we have: "br (X > 0)"  and we have a predecessor where we know
-  // "(X == 4)" thread through this block.
+  // "(X == 4)", thread through this block.
 
   return false;
 }
@@ -636,7 +640,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
   else if (PredBI->getSuccessor(0) != BB)
     BranchDir = false;
   else {
-    DEBUG(errs() << "  In block '" << PredBB->getName()
+    DEBUG(dbgs() << "  In block '" << PredBB->getName()
          << "' folding terminator: " << *PredBB->getTerminator() << '\n');
     ++NumFolds;
     ConstantFoldTerminator(PredBB);
@@ -648,7 +652,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
 
   // If the dest block has one predecessor, just fix the branch condition to a
  // constant and fold it.
   if (BB->getSinglePredecessor()) {
-    DEBUG(errs() << "  In block '" << BB->getName()
+    DEBUG(dbgs() << "  In block '" << BB->getName()
           << "' folding condition to '" << BranchDir << "': "
           << *BB->getTerminator() << '\n');
     ++NumFolds;
@@ -727,8 +731,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
 
   // Otherwise, we're safe to make the change.  Make sure that the edge from
   // DestSI to DestSucc is not critical and has no PHI nodes.
-  DEBUG(errs() << "FORWARDING EDGE " << *DestVal << "  FROM: " << *PredSI);
-  DEBUG(errs() << "THROUGH: " << *DestSI);
+  DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << "  FROM: " << *PredSI);
+  DEBUG(dbgs() << "THROUGH: " << *DestSI);
 
   // If the destination has PHI nodes, just split the edge for updating
   // simplicity.
@@ -979,14 +983,14 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
   assert(!PredValues.empty() &&
          "ComputeValueKnownInPredecessors returned true with no values");
 
-  DEBUG(errs() << "IN BB: " << *BB;
+  DEBUG(dbgs() << "IN BB: " << *BB;
         for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
-          errs() << "  BB '" << BB->getName() << "': FOUND condition = ";
+          dbgs() << "  BB '" << BB->getName() << "': FOUND condition = ";
           if (PredValues[i].first)
-            errs() << *PredValues[i].first;
+            dbgs() << *PredValues[i].first;
           else
-            errs() << "UNDEF";
-          errs() << " for pred '" << PredValues[i].second->getName()
+            dbgs() << "UNDEF";
+          dbgs() << " for pred '" << PredValues[i].second->getName()
                 << "'.\n";
         });
 
@@ -1070,36 +1074,110 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
   return ThreadEdge(BB, PredsToFactor, MostPopularDest);
 }
 
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block.  See if there are any simplifications we can do based on
-/// inputs to the phi node.
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block.  See if there are any simplifications we
+/// can do based on inputs to the phi node.
 ///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
   BasicBlock *BB = PN->getParent();
 
-  // If any of the predecessor blocks end in an unconditional branch, we can
-  // *duplicate* the jump into that block in order to further encourage jump
-  // threading and to eliminate cases where we have branch on a phi of an icmp
-  // (branch on icmp is much better).
-
-  // We don't want to do this transformation for switches, because we don't
-  // really want to duplicate a switch.
-  if (isa<SwitchInst>(BB->getTerminator()))
-    return false;
+  // TODO: We could make use of this to do it once for blocks with common PHI
+  // values.
+  SmallVector<BasicBlock*, 1> PredBBs;
+  PredBBs.resize(1);
 
-  // Look for unconditional branch predecessors.
+  // If any of the predecessor blocks end in an unconditional branch, we can
+  // *duplicate* the conditional branch into that block in order to further
+  // encourage jump threading and to eliminate cases where we have branch on a
+  // phi of an icmp (branch on icmp is much better).
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     BasicBlock *PredBB = PN->getIncomingBlock(i);
     if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
-      if (PredBr->isUnconditional() &&
-          // Try to duplicate BB into PredBB.
-          DuplicateCondBranchOnPHIIntoPred(BB, PredBB))
-        return true;
+      if (PredBr->isUnconditional()) {
+        PredBBs[0] = PredBB;
+        // Try to duplicate BB into PredBB.
+        if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+          return true;
+      }
   }

   return false;
 }

+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block.  See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+  BasicBlock *BB = BO->getParent();
+
+  // If either the LHS or RHS of the xor is a constant, don't do this
+  // optimization.
+  if (isa<ConstantInt>(BO->getOperand(0)) ||
+      isa<ConstantInt>(BO->getOperand(1)))
+    return false;
+
+  // If we have a xor as the branch input to this block, and we know that the
+  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+  // the condition into the predecessor and fix that value to true, saving some
+  // logical ops on that path and encouraging other paths to simplify.
+  //
+  // This copies something like this:
+  //
+  //  BB:
+  //    %X = phi i1 [1],  [%X']
+  //    %Y = icmp eq i32 %A, %B
+  //    %Z = xor i1 %X, %Y
+  //    br i1 %Z, ...
+  //
+  // Into:
+  //  BB':
+  //    %Y = icmp ne i32 %A, %B
+  //    br i1 %Z, ...
+
+  SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+  bool isLHS = true;
+  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+    assert(XorOpValues.empty());
+    if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+      return false;
+    isLHS = false;
+  }
+
+  assert(!XorOpValues.empty() &&
+         "ComputeValueKnownInPredecessors returned true with no values");
+
+  // Scan the information to see which is most popular: true or false.  The
+  // predecessors can be of the set true, false, or undef.
+  unsigned NumTrue = 0, NumFalse = 0;
+  for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+    if (!XorOpValues[i].first) continue;  // Ignore undefs for the count.
+    if (XorOpValues[i].first->isZero())
+      ++NumFalse;
+    else
+      ++NumTrue;
+  }
+
+  // Determine which value to split on, true, false, or undef if neither.
+  ConstantInt *SplitVal = 0;
+  if (NumTrue > NumFalse)
+    SplitVal = ConstantInt::getTrue(BB->getContext());
+  else if (NumTrue != 0 || NumFalse != 0)
+    SplitVal = ConstantInt::getFalse(BB->getContext());
+
+  // Collect all of the blocks that this can be folded into so that we can
+  // factor this once and clone it once.
+  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+  for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+    if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+
+    BlocksToFoldInto.push_back(XorOpValues[i].second);
+  }
+
+  // Try to duplicate BB into PredBB.
+  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+

 /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
 /// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
@@ -1133,7 +1211,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
                                BasicBlock *SuccBB) {
   // If threading to the same block as we come from, we would infinite loop.
   if (SuccBB == BB) {
-    DEBUG(errs() << "  Not threading across BB '" << BB->getName()
+    DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
           << "' - would thread to self!\n");
     return false;
   }
@@ -1141,7 +1219,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
   // If threading this would thread across a loop header, don't thread the edge.
   // See the comments above FindLoopHeaders for justifications and caveats.
   if (LoopHeaders.count(BB)) {
-    DEBUG(errs() << "  Not threading across loop header BB '" << BB->getName()
+    DEBUG(dbgs() << "  Not threading across loop header BB '" << BB->getName()
           << "' to dest BB '" << SuccBB->getName()
           << "' - it might create an irreducible loop!\n");
     return false;
@@ -1149,7 +1227,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,

   unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
   if (JumpThreadCost > Threshold) {
-    DEBUG(errs() << "  Not threading BB '" << BB->getName()
+    DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
           << "' - Cost is too high: " << JumpThreadCost << "\n");
     return false;
   }
@@ -1159,14 +1237,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
   if (PredBBs.size() == 1)
     PredBB = PredBBs[0];
   else {
-    DEBUG(errs() << "  Factoring out " << PredBBs.size()
+    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
           << " common predecessors.\n");
     PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
                                     ".thr_comm", this);
   }

   // And finally, do it!
-  DEBUG(errs() << "  Threading edge from '" << PredBB->getName() << "' to '"
+  DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName() << "' to '"
         << SuccBB->getName() << "' with cost: " << JumpThreadCost
         << ", across block:\n    " << *BB << "\n");
@@ -1235,7 +1313,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
     if (UsesToRename.empty())
       continue;

-    DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");

     // We found a use of I outside of BB.  Rename all uses of I that are outside
     // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
@@ -1246,7 +1324,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
     while (!UsesToRename.empty())
       SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());

-    DEBUG(errs() << "\n");
+    DEBUG(dbgs() << "\n");
   }
@@ -1263,20 +1341,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
   // At this point, the IR is fully up to date and consistent.  Do a quick scan
   // over the new instructions and zap any that are constants or dead.  This
   // frequently happens because of phi translation.
-  BI = NewBB->begin();
-  for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
-    Instruction *Inst = BI++;
-
-    if (Value *V = SimplifyInstruction(Inst, TD)) {
-      WeakVH BIHandle(BI);
-      ReplaceAndSimplifyAllUses(Inst, V, TD);
-      if (BIHandle == 0)
-        BI = NewBB->begin();
-      continue;
-    }
-
-    RecursivelyDeleteTriviallyDeadInstructions(Inst);
-  }
+  SimplifyInstructionsInBlock(NewBB, TD);

   // Threaded an edge!
   ++NumThreads;
@@ -1289,30 +1354,52 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
 /// improves the odds that the branch will be on an analyzable instruction like
 /// a compare.
 bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
-                                                     BasicBlock *PredBB) {
+                                  const SmallVectorImpl<BasicBlock *> &PredBBs) {
+  assert(!PredBBs.empty() && "Can't handle an empty set");
+
   // If BB is a loop header, then duplicating this block outside the loop would
   // cause us to transform this into an irreducible loop, don't do this.
   // See the comments above FindLoopHeaders for justifications and caveats.
   if (LoopHeaders.count(BB)) {
-    DEBUG(errs() << "  Not duplicating loop header '" << BB->getName()
-          << "' into predecessor block '" << PredBB->getName()
+    DEBUG(dbgs() << "  Not duplicating loop header '" << BB->getName()
+          << "' into predecessor block '" << PredBBs[0]->getName()
           << "' - it might create an irreducible loop!\n");
     return false;
   }

   unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
   if (DuplicationCost > Threshold) {
-    DEBUG(errs() << "  Not duplicating BB '" << BB->getName()
+    DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
           << "' - Cost is too high: " << DuplicationCost << "\n");
     return false;
   }

+  // And finally, do it!  Start by factoring the predecessors is needed.
+  BasicBlock *PredBB;
+  if (PredBBs.size() == 1)
+    PredBB = PredBBs[0];
+  else {
+    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
+          << " common predecessors.\n");
+    PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+                                    ".thr_comm", this);
+  }
+
   // Okay, we decided to do this!  Clone all the instructions in BB onto the end
   // of PredBB.
-  DEBUG(errs() << "  Duplicating block '" << BB->getName() << "' into end of '"
+  DEBUG(dbgs() << "  Duplicating block '" << BB->getName() << "' into end of '"
         << PredBB->getName() << "' to eliminate branch on phi.  Cost: "
         << DuplicationCost << " block is:" << *BB << "\n");

+  // Unless PredBB ends with an unconditional branch, split the edge so that we
+  // can just clone the bits from BB into the end of the new PredBB.
+  BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+
+  if (!OldPredBranch->isUnconditional()) {
+    PredBB = SplitEdge(PredBB, BB, this);
+    OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+  }
+
   // We are going to have to map operands from the original BB block into the
   // PredBB block.  Evaluate PHI nodes in BB.
   DenseMap<Instruction*, Value*> ValueMapping;
@@ -1321,15 +1408,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
   for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
     ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);

-  BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
-
   // Clone the non-phi instructions of BB into PredBB, keeping track of the
   // mapping and using it to remap operands in the cloned instructions.
   for (; BI != BB->end(); ++BI) {
     Instruction *New = BI->clone();
-    New->setName(BI->getName());
-    PredBB->getInstList().insert(OldPredBranch, New);
-    ValueMapping[BI] = New;

     // Remap operands to patch up intra-block references.
     for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -1338,6 +1420,19 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
         if (I != ValueMapping.end())
           New->setOperand(i, I->second);
       }
+
+    // If this instruction can be simplified after the operands are updated,
+    // just use the simplified value instead.  This frequently happens due to
+    // phi translation.
+    if (Value *IV = SimplifyInstruction(New, TD)) {
+      delete New;
+      ValueMapping[BI] = IV;
+    } else {
+      // Otherwise, insert the new instruction into the block.
+      New->setName(BI->getName());
+      PredBB->getInstList().insert(OldPredBranch, New);
+      ValueMapping[BI] = New;
+    }
   }

   // Check to see if the targets of the branch had PHI nodes.  If so, we need to
@@ -1373,7 +1468,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
     if (UsesToRename.empty())
       continue;

-    DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");

     // We found a use of I outside of BB.  Rename all uses of I that are outside
     // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
@@ -1384,7 +1479,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
     while (!UsesToRename.empty())
       SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());

-    DEBUG(errs() << "\n");
+    DEBUG(dbgs() << "\n");
   }

   // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 99f3ae07f605..81f9ae61aa2c 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -384,10 +384,6 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
       Size = AA->getTypeStoreSize(LI->getType());
     return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-    if (isa<DbgStopPointInst>(CI)) {
-      // Don't hoist/sink dbgstoppoints, we handle them separately
-      return false;
-    }
     // Handle obvious cases efficiently.
     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
     if (Behavior == AliasAnalysis::DoesNotAccessMemory)
@@ -461,7 +457,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
 /// position, and may either delete it or move it to outside of the loop.
 ///
 void LICM::sink(Instruction &I) {
-  DEBUG(errs() << "LICM sinking instruction: " << I);
+  DEBUG(dbgs() << "LICM sinking instruction: " << I);

   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getExitBlocks(ExitBlocks);
@@ -603,7 +599,7 @@ void LICM::sink(Instruction &I) {
 /// that is safe to hoist, this instruction is called to do the dirty work.
 ///
 void LICM::hoist(Instruction &I) {
-  DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": "
+  DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
         << I << "\n");

   // Remove the instruction from its current basic block... but don't delete the
@@ -859,7 +855,7 @@ void LICM::FindPromotableValuesInLoop(
       for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
         ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));

-      DEBUG(errs() << "LICM: Promoting value: " << *V << "\n");
+      DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
     }
   }

diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 1d9dd6841213..16d3f2f703ff 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -708,7 +708,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
   }

   while (!WorkList.empty()) {
-    BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+    BasicBlock *BB = WorkList.pop_back_val();
     LPM->deleteSimpleAnalysisValue(BB, LP);
     for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
         BBI != BBE; ) {
@@ -726,7 +726,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,

   // Update Frontier BBs' dominator info.
   while (!FrontierBBs.empty()) {
-    BasicBlock *FBB = FrontierBBs.back(); FrontierBBs.pop_back();
+    BasicBlock *FBB = FrontierBBs.pop_back_val();
     BasicBlock *NewDominator = FBB->getSinglePredecessor();
     if (!NewDominator) {
       pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 85f7368fb330..fa820ed8e402 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2723,7 +2723,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {

   // At this point, it is worth checking to see if any recurrence PHIs are also
   // dead, so that we can remove them as well.
-  DeleteDeadPHIs(L->getHeader());
+  Changed |= DeleteDeadPHIs(L->getHeader());

   return Changed;
 }
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index c2bf9f2c5898..ee8cb4f9a732 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -89,7 +89,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   LoopInfo *LI = &getAnalysis<LoopInfo>();

   BasicBlock *Header = L->getHeader();
-  DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
+  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
         << "] Loop %" << Header->getName() << "\n");
   (void)Header;
@@ -111,13 +111,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   // Enforce the threshold.
   if (UnrollThreshold != NoThreshold) {
     unsigned LoopSize = ApproximateLoopSize(L);
-    DEBUG(errs() << "  Loop Size = " << LoopSize << "\n");
+    DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
     uint64_t Size = (uint64_t)LoopSize*Count;
     if (TripCount != 1 && Size > UnrollThreshold) {
-      DEBUG(errs() << "  Too large to fully unroll with count: " << Count
+      DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
             << " because size: " << Size << ">" << UnrollThreshold << "\n");
       if (!UnrollAllowPartial) {
-        DEBUG(errs() << "  will not try to unroll partially because "
+        DEBUG(dbgs() << "  will not try to unroll partially because "
               << "-unroll-allow-partial not given\n");
         return false;
       }
@@ -127,10 +127,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
         Count--;
       }
       if (Count < 2) {
-        DEBUG(errs() << "  could not unroll partially\n");
+        DEBUG(dbgs() << "  could not unroll partially\n");
         return false;
       }
-      DEBUG(errs() << "  partially unrolling with count: " << Count << "\n");
+      DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
     }
   }

diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0c19133ed156..527a7b51e11f 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -436,7 +436,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
   if (Metrics.NumInsts > Threshold ||
       Metrics.NumBlocks * 5 > Threshold ||
       Metrics.NeverInline) {
-    DEBUG(errs() << "NOT unswitching loop %"
+    DEBUG(dbgs() << "NOT unswitching loop %"
           << currentLoop->getHeader()->getName() << ", cost too high: "
           << currentLoop->getBlocks().size() << "\n");
     return false;
@@ -522,7 +522,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
                                             Constant *Val,
                                             BasicBlock *ExitBlock) {
-  DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %"
+  DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
" [" << L->getBlocks().size() << " blocks] in Function " << L->getHeader()->getParent()->getName() << " on cond: " << *Val << " == " << *Cond << "\n"); @@ -581,7 +581,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L, void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L) { Function *F = loopHeader->getParent(); - DEBUG(errs() << "loop-unswitch: Unswitching loop %" + DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); @@ -707,7 +707,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(errs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -769,7 +769,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, return; } - DEBUG(errs() << "Nuking dead block: " << *BB); + DEBUG(dbgs() << "Nuking dead block: " << *BB); // Remove the instructions in the basic block from the worklist. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { @@ -867,7 +867,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. if (IsEqual || (isa(Val) && - Val->getType() == Type::getInt1Ty(Val->getContext()))) { + Val->getType()->isInteger(1))) { Value *Replacement; if (IsEqual) Replacement = Val; @@ -968,7 +968,7 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(errs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -993,10 +993,10 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { case Instruction::And: if (isa(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) + I->getOperand(0)->getType()->isInteger(1)) cast(I)->swapOperands(); if (ConstantInt *CB = dyn_cast(I->getOperand(1))) - if (CB->getType() == Type::getInt1Ty(I->getContext())) { + if (CB->getType()->isInteger(1)) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1007,10 +1007,10 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { case Instruction::Or: if (isa(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) + I->getOperand(0)->getType()->isInteger(1)) cast(I)->swapOperands(); if (ConstantInt *CB = dyn_cast(I->getOperand(1))) - if (CB->getType() == Type::getInt1Ty(I->getContext())) { + if (CB->getType()->isInteger(1)) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X @@ -1029,7 +1029,7 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { if (!SinglePred) continue; // Nothing to do. 
assert(SinglePred == Pred && "CFG broken"); - DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- " + DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " << Succ->getName() << "\n"); // Resolve any single entry PHI nodes in Succ. @@ -1057,7 +1057,7 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { // remove dead blocks. break; // FIXME: Enable. - DEBUG(errs() << "Folded branch: " << *BI); + DEBUG(dbgs() << "Folded branch: " << *BI); BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); DeadSucc->removePredecessor(BI->getParent(), true); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index c922814833c5..e0aa49154ffb 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) { LLVMContext &Context = V->getContext(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType() == Type::getInt8Ty(Context)) return V; + if (V->getType()->isInteger(8)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. @@ -456,10 +456,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment) }; Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); - DEBUG(errs() << "Replace stores:\n"; + DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) - errs() << *Range.TheStores[i]; - errs() << "With: " << *C); C=C; + dbgs() << *Range.TheStores[i]; + dbgs() << "With: " << *C); C=C; // Don't invalidate the iterator BBI = BI; @@ -562,8 +562,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { SmallVector srcUseList(srcAlloca->use_begin(), srcAlloca->use_end()); while (!srcUseList.empty()) { - User *UI = srcUseList.back(); - srcUseList.pop_back(); + User *UI = srcUseList.pop_back_val(); if (isa(UI)) { for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); @@ -725,7 +724,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis::NoAlias) return false; - DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); + DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); // If not, then we know we can transform this. 
   Module *Mod = M->getParent()->getParent()->getParent();
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 827b47d3feeb..4a99f4a844ec 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -60,12 +60,12 @@ namespace {
 ///
 static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
   Module *M = I->getParent()->getParent()->getParent();
-  errs() << Instruction::getOpcodeName(I->getOpcode()) << " "
+  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
        << *Ops[0].Op->getType() << '\t';
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-    errs() << "[ ";
-    WriteAsOperand(errs(), Ops[i].Op, false, M);
-    errs() << ", #" << Ops[i].Rank << "] ";
+    dbgs() << "[ ";
+    WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+    dbgs() << ", #" << Ops[i].Rank << "] ";
   }
 }
 #endif
@@ -186,7 +186,7 @@ unsigned Reassociate::getRank(Value *V) {
       (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
     ++Rank;

-  //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = "
+  //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
   //     << Rank << "\n");

   return ValueRankMap[I] = Rank;
@@ -226,7 +226,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
          isReassociableOp(RHS, I->getOpcode()) &&
          "Not an expression that needs linearization?");

-  DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
+  DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');

   // Move the RHS instruction to live immediately before I, avoiding breaking
   // dominator properties.
@@ -239,7 +239,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
   ++NumLinear;
   MadeChange = true;
-  DEBUG(errs() << "Linearized: " << *I << '\n');
+  DEBUG(dbgs() << "Linearized: " << *I << '\n');

   // If D is part of this expression tree, tail recurse.
   if (isReassociableOp(I->getOperand(1), I->getOpcode()))
@@ -335,10 +335,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
     if (I->getOperand(0) != Ops[i].Op ||
         I->getOperand(1) != Ops[i+1].Op) {
       Value *OldLHS = I->getOperand(0);
-      DEBUG(errs() << "RA: " << *I << '\n');
+      DEBUG(dbgs() << "RA: " << *I << '\n');
       I->setOperand(0, Ops[i].Op);
       I->setOperand(1, Ops[i+1].Op);
-      DEBUG(errs() << "TO: " << *I << '\n');
+      DEBUG(dbgs() << "TO: " << *I << '\n');
       MadeChange = true;
       ++NumChanged;
@@ -351,9 +351,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,

     assert(i+2 < Ops.size() && "Ops index out of range!");
     if (I->getOperand(1) != Ops[i].Op) {
-      DEBUG(errs() << "RA: " << *I << '\n');
+      DEBUG(dbgs() << "RA: " << *I << '\n');
       I->setOperand(1, Ops[i].Op);
-      DEBUG(errs() << "TO: " << *I << '\n');
+      DEBUG(dbgs() << "TO: " << *I << '\n');
       MadeChange = true;
       ++NumChanged;
     }
@@ -414,6 +414,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
     // non-instruction value) or right after the definition.  These negates will
     // be zapped by reassociate later, so we don't need much finesse here.
     BinaryOperator *TheNeg = cast<BinaryOperator>(*UI);
+
+    // Verify that the negate is in this function, V might be a constant expr.
+    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
+      continue;

     BasicBlock::iterator InsertPt;
     if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
@@ -480,7 +484,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
   Sub->replaceAllUsesWith(New);
   Sub->eraseFromParent();

-  DEBUG(errs() << "Negated: " << *New << '\n');
+  DEBUG(dbgs() << "Negated: " << *New << '\n');
   return New;
 }
@@ -788,6 +792,11 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
   Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
   SmallVector<Value*, 4> NewMulOps;
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    // Only try to remove factors from expressions we're allowed to.
+    BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+    if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+      continue;
+
     if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
       NewMulOps.push_back(V);
       Ops.erase(Ops.begin()+i);
@@ -797,14 +806,15 @@ Value *Reassociate::OptimizeAdd(Instruction *I,

   // No need for extra uses anymore.
   delete DummyInst;
-  
+
   unsigned NumAddedValues = NewMulOps.size();
   Value *V = EmitAddTreeOfValues(I, NewMulOps);
-  
+
   // Now that we have inserted the add tree, optimize it. This allows us to
   // handle cases that require multiple factoring steps, such as this:
   // A*A*B + A*A*C   -->   A*(A*B+A*C)   -->   A*(A*(B+C))
   assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+  (void)NumAddedValues;
   V = ReassociateExpression(cast<BinaryOperator>(V));

   // Create the multiply.
@@ -928,6 +938,10 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
     if (BI->getOpcode() == Instruction::Sub) {
       if (ShouldBreakUpSubtract(BI)) {
         BI = BreakUpSubtract(BI, ValueRankMap);
+        // Reset the BBI iterator in case BreakUpSubtract changed the
+        // instruction it points to.
+        BBI = BI;
+        ++BBI;
         MadeChange = true;
       } else if (BinaryOperator::isNeg(BI)) {
         // Otherwise, this is a negation.  See if the operand is a multiply tree
@@ -967,7 +981,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
   SmallVector<ValueEntry, 8> Ops;
   LinearizeExprTree(I, Ops);

-  DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << '\n');
+  DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

   // Now that we have linearized the tree to a list and have gathered all of
   // the operands and their ranks, sort the operands by their rank.  Use a
@@ -982,7 +996,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
   if (Value *V = OptimizeExpression(I, Ops)) {
     // This expression tree simplified to something that isn't a tree,
     // eliminate it.
-    DEBUG(errs() << "Reassoc to scalar: " << *V << '\n');
+    DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
     I->replaceAllUsesWith(V);
     RemoveDeadBinaryOp(I);
     ++NumAnnihil;
@@ -1001,7 +1015,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
     Ops.insert(Ops.begin(), Tmp);
   }

-  DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << '\n');
+  DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

   if (Ops.size() == 1) {
     // This expression tree simplified to something that isn't a tree,
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index d8c59b1d7421..02b45a148372 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -218,7 +218,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   /// This returns true if the block was not considered live before.
   bool MarkBlockExecutable(BasicBlock *BB) {
     if (!BBExecutable.insert(BB)) return false;
-    DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
+    DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
     BBWorkList.push_back(BB);  // Add the block to the work list!
     return true;
   }
@@ -316,7 +316,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   //
   void markConstant(LatticeVal &IV, Value *V, Constant *C) {
     if (!IV.markConstant(C)) return;
-    DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
+    DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
     InstWorkList.push_back(V);
   }
@@ -328,7 +328,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   void markForcedConstant(Value *V, Constant *C) {
     assert(!isa<StructType>(V->getType()) && "Should use other method");
     ValueState[V].markForcedConstant(C);
-    DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+    DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
     InstWorkList.push_back(V);
   }
@@ -339,11 +339,11 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   void markOverdefined(LatticeVal &IV, Value *V) {
     if (!IV.markOverdefined()) return;

-    DEBUG(errs() << "markOverdefined: ";
+    DEBUG(dbgs() << "markOverdefined: ";
           if (Function *F = dyn_cast<Function>(V))
-            errs() << "Function '" << F->getName() << "'\n";
+            dbgs() << "Function '" << F->getName() << "'\n";
           else
-            errs() << *V << '\n');
+            dbgs() << *V << '\n');
     // Only instructions go on the work list
     OverdefinedInstWorkList.push_back(V);
   }
@@ -431,7 +431,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
     // If the destination is already executable, we just made an *edge*
     // feasible that wasn't before.  Revisit the PHI nodes in the block
     // because they have potentially new operands.
-    DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+    DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
           << " -> " << Dest->getName() << "\n");

     PHINode *PN;
@@ -516,7 +516,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   void visitInstruction(Instruction &I) {
     // If a new instruction is added to LLVM that we don't handle.
-    errs() << "SCCP: Don't know how to handle: " << I;
+    dbgs() << "SCCP: Don't know how to handle: " << I;
     markAnythingOverdefined(&I);   // Just in case
   }
 };
@@ -580,7 +580,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
   }

 #ifndef NDEBUG
-  errs() << "Unknown terminator instruction: " << TI << '\n';
+  dbgs() << "Unknown terminator instruction: " << TI << '\n';
 #endif
   llvm_unreachable("SCCP: Don't know how to handle this terminator!");
 }
@@ -640,7 +640,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
     return true;

 #ifndef NDEBUG
-  errs() << "Unknown terminator instruction: " << *TI << '\n';
+  dbgs() << "Unknown terminator instruction: " << *TI << '\n';
 #endif
   llvm_unreachable(0);
 }
@@ -1324,7 +1324,7 @@ void SCCPSolver::Solve() {
     while (!OverdefinedInstWorkList.empty()) {
       Value *I = OverdefinedInstWorkList.pop_back_val();
-      DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n');
+      DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');

       // "I" got into the work list because it either made the transition from
       // bottom to constant
@@ -1343,7 +1343,7 @@ void SCCPSolver::Solve() {
     while (!InstWorkList.empty()) {
       Value *I = InstWorkList.pop_back_val();
-      DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n');
+      DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');

       // "I" got into the work list because it made the transition from undef to
       // constant.
@@ -1364,7 +1364,7 @@ void SCCPSolver::Solve() {
       BasicBlock *BB = BBWorkList.back();
       BBWorkList.pop_back();

-      DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n');
+      DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');

       // Notify all instructions in this basic block that they are newly
       // executable.
@@ -1597,7 +1597,7 @@ FunctionPass *llvm::createSCCPPass() {
 }

 static void DeleteInstructionInBlock(BasicBlock *BB) {
-  DEBUG(errs() << "  BasicBlock Dead:" << *BB);
+  DEBUG(dbgs() << "  BasicBlock Dead:" << *BB);
   ++NumDeadBlocks;

   // Delete the instructions backwards, as it has a reduced likelihood of
@@ -1616,7 +1616,7 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
 // and return true if the function was modified.
 //
 bool SCCP::runOnFunction(Function &F) {
-  DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n");
+  DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
   SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());

   // Mark the first block of the function as being executable.
@@ -1630,7 +1630,7 @@ bool SCCP::runOnFunction(Function &F) {
   bool ResolvedUndefs = true;
   while (ResolvedUndefs) {
     Solver.Solve();
-    DEBUG(errs() << "RESOLVING UNDEFs\n");
+    DEBUG(dbgs() << "RESOLVING UNDEFs\n");
     ResolvedUndefs = Solver.ResolvedUndefsIn(F);
   }
@@ -1665,7 +1665,7 @@ bool SCCP::runOnFunction(Function &F) {
       Constant *Const = IV.isConstant()
         ? IV.getConstant() : UndefValue::get(Inst->getType());
-      DEBUG(errs() << "  Constant: " << *Const << " = " << *Inst);
+      DEBUG(dbgs() << "  Constant: " << *Const << " = " << *Inst);

       // Replaces all of the uses of a variable with uses of the constant.
       Inst->replaceAllUsesWith(Const);
@@ -1775,7 +1775,7 @@ bool IPSCCP::runOnModule(Module &M) {
   while (ResolvedUndefs) {
     Solver.Solve();

-    DEBUG(errs() << "RESOLVING UNDEFS\n");
+    DEBUG(dbgs() << "RESOLVING UNDEFS\n");
     ResolvedUndefs = false;
     for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
       ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
@@ -1802,7 +1802,7 @@ bool IPSCCP::runOnModule(Module &M) {
         Constant *CST = IV.isConstant() ?
           IV.getConstant() : UndefValue::get(AI->getType());
-        DEBUG(errs() << "***  Arg " << *AI << " = " << *CST <<"\n");
+        DEBUG(dbgs() << "***  Arg " << *AI << " = " << *CST <<"\n");

         // Replaces all of the uses of a variable with uses of the
         // constant.
@@ -1847,7 +1847,7 @@ bool IPSCCP::runOnModule(Module &M) {
         Constant *Const = IV.isConstant()
           ? IV.getConstant() : UndefValue::get(Inst->getType());
-        DEBUG(errs() << "  Constant: " << *Const << " = " << *Inst);
+        DEBUG(dbgs() << "  Constant: " << *Const << " = " << *Inst);

         // Replaces all of the uses of a variable with uses of the
         // constant.
@@ -1944,7 +1944,7 @@ bool IPSCCP::runOnModule(Module &M) {
     GlobalVariable *GV = I->first;
     assert(!I->second.isOverdefined() &&
            "Overdefined values should have been taken out of the map!");
-    DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n");
+    DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
     while (!GV->use_empty()) {
       StoreInst *SI = cast<StoreInst>(GV->use_back());
       SI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index f91fbdaa4f23..9685a2945f8c 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -678,8 +678,7 @@ bool SCCVN::runOnFunction(Function& F) {
       stack.push_back(*PI);

     while (!stack.empty()) {
-      BasicBlock* CurrBB = stack.back();
-      stack.pop_back();
+      BasicBlock* CurrBB = stack.pop_back_val();
       visited.insert(CurrBB);

       ValueNumberScope* S = BBMap[CurrBB];
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 79bb7c547caa..9e1e79a1c29c 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -252,8 +252,8 @@ bool SROA::performScalarRepl(Function &F) {
     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
     if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
-      DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
-      DEBUG(errs() << "  memcpy = " << *TheCopy << '\n');
+      DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
       Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
       AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
       TheCopy->eraseFromParent();  // Don't mutate the global.
@@ -314,14 +314,14 @@ bool SROA::performScalarRepl(Function &F) {
       // we just get a lot of insert/extracts.  If at least one vector is
       // involved, then we probably really do have a union of vector/array.
       if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
-        DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
+        DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
               << *VectorTy << '\n');

         // Create and insert the vector alloca.
         NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
         ConvertUsesToScalar(AI, NewAI, 0);
       } else {
-        DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+        DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");

         // Create and insert the integer alloca.
         const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
@@ -345,7 +345,7 @@ bool SROA::performScalarRepl(Function &F) {
 /// predicate, do SROA now.
 void SROA::DoScalarReplacement(AllocaInst *AI,
                                std::vector<AllocaInst*> &WorkList) {
-  DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
+  DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
   SmallVector<AllocaInst*, 32> ElementAllocas;
   if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
     ElementAllocas.reserve(ST->getNumContainedTypes());
@@ -919,7 +919,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                              IntegerType::get(SI->getContext(), AllocaSizeBits),
                              "", SI);

-  DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
         << '\n');

   // There are two forms here: AI could be an array or struct.  Both cases
@@ -1029,7 +1029,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
   const Type *AllocaEltTy = AI->getAllocatedType();
   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

-  DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
        << '\n');

   // There are two forms here: AI could be an array or struct.  Both cases
@@ -1153,7 +1153,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
   isSafeForScalarRepl(AI, AI, 0, Info);
   if (Info.isUnsafe) {
-    DEBUG(errs() << "Cannot transform: " << *AI << '\n');
+    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
     return 0;
   }
@@ -1181,7 +1181,7 @@ void SROA::CleanupAllocaUsers(Value *V) {
       if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
        // Safe to remove debug info uses.
        while (!DbgInUses.empty()) {
-         DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
+         DbgInfoIntrinsic *DI = DbgInUses.pop_back_val();
          DI->eraseFromParent();
        }
        I->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index a36da7851967..43447de0792f 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -99,9 +99,8 @@ static bool MarkAliveBlocks(BasicBlock *BB,
   SmallVector<BasicBlock*, 128> Worklist;
   Worklist.push_back(BB);
   bool Changed = false;
-  while (!Worklist.empty()) {
-    BB = Worklist.back();
-    Worklist.pop_back();
+  do {
+    BB = Worklist.pop_back_val();
     if (!Reachable.insert(BB))
       continue;
@@ -150,7 +149,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
     Changed |= ConstantFoldTerminator(BB);
     for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
       Worklist.push_back(*SI);
-  }
+  } while (!Worklist.empty());
   return Changed;
 }
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 3c28ad27e57a..9183f3aac1fb 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -80,7 +80,7 @@ class LibCallOptimization {
   /// specified pointer and character.  Ptr is required to be some pointer type,
   /// and the return value has 'i8*' type.
   Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B);
-  
+
   /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
   /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
   Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
@@ -101,10 +101,11 @@ class LibCallOptimization {
   /// EmitMemSet - Emit a call to the memset function
   Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B);

-  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
-  /// 'floor').  This function is known to take a single of type matching 'Op'
-  /// and returns one value with the same type.  If 'Op' is a long double, 'l'
-  /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
+  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+  /// (e.g. 'floor').  This function is known to take a single of type matching
+  /// 'Op' and returns one value with the same type.  If 'Op' is a long double,
+  /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f'
+  /// suffix.
   Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
                               const AttrListPtr &Attrs);
@@ -163,7 +164,7 @@ Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) {
   Module *M = Caller->getParent();
   AttributeWithIndex AWI =
     AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
-  
+
   const Type *I8Ptr = Type::getInt8PtrTy(*Context);
   const Type *I32Ty = Type::getInt32Ty(*Context);
   Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
@@ -236,8 +237,8 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
   Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
                                          Type::getInt32Ty(*Context),
-                                         Type::getInt8PtrTy(*Context),
-                                         Type::getInt8PtrTy(*Context),
+                                         Type::getInt8PtrTy(*Context),
+                                         Type::getInt8PtrTy(*Context),
                                          TD->getIntPtrType(*Context), NULL);
   CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
                                Len, "memcmp");
@@ -504,8 +505,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {

   // Must be a Constant Array
   ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (!Array ||
-      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
+  if (!Array || !Array->getType()->getElementType()->isInteger(8))
     return false;

   // Get the number of elements in the array
@@ -677,8 +677,7 @@ struct StrChrOpt : public LibCallOptimization {
     if (!TD) return 0;

     uint64_t Len = GetStringLength(SrcStr);
-    if (Len == 0 ||
-        FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32.
+    if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32.
       return 0;

     return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
@@ -720,7 +719,7 @@ struct StrCmpOpt : public LibCallOptimization {
     // Verify the "strcmp" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Type::getInt32Ty(*Context) ||
+        !FT->getReturnType()->isInteger(32) ||
         FT->getParamType(0) != FT->getParamType(1) ||
         FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
@@ -768,7 +767,7 @@ struct StrNCmpOpt : public LibCallOptimization {
     // Verify the "strncmp" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 ||
-        FT->getReturnType() != Type::getInt32Ty(*Context) ||
+        !FT->getReturnType()->isInteger(32) ||
         FT->getParamType(0) != FT->getParamType(1) ||
         FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getParamType(2)))
       return 0;
@@ -949,20 +948,20 @@ struct StrStrOpt : public LibCallOptimization {
     // fold strstr(x, x) -> x.
     if (CI->getOperand(1) == CI->getOperand(2))
       return B.CreateBitCast(CI->getOperand(1), CI->getType());
-    
+
     // See if either input string is a constant string.
     std::string SearchStr, ToFindStr;
     bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
     bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
-    
+
     // fold strstr(x, "") -> x.
     if (HasStr2 && ToFindStr.empty())
       return B.CreateBitCast(CI->getOperand(1), CI->getType());
-    
+
     // If both strings are known, constant fold it.
     if (HasStr1 && HasStr2) {
       std::string::size_type Offset = SearchStr.find(ToFindStr);
-      
+
       if (Offset == std::string::npos) // strstr("foo", "bar") -> null
         return Constant::getNullValue(CI->getType());
@@ -971,7 +970,7 @@ struct StrStrOpt : public LibCallOptimization {
       Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
       return B.CreateBitCast(Result, CI->getType());
     }
-    
+
     // fold strstr(x, "y") -> strchr(x, 'y').
     if (HasStr2 && ToFindStr.size() == 1)
       return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B),
@@ -979,7 +978,7 @@ struct StrStrOpt : public LibCallOptimization {
     return 0;
   }
 };
-  
+

 //===---------------------------------------===//
 // 'memcmp' Optimizations
@@ -989,7 +988,7 @@ struct MemCmpOpt : public LibCallOptimization {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getReturnType() != Type::getInt32Ty(*Context))
+        !FT->getReturnType()->isInteger(32))
       return 0;

     Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
@@ -1095,27 +1094,6 @@ struct MemSetOpt : public LibCallOptimization {
 // Object Size Checking Optimizations
 //===----------------------------------------------------------------------===//

-//===---------------------------------------===//
-// 'object size'
-namespace {
-struct SizeOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // TODO: We can do more with this, but delaying to here should be no change
-    // in behavior.
-    ConstantInt *Const = dyn_cast<ConstantInt>(CI->getOperand(2));
-
-    if (!Const) return 0;
-
-    const Type *Ty = Callee->getFunctionType()->getReturnType();
-
-    if (Const->getZExtValue() == 0)
-      return Constant::getAllOnesValue(Ty);
-    else
-      return ConstantInt::get(Ty, 0);
-  }
-};
-}
-
 //===---------------------------------------===//
 // 'memcpy_chk' Optimizations
@@ -1351,7 +1329,7 @@ struct FFSOpt : public LibCallOptimization {
     // Just make sure this has 2 arguments of the same FP type, which match the
     // result type.
     if (FT->getNumParams() != 1 ||
-        FT->getReturnType() != Type::getInt32Ty(*Context) ||
+        !FT->getReturnType()->isInteger(32) ||
         !isa<IntegerType>(FT->getParamType(0)))
       return 0;
@@ -1387,7 +1365,7 @@ struct IsDigitOpt : public LibCallOptimization {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(i32)
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
-        FT->getParamType(0) != Type::getInt32Ty(*Context))
+        !FT->getParamType(0)->isInteger(32))
       return 0;

     // isdigit(c) -> (c-'0') <u 10
@@ -1402,7 +1380,7 @@ struct IsAsciiOpt : public LibCallOptimization {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(i32)
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
-        FT->getParamType(0) != Type::getInt32Ty(*Context))
+        !FT->getParamType(0)->isInteger(32))
       return 0;

     // isascii(c) -> c <u 128
@@ -1417,7 +1395,7 @@ struct ToAsciiOpt : public LibCallOptimization {
     const FunctionType *FT = Callee->getFunctionType();
     // We require i32(i32)
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
-        FT->getParamType(0) != Type::getInt32Ty(*Context))
+        !FT->getParamType(0)->isInteger(32))
       return 0;

     // isascii(c) -> c & 0x7f
@@ -1558,7 +1536,8 @@ struct SPrintFOpt : public LibCallOptimization {

       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
       EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
-          ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+                 ConstantInt::get
+                 (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
@@ -1688,8 +1667,9 @@ struct FPrintFOpt : public LibCallOptimization {
       // These optimizations require TargetData.
       if (!TD) return 0;

-      EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context),
-                 FormatStr.size()),
+      EmitFWrite(CI->getOperand(2),
+                 ConstantInt::get(TD->getIntPtrType(*Context),
+                                  FormatStr.size()),
                  CI->getOperand(1), B);
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
@@ -1744,7 +1724,6 @@ namespace {
     FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
     // Object Size Checking
-    SizeOpt ObjectSize;
     MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk;

     bool Modified;  // This is only used by doInitialization.
@@ -1854,8 +1833,6 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["fprintf"] = &FPrintF;

   // Object Size Checking
-  Optimizations["llvm.objectsize.i32"] = &ObjectSize;
-  Optimizations["llvm.objectsize.i64"] = &ObjectSize;
   Optimizations["__memcpy_chk"] = &MemCpyChk;
   Optimizations["__memset_chk"] = &MemSetChk;
   Optimizations["__memmove_chk"] = &MemMoveChk;
@@ -1896,8 +1873,8 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
       Value *Result = LCO->OptimizeCall(CI, TD, Builder);
       if (Result == 0) continue;

-      DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI;
-            errs() << "  into: " << *Result << "\n");
+      DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
+            dbgs() << "  into: " << *Result << "\n");

       // Something changed!
       Changed = true;
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index b06ae3def00e..2306a77670fe 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
   BasicBlock *DestBlock = Branch->getSuccessor(0);
   assert(SourceBlock != DestBlock && "Our predicate is broken!");

-  DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+  DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
         << "]: Eliminating branch: " << *Branch);

   // See if we can avoid duplicating code by moving it up to a dominator of both
   // blocks.
   if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
-    DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n");
+    DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");

     // If there are non-phi instructions in DestBlock that have no operands
     // defined in DestBlock, and if the instruction has no side effects, we can
@@ -272,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {

         // Remove from DestBlock, move right before the term in DomBlock.
         DestBlock->getInstList().remove(I);
         DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
-        DEBUG(errs() << "Hoisted: " << *I);
+        DEBUG(dbgs() << "Hoisted: " << *I);
       }
     }
   }
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 135a621f5d96..8c4aa592ddfe 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Instruction.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
@@ -54,8 +55,8 @@ void ExtAddrMode::print(raw_ostream &OS) const {
 }

 void ExtAddrMode::dump() const {
-  print(errs());
-  errs() << '\n';
+  print(dbgs());
+  dbgs() << '\n';
 }
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2962e8497e8d..e902688f2066 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -78,7 +78,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
 /// is dead. Also recursively delete any operands that become dead as
 /// a result. This includes tracing the def-use list from the PHI to see if
 /// it is ultimately unused or if it reaches an unused cycle.
-void llvm::DeleteDeadPHIs(BasicBlock *BB) {
+bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
   // Recursively deleting a PHI may cause multiple PHIs to be deleted
   // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
   SmallVector<WeakVH, 8> PHIs;
@@ -86,9 +86,12 @@ void llvm::DeleteDeadPHIs(BasicBlock *BB) {
        PHINode *PN = dyn_cast<PHINode>(I); ++I)
     PHIs.push_back(PN);

+  bool Changed = false;
   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
     if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
-      RecursivelyDeleteDeadPHINode(PN);
+      Changed |= RecursivelyDeleteDeadPHINode(PN);
+
+  return Changed;
 }

 /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
@@ -252,7 +255,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
     Value *RetVal = 0;

     // Create a value to return... if the function doesn't return null...
-    if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext()))
+    if (!BB->getParent()->getReturnType()->isVoidTy())
       RetVal = Constant::getNullValue(BB->getParent()->getReturnType());

     // Create the return...
@@ -673,16 +676,3 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,

   return 0;
 }
-
-/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint,
-/// make a copy of the stoppoint before InsertPos (presumably before copying
-/// or moving I).
-void llvm::CopyPrecedingStopPoint(Instruction *I,
-                                  BasicBlock::iterator InsertPos) {
-  if (I != I->getParent()->begin()) {
-    BasicBlock::iterator BBI = I;  --BBI;
-    if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BBI)) {
-      CallInst *newDSPI = cast<CallInst>(DSPI->clone());
-      newDSPI->insertBefore(InsertPos);
-    }
-  }
-}
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index b5ffe0606504..c580b8fed98c 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() {
     }
   }

-  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
+  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

   // Inline call sites.
   bool Changed = false;
@@ -109,21 +109,21 @@ void BasicInlinerImpl::inlineFunctions() {
       }
       InlineCost IC = CA.getInlineCost(CS, NeverInline);
       if (IC.isAlways()) {
-        DEBUG(errs() << "  Inlining: cost=always"
+        DEBUG(dbgs() << "  Inlining: cost=always"
               <<", call: " << *CS.getInstruction());
       } else if (IC.isNever()) {
-        DEBUG(errs() << "  NOT Inlining: cost=never"
+        DEBUG(dbgs() << "  NOT Inlining: cost=never"
               <<", call: " << *CS.getInstruction());
         continue;
       } else {
         int Cost = IC.getValue();
         if (Cost >= (int) BasicInlineThreshold) {
-          DEBUG(errs() << "  NOT Inlining: cost = " << Cost
+          DEBUG(dbgs() << "  NOT Inlining: cost = " << Cost
                 << ", call: " <<  *CS.getInstruction());
           continue;
         } else {
-          DEBUG(errs() << "  Inlining: cost = " << Cost
+          DEBUG(dbgs() << "  Inlining: cost = " << Cost
                 << ", call: " <<  *CS.getInstruction());
         }
       }
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index c287747b91d3..bd750ccd4d25 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -184,7 +184,6 @@ namespace {
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
     const TargetData *TD;
-    Value *DbgFnStart;
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
                           DenseMap<const Value*, Value*> &valueMap,
@@ -193,7 +192,7 @@ namespace {
                           ClonedCodeInfo *codeInfo,
                           const TargetData *td)
     : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
-      NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td), DbgFnStart(NULL) {
+      NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
     }

     /// CloneBlock - The specified block is found to be reachable, clone it and
@@ -235,19 +234,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
       continue;
     }

-    // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.
-    if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {
-      if (DbgFnStart == NULL) {
-        DISubprogram SP(DFSI->getSubprogram());
-        if (SP.describes(BB->getParent()))
-          DbgFnStart = DFSI->getSubprogram();
-      }
-    }
-    if (const DbgRegionEndInst *DREIS = dyn_cast<DbgRegionEndInst>(II)) {
-      if (DREIS->getContext() == DbgFnStart)
-        continue;
-    }
-
     Instruction *NewInst = II->clone();
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
index 7e000a1a75fe..38928dc7cc88 100644
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -91,7 +91,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,

   Loop *NewParentLoop = NULL;

-  while (!LoopNest.empty()) {
+  do {
     Loop *L = LoopNest.pop_back_val();
     Loop *NewLoop = new Loop();

@@ -123,7 +123,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
     // Process sub loops
     for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
       LoopNest.push_back(*I);
-  }
+  } while (!LoopNest.empty());

   // Remap instructions to reference operands from ValueMap.
for(SmallVector::iterator NBItr = NewBlocks.begin(), diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f966681db933..b20849485306 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include #include @@ -44,8 +45,8 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, namespace { class CodeExtractor { - typedef std::vector Values; - std::set BlocksToExtract; + typedef SetVector Values; + SetVector BlocksToExtract; DominatorTree* DT; bool AggregateArgs; unsigned NumExitBlocks; @@ -135,7 +136,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; - BlocksToExtract.erase(OldPred); + BlocksToExtract.remove(OldPred); BlocksToExtract.insert(NewBB); Header = NewBB; @@ -180,7 +181,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { } void CodeExtractor::splitReturnBlocks() { - for (std::set::iterator I = BlocksToExtract.begin(), + for (SetVector::iterator I = BlocksToExtract.begin(), E = BlocksToExtract.end(); I != E; ++I) if (ReturnInst *RI = dyn_cast((*I)->getTerminator())) { BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); @@ -206,7 +207,7 @@ void CodeExtractor::splitReturnBlocks() { // void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { std::set ExitBlocks; - for (std::set::const_iterator ci = BlocksToExtract.begin(), + for (SetVector::const_iterator ci = BlocksToExtract.begin(), ce = BlocksToExtract.end(); ci != ce; ++ci) { BasicBlock *BB = *ci; @@ -215,13 +216,13 @@ void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { // instruction is used outside the region, it's an output. for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O) if (definedInCaller(*O)) - inputs.push_back(*O); + inputs.insert(*O); // Consider uses of this instruction (outputs). for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) if (!definedInRegion(*UI)) { - outputs.push_back(I); + outputs.insert(I); break; } } // for: insts @@ -234,12 +235,6 @@ void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { } // for: basic blocks NumExitBlocks = ExitBlocks.size(); - - // Eliminate duplicates. - std::sort(inputs.begin(), inputs.end()); - inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end()); - std::sort(outputs.begin(), outputs.end()); - outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end()); } /// constructFunction - make a function based on inputs and outputs, as follows: @@ -252,8 +247,8 @@ Function *CodeExtractor::constructFunction(const Values &inputs, BasicBlock *newHeader, Function *oldFunction, Module *M) { - DEBUG(errs() << "inputs: " << inputs.size() << "\n"); - DEBUG(errs() << "outputs: " << outputs.size() << "\n"); + DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); // This function returns unsigned, outputs will go back by reference. 
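The CodeExtractor hunks above swap std::vector/std::set for SetVector, which preserves insertion order while rejecting duplicates, so the explicit sort-and-unique cleanup below can go away. A small sketch of that property, mirroring the Values typedef; the helper function is hypothetical:

    #include "llvm/ADT/SetVector.h"
    #include "llvm/Value.h"

    using namespace llvm;

    typedef SetVector<Value*> Values;  // mirrors the typedef in CodeExtractor

    static void addInput(Values &Inputs, Value *V) {
      // insert() is a no-op on duplicates and keeps first-insertion order,
      // so no std::sort/std::unique pass is needed and iteration over the
      // collected inputs stays deterministic.
      Inputs.insert(V);
    }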
switch (NumExitBlocks) { @@ -269,25 +264,25 @@ Function *CodeExtractor::constructFunction(const Values &inputs, for (Values::const_iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) { const Value *value = *i; - DEBUG(errs() << "value used in func: " << *value << "\n"); + DEBUG(dbgs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. for (Values::const_iterator I = outputs.begin(), E = outputs.end(); I != E; ++I) { - DEBUG(errs() << "instr used in func: " << **I << "\n"); + DEBUG(dbgs() << "instr used in func: " << **I << "\n"); if (AggregateArgs) paramTy.push_back((*I)->getType()); else paramTy.push_back(PointerType::getUnqual((*I)->getType())); } - DEBUG(errs() << "Function type: " << *RetTy << " f("); + DEBUG(dbgs() << "Function type: " << *RetTy << " f("); for (std::vector::iterator i = paramTy.begin(), e = paramTy.end(); i != e; ++i) - DEBUG(errs() << **i << ", "); - DEBUG(errs() << ")\n"); + DEBUG(dbgs() << **i << ", "); + DEBUG(dbgs() << ")\n"); if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { PointerType *StructPtr = @@ -482,7 +477,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, std::map ExitBlockMap; unsigned switchVal = 0; - for (std::set::const_iterator i = BlocksToExtract.begin(), + for (SetVector::const_iterator i = BlocksToExtract.begin(), e = BlocksToExtract.end(); i != e; ++i) { TerminatorInst *TI = (*i)->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) @@ -593,7 +588,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // this should be rewritten as a `ret' // Check if the function should return a value - if (OldFnRetTy == Type::getVoidTy(Context)) { + if (OldFnRetTy->isVoidTy()) { ReturnInst::Create(Context, 0, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have @@ -633,7 +628,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - for (std::set::const_iterator i = BlocksToExtract.begin(), + for (SetVector::const_iterator i = BlocksToExtract.begin(), e = BlocksToExtract.end(); i != e; ++i) { // Delete the basic block from the old function, and the list of blocks oldBlocks.remove(*i); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 043046c813e6..17f8827fd5c0 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -210,34 +210,6 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } -/// findFnRegionEndMarker - This is a utility routine that is used by -/// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds -/// to the llvm.dbg.func.start of the function F. Otherwise return NULL. 
-/// -static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) { - - MDNode *FnStart = NULL; - const DbgRegionEndInst *FnEnd = NULL; - for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) { - if (FnStart == NULL) { - if (const DbgFuncStartInst *FSI = dyn_cast(BI)) { - DISubprogram SP(FSI->getSubprogram()); - assert (SP.isNull() == false && "Invalid llvm.dbg.func.start"); - if (SP.describes(F)) - FnStart = SP.getNode(); - } - continue; - } - - if (const DbgRegionEndInst *REI = dyn_cast(BI)) - if (REI->getContext() == FnStart) - FnEnd = REI; - } - return FnEnd; -} - // InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. @@ -364,23 +336,6 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, ValueMap[I] = ActualArg; } - // Adjust llvm.dbg.region.end. If the CalledFunc has region end - // marker then clone that marker after next stop point at the - // call site. The function body cloner does not clone original - // region end marker from the CalledFunc. This will ensure that - // inlined function's scope ends at the right place. - if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) { - for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end(); - BI != BE; ++BI) { - if (DbgStopPointInst *DSPI = dyn_cast(BI)) { - if (DbgRegionEndInst *NewDREI = - dyn_cast(DREI->clone())) - NewDREI->insertAfter(DSPI); - break; - } - } - } - // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 7f11acf4d8e2..090af95c4b87 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -32,7 +32,7 @@ namespace { bool runOnFunction(Function &F) { for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext())) + if (!AI->hasName() && !AI->getType()->isVoidTy()) AI->setName("arg"); for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -40,7 +40,7 @@ namespace { BB->setName("bb"); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext())) + if (!I->hasName() && !I->getType()->isVoidTy()) I->setName("tmp"); } return true; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 2426e3e41858..90e929e127e2 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -268,16 +268,17 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a /// trivially dead instruction, delete it. If that makes any of its operands -/// trivially dead, delete them too, recursively. -void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +/// trivially dead, delete them too, recursively. Return true if any +/// instructions were deleted. 
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
-    return;
+    return false;

   SmallVector<Instruction*, 16> DeadInsts;
   DeadInsts.push_back(I);

-  while (!DeadInsts.empty()) {
+  do {
     I = DeadInsts.pop_back_val();

     // Null out all of the instruction's operands to see if any operand becomes
@@ -297,22 +298,25 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
     }

     I->eraseFromParent();
-  }
+  } while (!DeadInsts.empty());
+
+  return true;
 }

 /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
 /// dead PHI node, due to being a def-use chain of single-use nodes that
 /// either forms a cycle or is terminated by a trivially dead instruction,
 /// delete it. If that makes any of its operands trivially dead, delete them
-/// too, recursively.
-void
+/// too, recursively. Return true if the PHI node is actually deleted.
+bool
 llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
   // We can remove a PHI if it is on a cycle in the def-use graph
   // where each node in the cycle has degree one, i.e. only one use,
   // and is an instruction with no side effects.
   if (!PN->hasOneUse())
-    return;
+    return false;

+  bool Changed = false;
   SmallPtrSet<PHINode*, 4> PHIs;
   PHIs.insert(PN);
   for (Instruction *J = cast<Instruction>(*PN->use_begin());
@@ -324,9 +328,35 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
       if (!PHIs.insert(cast<PHINode>(JP))) {
         // Break the cycle and delete the PHI and its operands.
         JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
-        RecursivelyDeleteTriviallyDeadInstructions(JP);
+        (void)RecursivelyDeleteTriviallyDeadInstructions(JP);
+        Changed = true;
         break;
       }
+  return Changed;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code, note that it can delete
+/// instructions in other blocks as well as in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { + bool MadeChange = false; + for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { + Instruction *Inst = BI++; + + if (Value *V = SimplifyInstruction(Inst, TD)) { + WeakVH BIHandle(BI); + ReplaceAndSimplifyAllUses(Inst, V, TD); + MadeChange = true; + if (BIHandle == 0) + BI = BB->begin(); + continue; + } + + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + return MadeChange; } //===----------------------------------------------------------------------===// @@ -421,7 +451,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DEBUG(errs() << "Looking to fold " << BB->getName() << " into " + DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe @@ -456,7 +486,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { PI != PE; PI++) { if (BBPN->getIncomingValueForBlock(*PI) != PN->getIncomingValueForBlock(*PI)) { - DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " << (*PI)->getName() << "\n"); @@ -471,7 +501,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // one for BB, in which case this phi node will not prevent the merging // of the block. if (Val != PN->getIncomingValueForBlock(*PI)) { - DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " << "predecessor " << (*PI)->getName() << "\n"); return false; @@ -525,7 +555,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { } } - DEBUG(errs() << "Killing Trivial BB: \n" << *BB); + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); if (isa(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6b2c5916d1f2..53117a01a3dc 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -63,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { if (OnlyPred->getTerminator()->getNumSuccessors() != 1) return 0; - DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred); + DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); // Resolve any PHI nodes at the start of the block. 
They are all // guaranteed to have exactly one entry if they exist, unless there are @@ -110,13 +110,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n"); + DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { - DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n"); + DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } @@ -125,7 +125,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. - DEBUG(errs() << + DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } @@ -138,9 +138,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) - DEBUG(errs() << " Trip Count = " << TripCount << "\n"); + DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) - DEBUG(errs() << " Trip Multiple = " << TripMultiple << "\n"); + DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. @@ -166,17 +166,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) } if (CompletelyUnroll) { - DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() + DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); } else { - DEBUG(errs() << "UNROLLING loop %" << Header->getName() + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { - DEBUG(errs() << " with a breakout at trip " << BreakoutTrip); + DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { - DEBUG(errs() << " with " << TripMultiple << " trips per branch"); + DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); } - DEBUG(errs() << "!\n"); + DEBUG(dbgs() << "!\n"); } std::vector LoopBlocks = L->getBlocks(); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 6e6e8d2287a1..766c4d99a8f9 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -255,7 +255,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { // Insert a return instruction. This really should be a "barrier", as it // is unreachable. ReturnInst::Create(F.getContext(), - F.getReturnType() == Type::getVoidTy(F.getContext()) ? + F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType()), UI); // Remove the unwind instruction now. 
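With DeleteDeadPHIs, RecursivelyDeleteTriviallyDeadInstructions, and the new SimplifyInstructionsInBlock all reporting whether they changed anything, callers can compute an honest modified flag instead of unconditionally claiming a change. A caller-side sketch under those signatures; the wrapper function itself is hypothetical:

    #include "llvm/Function.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    // Fold the utilities' bool results into what a pass would return
    // from runOnFunction.
    static bool cleanupFunction(Function &F, const TargetData *TD) {
      bool Changed = false;
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
        Changed |= DeleteDeadPHIs(BB);
        Changed |= SimplifyInstructionsInBlock(BB, TD);
      }
      return Changed;
    }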
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 743bb6e99bc8..468a5fe4c5e5 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -137,12 +137,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, unsigned Mid = Size / 2; std::vector LHS(Begin, Begin + Mid); - DEBUG(errs() << "LHS: " << LHS << "\n"); + DEBUG(dbgs() << "LHS: " << LHS << "\n"); std::vector RHS(Begin + Mid, End); - DEBUG(errs() << "RHS: " << RHS << "\n"); + DEBUG(dbgs() << "RHS: " << RHS << "\n"); CaseRange& Pivot = *(Begin + Mid); - DEBUG(errs() << "Pivot ==> " + DEBUG(dbgs() << "Pivot ==> " << cast(Pivot.Low)->getValue() << " -" << cast(Pivot.High)->getValue() << "\n"); @@ -306,9 +306,9 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { CaseVector Cases; unsigned numCmps = Clusterify(Cases, SI); - DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << "\n"); - DEBUG(errs() << "Cases: " << Cases << "\n"); + DEBUG(dbgs() << "Cases: " << Cases << "\n"); (void)numCmps; BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 846e432a7df3..baaa130df68f 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -448,13 +448,13 @@ void PromoteMem2Reg::run() { // std::vector RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); - while (!RenamePassWorkList.empty()) { + do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); - } + } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 9881b3c2b6c3..161bf217852c 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -191,7 +191,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI; } @@ -352,7 +352,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { InsertedPHI->eraseFromParent(); InsertedVal = ConstVal; } else { - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. 
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp index 1c4afffeb029..4e813ddf95c7 100644 --- a/lib/Transforms/Utils/SSI.cpp +++ b/lib/Transforms/Utils/SSI.cpp @@ -416,7 +416,7 @@ bool SSIEverything::runOnFunction(Function &F) { for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) - if (I->getType() != Type::getVoidTy(F.getContext())) + if (!I->getType()->isVoidTy()) Insts.push_back(I); ssi.createSSI(Insts); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d7ca45e6e970..cb532969ef91 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -459,7 +459,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Remove PHI node entries for the dead edge. ThisCases[0].second->removePredecessor(TI->getParent()); - DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); @@ -472,7 +472,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].first); - DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); for (unsigned i = SI->getNumCases()-1; i != 0; --i) @@ -481,7 +481,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, SI->removeCase(i); } - DEBUG(errs() << "Leaving: " << *TI << "\n"); + DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } } @@ -524,7 +524,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, Instruction *NI = BranchInst::Create(TheRealDest, TI); (void) NI; - DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); @@ -753,7 +753,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI, NT); - if (NT->getType() != Type::getVoidTy(BB1->getContext())) { + if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); NT->takeName(I1); @@ -1011,7 +1011,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast(PN->getIncomingValue(i))) && - CB->getType() == Type::getInt1Ty(BB->getContext())) { + CB->getType()->isInteger(1)) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1111,7 +1111,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN) { if (NumPhis > 2) return false; - DEBUG(errs() << "FOUND IF CONDITION! " << *IfCond << " T: " + DEBUG(dbgs() << "FOUND IF CONDITION! 
" << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); // Loop over the PHI's seeing if we can promote them all to select @@ -1295,7 +1295,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { ReturnInst::Create(BI->getContext(), TrueValue, BI); (void) RI; - DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); @@ -1377,7 +1377,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { else continue; - DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { @@ -1511,7 +1511,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent() + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); @@ -1531,7 +1531,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { OtherDest = InfLoopBlock; } - DEBUG(errs() << *PBI->getParent()->getParent()); + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. @@ -1581,8 +1581,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } - DEBUG(errs() << "INTO: " << *PBI->getParent()); - DEBUG(errs() << *PBI->getParent()->getParent()); + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); + DEBUG(dbgs() << *PBI->getParent()->getParent()); // This basic block is probably dead. We know it has at least // one fewer predecessor. @@ -1608,7 +1608,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { // Remove basic blocks that have no predecessors... or that just have themself // as a predecessor. These are unreachable. if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { - DEBUG(errs() << "Removing BB: \n" << *BB); + DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; } @@ -1651,20 +1651,13 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { if (!UncondBranchPreds.empty()) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DEBUG(errs() << "FOLDING: " << *BB + DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); Instruction *UncondBranch = Pred->getTerminator(); // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); Pred->getInstList().push_back(NewRet); - BasicBlock::iterator BBI = RI; - if (BBI != BB->begin()) { - // Move region end info into the predecessor. - if (DbgRegionEndInst *DREI = dyn_cast(--BBI)) - DREI->moveBefore(NewRet); - } - // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. 
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 30cb94d90385..3fa8b70a8505 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -112,7 +112,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { "UnifiedReturnBlock", &F); PHINode *PN = 0; - if (F.getReturnType() == Type::getVoidTy(F.getContext())) { + if (F.getReturnType()->isVoidTy()) { ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index d3c9d7794f1e..eff2c772c2b3 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -563,11 +564,14 @@ static SlotTracker *createSlotTracker(const Value *V) { if (const Function *Func = dyn_cast(V)) return new SlotTracker(Func); + if (isa(V)) + return new SlotTracker((Function *)0); + return 0; } #if 0 -#define ST_DEBUG(X) errs() << X +#define ST_DEBUG(X) dbgs() << X #else #define ST_DEBUG(X) #endif @@ -614,8 +618,7 @@ void SlotTracker::processModule() { E = TheModule->named_metadata_end(); I != E; ++I) { const NamedMDNode *NMD = I; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - // FIXME: Change accessor to be type safe. - if (MDNode *MD = cast_or_null(NMD->getOperand(i))) + if (MDNode *MD = NMD->getOperand(i)) CreateMetadataSlot(MD); } } @@ -832,7 +835,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast(CV)) { - if (CI->getType() == Type::getInt1Ty(CV->getContext())) { + if (CI->getType()->isInteger(1)) { Out << (CI->getZExtValue() ? "true" : "false"); return; } @@ -1136,6 +1139,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, return; } + if (!Machine) + Machine = createSlotTracker(V); Out << '!' << Machine->getMetadataSlot(N); return; } @@ -1369,10 +1374,10 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { Out << "!" << NMD->getName() << " = !{"; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { if (i) Out << ", "; - // FIXME: Change accessor to be typesafe. - // FIXME: This doesn't handle null?? - MDNode *MD = cast_or_null(NMD->getOperand(i)); - Out << '!' << Machine.getMetadataSlot(MD); + if (MDNode *MD = NMD->getOperand(i)) + Out << '!' << Machine.getMetadataSlot(MD); + else + Out << "null"; } Out << "}\n"; } @@ -2057,8 +2062,9 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { else W.printAlias(cast(GV)); } else if (const MDNode *N = dyn_cast(this)) { - SlotTracker SlotTable((Function*)0); - AssemblyWriter W(OS, SlotTable, 0, AAW); + Function *F = N->getFunction(); + SlotTracker SlotTable(F); + AssemblyWriter W(OS, SlotTable, getModuleFromVal(F), AAW); W.printMDNodeBody(N); } else if (const NamedMDNode *N = dyn_cast(this)) { SlotTracker SlotTable(N->getParent()); @@ -2085,17 +2091,17 @@ void Value::printCustom(raw_ostream &OS) const { } // Value::dump - allow easy printing of Values from the debugger. 
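Hunks throughout this patch replace pointer-identity checks against context-owned singleton types (Type::getVoidTy, Type::getInt1Ty, ...) with the predicate helpers isVoidTy() and isInteger(N), which read more directly and need no LLVMContext in hand. A minimal sketch of the idiom as used at this revision; the helper names are illustrative:

    #include "llvm/Instruction.h"
    #include "llvm/Type.h"

    using namespace llvm;

    static bool producesBool(const Instruction *I) {
      // Same meaning as I->getType() == Type::getInt1Ty(I->getContext()).
      return I->getType()->isInteger(1);
    }

    static bool producesNothing(const Instruction *I) {
      // Same meaning as comparing against Type::getVoidTy(...).
      return I->getType()->isVoidTy();
    }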
-void Value::dump() const { print(errs()); errs() << '\n'; } +void Value::dump() const { print(dbgs()); dbgs() << '\n'; } // Type::dump - allow easy printing of Types from the debugger. // This one uses type names from the given context module void Type::dump(const Module *Context) const { - WriteTypeSymbolic(errs(), this, Context); - errs() << '\n'; + WriteTypeSymbolic(dbgs(), this, Context); + dbgs() << '\n'; } // Type::dump - allow easy printing of Types from the debugger. void Type::dump() const { dump(0); } // Module::dump() - Allow printing of Modules from the debugger. -void Module::dump() const { print(errs(), 0); } +void Module::dump() const { print(dbgs(), 0); } diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index d68bba30729d..a371c6f92eb4 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/System/Atomic.h" #include "llvm/System/Mutex.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -318,11 +319,11 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const { } void AttrListPtr::dump() const { - errs() << "PAL[ "; + dbgs() << "PAL[ "; for (unsigned i = 0; i < getNumSlots(); ++i) { const AttributeWithIndex &PAWI = getSlot(i); - errs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} "; + dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} "; } - errs() << "]\n"; + dbgs() << "]\n"; } diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 77ab19f417ce..216184188ad2 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -480,61 +480,42 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) { } } -/// This function checks debug info intrinsics. If an intrinsic is invalid -/// then this function simply removes the intrinsic. +/// This function strips all debug info intrinsics, except for llvm.dbg.declare. +/// If an llvm.dbg.declare intrinsic is invalid, then this function simply +/// strips that use. 
void llvm::CheckDebugInfoIntrinsics(Module *M) { if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) { - if (!FuncStart->use_empty()) { - DbgFuncStartInst *DFSI = cast(FuncStart->use_back()); - if (!isa(DFSI->getOperand(1))) { - while (!FuncStart->use_empty()) { - CallInst *CI = cast(FuncStart->use_back()); - CI->eraseFromParent(); - } - FuncStart->eraseFromParent(); - } + while (!FuncStart->use_empty()) { + CallInst *CI = cast(FuncStart->use_back()); + CI->eraseFromParent(); } + FuncStart->eraseFromParent(); } - + if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) { - if (!StopPoint->use_empty()) { - DbgStopPointInst *DSPI = cast(StopPoint->use_back()); - if (!isa(DSPI->getOperand(3))) { - while (!StopPoint->use_empty()) { - CallInst *CI = cast(StopPoint->use_back()); - CI->eraseFromParent(); - } - StopPoint->eraseFromParent(); - } + while (!StopPoint->use_empty()) { + CallInst *CI = cast(StopPoint->use_back()); + CI->eraseFromParent(); } + StopPoint->eraseFromParent(); } if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) { - if (!RegionStart->use_empty()) { - DbgRegionStartInst *DRSI = cast(RegionStart->use_back()); - if (!isa(DRSI->getOperand(1))) { - while (!RegionStart->use_empty()) { - CallInst *CI = cast(RegionStart->use_back()); - CI->eraseFromParent(); - } - RegionStart->eraseFromParent(); - } + while (!RegionStart->use_empty()) { + CallInst *CI = cast(RegionStart->use_back()); + CI->eraseFromParent(); } + RegionStart->eraseFromParent(); } if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) { - if (!RegionEnd->use_empty()) { - DbgRegionEndInst *DREI = cast(RegionEnd->use_back()); - if (!isa(DREI->getOperand(1))) { - while (!RegionEnd->use_empty()) { - CallInst *CI = cast(RegionEnd->use_back()); - CI->eraseFromParent(); - } - RegionEnd->eraseFromParent(); - } + while (!RegionEnd->use_empty()) { + CallInst *CI = cast(RegionEnd->use_back()); + CI->eraseFromParent(); } + RegionEnd->eraseFromParent(); } if (Function *Declare = M->getFunction("llvm.dbg.declare")) { diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 2449739aaf50..3a24389134e7 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -1162,7 +1162,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, } // i1 can be simplified in many cases. - if (C1->getType() == Type::getInt1Ty(Context)) { + if (C1->getType()->isInteger(1)) { switch (Opcode) { case Instruction::Add: case Instruction::Sub: @@ -1229,10 +1229,10 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, // Ok, we have two differing integer indices. Sign extend them to be the same // type. Long is always big enough, so we use it. - if (C1->getType() != Type::getInt64Ty(Context)) + if (!C1->getType()->isInteger(64)) C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); - if (C2->getType() != Type::getInt64Ty(Context)) + if (!C2->getType()->isInteger(64)) C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); if (C1 == C2) return 0; // They are equal @@ -1587,7 +1587,7 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, } // If the comparison is a comparison between two i1's, simplify it. - if (C1->getType() == Type::getInt1Ty(Context)) { + if (C1->getType()->isInteger(1)) { switch(pred) { case ICmpInst::ICMP_EQ: if (isa(C2)) @@ -2042,10 +2042,10 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, // Before adding, extend both operands to i64 to avoid // overflow trouble. 
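The comment just above is the whole trick: two index constants of differing widths are first sign-extended to i64 so they can be compared or added without overflow. A standalone sketch of that widening step; the helper name is illustrative:

    #include "llvm/Constants.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"

    using namespace llvm;

    // Widen two integer constants to i64 before doing arithmetic on them,
    // as IdxCompare and ConstantFoldGetElementPtr do above.
    static void widenIndicesToI64(LLVMContext &Context,
                                  Constant *&C1, Constant *&C2) {
      if (!C1->getType()->isInteger(64))
        C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
      if (!C2->getType()->isInteger(64))
        C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
    }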
- if (PrevIdx->getType() != Type::getInt64Ty(Context)) + if (!PrevIdx->getType()->isInteger(64)) PrevIdx = ConstantExpr::getSExt(PrevIdx, Type::getInt64Ty(Context)); - if (Div->getType() != Type::getInt64Ty(Context)) + if (!Div->getType()->isInteger(64)) Div = ConstantExpr::getSExt(Div, Type::getInt64Ty(Context)); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index e3c6144c76aa..cc8961fb5f5e 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -110,7 +110,7 @@ void Constant::destroyConstantImpl() { Value *V = use_back(); #ifndef NDEBUG // Only in -g mode... if (!isa(V)) { - errs() << "While deleting: " << *this + dbgs() << "While deleting: " << *this << "\n\nUse still stuck around after Def is destroyed: " << *V << "\n\n"; } @@ -197,6 +197,24 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { if (const BlockAddress *BA = dyn_cast(this)) return BA->getFunction()->getRelocationInfo(); + // While raw uses of blockaddress need to be relocated, differences between + // two of them don't when they are for labels in the same function. This is a + // common idiom when creating a table for the indirect goto extension, so we + // handle it efficiently here. + if (const ConstantExpr *CE = dyn_cast(this)) + if (CE->getOpcode() == Instruction::Sub) { + ConstantExpr *LHS = dyn_cast(CE->getOperand(0)); + ConstantExpr *RHS = dyn_cast(CE->getOperand(1)); + if (LHS && RHS && + LHS->getOpcode() == Instruction::PtrToInt && + RHS->getOpcode() == Instruction::PtrToInt && + isa(LHS->getOperand(0)) && + isa(RHS->getOperand(0)) && + cast(LHS->getOperand(0))->getFunction() == + cast(RHS->getOperand(0))->getFunction()) + return NoRelocation; + } + PossibleRelocationsTy Result = NoRelocation; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) Result = std::max(Result, @@ -910,7 +928,7 @@ void ConstantArray::destroyConstant() { /// if the elements of the array are all ConstantInt's. bool ConstantArray::isString() const { // Check the element type for i8... - if (getType()->getElementType() != Type::getInt8Ty(getContext())) + if (!getType()->getElementType()->isInteger(8)) return false; // Check the elements to make sure they are all integers, not constant // expressions. @@ -925,7 +943,7 @@ bool ConstantArray::isString() const { /// null bytes except its terminator. bool ConstantArray::isCString() const { // Check the element type for i8... - if (getType()->getElementType() != Type::getInt8Ty(getContext())) + if (!getType()->getElementType()->isInteger(8)) return false; // Last element must be a null. 
@@ -1671,7 +1689,7 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { assert(isa(Val->getType()) && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) && + assert(Idx->getType()->isInteger(32) && "Extractelement index must be i32 type!"); return getExtractElementTy(cast(Val->getType())->getElementType(), Val, Idx); @@ -1698,7 +1716,7 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == cast(Val->getType())->getElementType() && "Insertelement types must match!"); - assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) && + assert(Idx->getType()->isInteger(32) && "Insertelement index must be i32 type!"); return getInsertElementTy(Val->getType(), Val, Elt, Idx); } diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 268a6602fa90..08224e4488b0 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -764,7 +764,7 @@ class ConstantUniqueMap : public AbstractTypeUser { } void dump() const { - DEBUG(errs() << "Constant.cpp: ConstantUniqueMap\n"); + DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n"); } }; diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 449e9671ab7f..984d2457f032 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -89,7 +89,7 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) { } /*--.. Type names ..........................................................--*/ -int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) { +LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) { return unwrap(M)->addTypeName(Name, unwrap(Ty)); } @@ -237,7 +237,7 @@ LLVMTypeRef LLVMPPCFP128Type(void) { LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, - int IsVarArg) { + LLVMBool IsVarArg) { std::vector Tys; for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I) Tys.push_back(unwrap(*I)); @@ -245,7 +245,7 @@ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0)); } -int LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) { +LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) { return unwrap(FunctionTy)->isVarArg(); } @@ -267,7 +267,7 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) { /*--.. 
Operations on struct types ..........................................--*/ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed) { + unsigned ElementCount, LLVMBool Packed) { std::vector Tys; for (LLVMTypeRef *I = ElementTypes, *E = ElementTypes + ElementCount; I != E; ++I) @@ -277,7 +277,7 @@ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, } LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed) { + unsigned ElementCount, LLVMBool Packed) { return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes, ElementCount, Packed); } @@ -294,7 +294,7 @@ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) { *Dest++ = wrap(*I); } -int LLVMIsPackedStruct(LLVMTypeRef StructTy) { +LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap(StructTy)->isPacked(); } @@ -442,17 +442,17 @@ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) { return wrap(UndefValue::get(unwrap(Ty))); } -int LLVMIsConstant(LLVMValueRef Ty) { +LLVMBool LLVMIsConstant(LLVMValueRef Ty) { return isa(unwrap(Ty)); } -int LLVMIsNull(LLVMValueRef Val) { +LLVMBool LLVMIsNull(LLVMValueRef Val) { if (Constant *C = dyn_cast(unwrap(Val))) return C->isNullValue(); return false; } -int LLVMIsUndef(LLVMValueRef Val) { +LLVMBool LLVMIsUndef(LLVMValueRef Val) { return isa(unwrap(Val)); } @@ -464,7 +464,7 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) { /*--.. Operations on scalar constants ......................................--*/ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, - int SignExtend) { + LLVMBool SignExtend) { return wrap(ConstantInt::get(unwrap(IntTy), N, SignExtend != 0)); } @@ -504,7 +504,8 @@ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) { /*--.. Operations on composite constants ...................................--*/ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, - unsigned Length, int DontNullTerminate) { + unsigned Length, + LLVMBool DontNullTerminate) { /* Inverted the sense of AddNull because ', 0)' is a better mnemonic for null termination than ', 1)'. 
*/ return wrap(ConstantArray::get(*unwrap(C), std::string(Str, Length), @@ -512,14 +513,14 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, } LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, - unsigned Count, int Packed) { + unsigned Count, LLVMBool Packed) { return wrap(ConstantStruct::get(*unwrap(C), unwrap(ConstantVals, Count), Count, Packed != 0)); } LLVMValueRef LLVMConstString(const char *Str, unsigned Length, - int DontNullTerminate) { + LLVMBool DontNullTerminate) { return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length, DontNullTerminate); } @@ -530,7 +531,7 @@ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, Length)); } LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, - int Packed) { + LLVMBool Packed) { return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count, Packed); } @@ -820,7 +821,7 @@ LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, } LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, - unsigned isSigned) { + LLVMBool isSigned) { return wrap(ConstantExpr::getIntegerCast( unwrap(ConstantVal), unwrap(ToType), @@ -883,10 +884,11 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, IdxList, NumIdx)); } -LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, - const char *Constraints, int HasSideEffects, - int IsAlignStack) { - return wrap(InlineAsm::get(dyn_cast(unwrap(Ty)), AsmString, +LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, + const char *Constraints, + LLVMBool HasSideEffects, + LLVMBool IsAlignStack) { + return wrap(InlineAsm::get(dyn_cast(unwrap(Ty)), AsmString, Constraints, HasSideEffects, IsAlignStack)); } @@ -896,7 +898,7 @@ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) { return wrap(unwrap(Global)->getParent()); } -int LLVMIsDeclaration(LLVMValueRef Global) { +LLVMBool LLVMIsDeclaration(LLVMValueRef Global) { return unwrap(Global)->isDeclaration(); } @@ -1079,19 +1081,19 @@ void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) { ->setInitializer(unwrap(ConstantVal)); } -int LLVMIsThreadLocal(LLVMValueRef GlobalVar) { +LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) { return unwrap(GlobalVar)->isThreadLocal(); } -void LLVMSetThreadLocal(LLVMValueRef GlobalVar, int IsThreadLocal) { +void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) { unwrap(GlobalVar)->setThreadLocal(IsThreadLocal != 0); } -int LLVMIsGlobalConstant(LLVMValueRef GlobalVar) { +LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) { return unwrap(GlobalVar)->isConstant(); } -void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, int IsConstant) { +void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) { unwrap(GlobalVar)->setConstant(IsConstant != 0); } @@ -1285,7 +1287,7 @@ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) { return wrap(static_cast(unwrap(BB))); } -int LLVMValueIsBasicBlock(LLVMValueRef Val) { +LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) { return isa(unwrap(Val)); } @@ -1452,11 +1454,11 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, /*--.. 
Operations on call instructions (only) ..............................--*/ -int LLVMIsTailCall(LLVMValueRef Call) { +LLVMBool LLVMIsTailCall(LLVMValueRef Call) { return unwrap(Call)->isTailCall(); } -void LLVMSetTailCall(LLVMValueRef Call, int isTailCall) { +void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) { unwrap(Call)->setTailCall(isTailCall); } @@ -1973,9 +1975,11 @@ void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) { /*===-- Memory buffers ----------------------------------------------------===*/ -int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, - LLVMMemoryBufferRef *OutMemBuf, - char **OutMessage) { +LLVMBool LLVMCreateMemoryBufferWithContentsOfFile( + const char *Path, + LLVMMemoryBufferRef *OutMemBuf, + char **OutMessage) { + std::string Error; if (MemoryBuffer *MB = MemoryBuffer::getFile(Path, &Error)) { *OutMemBuf = wrap(MB); @@ -1986,8 +1990,8 @@ int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, return 1; } -int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, - char **OutMessage) { +LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, + char **OutMessage) { MemoryBuffer *MB = MemoryBuffer::getSTDIN(); if (!MB->getBufferSize()) { delete MB; diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index e04b6d6a14bd..f00f6ee11fb6 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -189,7 +189,7 @@ void Function::BuildLazyArguments() const { // Create the arguments vector, all arguments start out unnamed. const FunctionType *FT = getFunctionType(); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { - assert(FT->getParamType(i) != Type::getVoidTy(FT->getContext()) && + assert(!FT->getParamType(i)->isVoidTy() && "Cannot have void typed arguments!"); ArgumentList.push_back(new Argument(FT->getParamType(i))); } diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 16de1af71db5..ec21773d83db 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -217,7 +217,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { switch (NumOutputs) { case 0: - if (Ty->getReturnType() != Type::getVoidTy(Ty->getContext())) return false; + if (!Ty->getReturnType()->isVoidTy()) return false; break; case 1: if (isa(Ty->getReturnType())) return false; diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index a5500e6de485..3fabfd07b316 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -374,37 +374,6 @@ bool Instruction::isCommutative(unsigned op) { } } -// Code here matches isMalloc from MemoryBuiltins, which is not in VMCore. -static bool isMalloc(const Value* I) { - const CallInst *CI = dyn_cast(I); - if (!CI) { - const BitCastInst *BCI = dyn_cast(I); - if (!BCI) return false; - - CI = dyn_cast(BCI->getOperand(0)); - } - - if (!CI) - return false; - Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc") - return false; - - // Check malloc prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin - // attribute will exist. 
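The Core.cpp churn above is a mechanical rename of int to LLVMBool across the C API's predicates and flag parameters; since LLVMBool is a typedef of int, existing clients keep compiling and only the 0/non-zero contract becomes explicit in the signatures. A minimal caller sketch; the helper function is hypothetical:

    #include "llvm-c/Core.h"

    /* Valid C and C++: LLVMBool behaves exactly like the int it replaces. */
    static LLVMBool isUsableConstant(LLVMValueRef V) {
      return LLVMIsConstant(V) && !LLVMIsUndef(V) && !LLVMIsNull(V);
    }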
- const FunctionType *FTy = Callee->getFunctionType(); - if (FTy->getNumParams() != 1) - return false; - if (IntegerType *ITy = dyn_cast(FTy->param_begin()->get())) { - if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64) - return false; - return true; - } - - return false; -} - bool Instruction::isSafeToSpeculativelyExecute() const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(getOperand(i))) @@ -430,7 +399,9 @@ bool Instruction::isSafeToSpeculativelyExecute() const { case Load: { if (cast(this)->isVolatile()) return false; - if (isa(getOperand(0)) || isMalloc(getOperand(0))) + // Note that it is not safe to speculate into a malloc'd region because + // malloc may return null. + if (isa(getOperand(0))) return true; if (GlobalVariable *GV = dyn_cast(getOperand(0))) return !GV->hasExternalWeakLinkage(); diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 3e9950e2aade..2619047cb306 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -523,8 +523,7 @@ static Instruction *createMalloc(Instruction *InsertBefore, MCall->setCallingConv(F->getCallingConv()); if (!F->doesNotAlias(0)) F->setDoesNotAlias(0); } - assert(MCall->getType() != Type::getVoidTy(BB->getContext()) && - "Malloc has void return type"); + assert(!MCall->getType()->isVoidTy() && "Malloc has void return type"); return Result; } @@ -788,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { void BranchInst::AssertOK() { if (isConditional()) - assert(getCondition()->getType() == Type::getInt1Ty(getContext()) && + assert(getCondition()->getType()->isInteger(1) && "May only branch on boolean predicates!"); } @@ -893,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { else { assert(!isa(Amt) && "Passed basic block into allocation size parameter! 
Use other ctor"); - assert(Amt->getType() == Type::getInt32Ty(Context) && + assert(Amt->getType()->isInteger(32) && "Allocation array size is not a 32-bit integer!"); } return Amt; @@ -904,7 +903,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -913,7 +912,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -922,7 +921,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), 0), InsertBefore) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -931,7 +930,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), 0), InsertAtEnd) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -940,7 +939,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore) { setAlignment(Align); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -949,7 +948,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { setAlignment(Align); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -1392,8 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!isa(Val->getType()) || - Index->getType() != Type::getInt32Ty(Val->getContext())) + if (!isa(Val->getType()) || !Index->getType()->isInteger(32)) return false; return true; } @@ -1440,7 +1438,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, if (Elt->getType() != cast(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (Index->getType() != Type::getInt32Ty(Vec->getContext())) + if (!Index->getType()->isInteger(32)) return false; // Third operand of insertelement must be i32. 
return true; } @@ -1492,7 +1490,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const VectorType *MaskTy = dyn_cast(Mask->getType()); if (!isa(Mask) || MaskTy == 0 || - MaskTy->getElementType() != Type::getInt32Ty(V1->getContext())) + !MaskTy->getElementType()->isInteger(32)) return false; return true; } @@ -2287,7 +2285,8 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); + assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + "Invalid integer cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp index 5e0f42e063bf..cb9252efdf58 100644 --- a/lib/VMCore/IntrinsicInst.cpp +++ b/lib/VMCore/IntrinsicInst.cpp @@ -8,11 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements methods that make it really easy to deal with intrinsic -// functions with the isa/dyncast family of functions. In particular, this -// allows you to do things like: -// -// if (DbgStopPointInst *SPI = dyn_cast(Inst)) -// ... SPI->getFileName() ... SPI->getDirectory() ... +// functions. // // All intrinsic function calls are instances of the call instruction, so these // are all subclasses of the CallInst class. Note that none of these classes @@ -54,26 +50,14 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) { return dyn_cast(C); } -//===----------------------------------------------------------------------===// -/// DbgStopPointInst - This represents the llvm.dbg.stoppoint instruction. -/// - -Value *DbgStopPointInst::getFileName() const { - // Once the operand indices are verified, update this assert - assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices"); - return getContext()->getOperand(3); -} - -Value *DbgStopPointInst::getDirectory() const { - // Once the operand indices are verified, update this assert - assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices"); - return getContext()->getOperand(4); -} - //===----------------------------------------------------------------------===// /// DbgValueInst - This represents the llvm.dbg.value instruction. /// -Value *DbgValueInst::getValue() const { +const Value *DbgValueInst::getValue() const { + return cast(getOperand(1))->getOperand(0); +} + +Value *DbgValueInst::getValue() { return cast(getOperand(1))->getOperand(0); } diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp index 33eb0449e824..7d9f330f4c4e 100644 --- a/lib/VMCore/Mangler.cpp +++ b/lib/VMCore/Mangler.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -24,57 +24,57 @@ static char HexDigit(int V) { return V < 10 ? 
V+'0' : V+'A'-10; } -static std::string MangleLetter(unsigned char C) { - char Result[] = { '_', HexDigit(C >> 4), HexDigit(C & 15), '_', 0 }; - return Result; +static void MangleLetter(SmallVectorImpl &OutName, unsigned char C) { + OutName.push_back('_'); + OutName.push_back(HexDigit(C >> 4)); + OutName.push_back(HexDigit(C & 15)); + OutName.push_back('_'); } /// makeNameProper - We don't want identifier names non-C-identifier characters /// in them, so mangle them as appropriate. /// -std::string Mangler::makeNameProper(const std::string &X, - ManglerPrefixTy PrefixTy) { +/// FIXME: This is deprecated, new code should use getNameWithPrefix and use +/// MCSymbol printing to handle quotes or not etc. +/// +void Mangler::makeNameProper(SmallVectorImpl &OutName, + const Twine &TheName, + ManglerPrefixTy PrefixTy) { + SmallString<256> TmpData; + StringRef X = TheName.toStringRef(TmpData); assert(!X.empty() && "Cannot mangle empty strings"); if (!UseQuotes) { - std::string Result; - // If X does not start with (char)1, add the prefix. - bool NeedPrefix = true; - std::string::const_iterator I = X.begin(); + StringRef::iterator I = X.begin(); if (*I == 1) { - NeedPrefix = false; - ++I; // Skip over the marker. + ++I; // Skip over the no-prefix marker. + } else { + if (PrefixTy == Mangler::Private) + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + else if (PrefixTy == Mangler::LinkerPrivate) + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); + OutName.append(Prefix, Prefix+strlen(Prefix)); } // Mangle the first letter specially, don't allow numbers unless the target // explicitly allows them. if (!SymbolsCanStartWithDigit && *I >= '0' && *I <= '9') - Result += MangleLetter(*I++); + MangleLetter(OutName, *I++); - for (std::string::const_iterator E = X.end(); I != E; ++I) { + for (StringRef::iterator E = X.end(); I != E; ++I) { if (!isCharAcceptable(*I)) - Result += MangleLetter(*I); + MangleLetter(OutName, *I); else - Result += *I; + OutName.push_back(*I); } - - if (NeedPrefix) { - Result = Prefix + Result; - - if (PrefixTy == Mangler::Private) - Result = PrivatePrefix + Result; - else if (PrefixTy == Mangler::LinkerPrivate) - Result = LinkerPrivatePrefix + Result; - } - - return Result; + return; } bool NeedPrefix = true; bool NeedQuotes = false; - std::string Result; - std::string::const_iterator I = X.begin(); + StringRef::iterator I = X.begin(); if (*I == 1) { NeedPrefix = false; ++I; // Skip over the marker. @@ -87,7 +87,7 @@ std::string Mangler::makeNameProper(const std::string &X, // Do an initial scan of the string, checking to see if we need quotes or // to escape a '"' or not. if (!NeedQuotes) - for (std::string::const_iterator E = X.end(); I != E; ++I) + for (StringRef::iterator E = X.end(); I != E; ++I) if (!isCharAcceptable(*I)) { NeedQuotes = true; break; @@ -95,43 +95,57 @@ std::string Mangler::makeNameProper(const std::string &X, // In the common case, we don't need quotes. Handle this quickly. if (!NeedQuotes) { - if (!NeedPrefix) - return X.substr(1); // Strip off the \001. - - Result = Prefix + X; + if (!NeedPrefix) { + OutName.append(X.begin()+1, X.end()); // Strip off the \001. 
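// Editor's sketch, not from this patch: the streaming idiom the rewritten
// MangleLetter above relies on. Callers hand in a SmallVectorImpl<char>
// (typically a stack SmallString) and pieces are appended in place, avoiding
// the temporary std::strings the old code built. Names here are hypothetical.
#include "llvm/ADT/SmallString.h"

static void demoMangle() {
  llvm::SmallString<32> Buf;
  MangleLetter(Buf, '?');   // '?' is 0x3F, so this appends "_3F_"
  Buf.push_back('x');       // later appends reuse the same buffer
}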
+ return; + } if (PrefixTy == Mangler::Private) - Result = PrivatePrefix + Result; + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); else if (PrefixTy == Mangler::LinkerPrivate) - Result = LinkerPrivatePrefix + Result; - - return Result; - } - - if (NeedPrefix) - Result = X.substr(0, I-X.begin()); + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); - // Otherwise, construct the string the expensive way. - for (std::string::const_iterator E = X.end(); I != E; ++I) { - if (*I == '"') - Result += "_QQ_"; - else if (*I == '\n') - Result += "_NL_"; + if (Prefix[0] == 0) + ; // Common noop, no prefix. + else if (Prefix[1] == 0) + OutName.push_back(Prefix[0]); // Common, one character prefix. else - Result += *I; + OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary prefix. + OutName.append(X.begin(), X.end()); + return; } + // Add leading quote. + OutName.push_back('"'); + + // Add prefixes unless disabled. if (NeedPrefix) { - Result = Prefix + Result; - if (PrefixTy == Mangler::Private) - Result = PrivatePrefix + Result; + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); else if (PrefixTy == Mangler::LinkerPrivate) - Result = LinkerPrivatePrefix + Result; + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); + OutName.append(Prefix, Prefix+strlen(Prefix)); + } + + // Add the piece that we already scanned through. + OutName.append(X.begin()+!NeedPrefix, I); + + // Otherwise, construct the string the expensive way. + for (StringRef::iterator E = X.end(); I != E; ++I) { + if (*I == '"') { + const char *Quote = "_QQ_"; + OutName.append(Quote, Quote+4); + } else if (*I == '\n') { + const char *Newline = "_NL_"; + OutName.append(Newline, Newline+4); + } else + OutName.push_back(*I); } - Result = '"' + Result + '"'; - return Result; + // Add trailing quote. + OutName.push_back('"'); } /// getMangledName - Returns the mangled name of V, an LLVM Value, @@ -139,6 +153,9 @@ std::string Mangler::makeNameProper(const std::string &X, /// specified suffix. If 'ForcePrivate' is specified, the label is specified /// to have a private label prefix. /// +/// FIXME: This is deprecated, new code should use getNameWithPrefix and use +/// MCSymbol printing to handle quotes or not etc. +/// std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, bool ForcePrivate) { assert((!isa(GV) || !cast(GV)->isIntrinsic()) && @@ -148,8 +165,11 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, (GV->hasPrivateLinkage() || ForcePrivate) ? Mangler::Private : GV->hasLinkerPrivateLinkage() ? Mangler::LinkerPrivate : Mangler::Default; - if (GV->hasName()) - return makeNameProper(GV->getNameStr() + Suffix, PrefixTy); + SmallString<128> Result; + if (GV->hasName()) { + makeNameProper(Result, GV->getNameStr() + Suffix, PrefixTy); + return Result.str().str(); + } // Get the ID for the global, assigning a new one if we haven't got one // already. @@ -157,7 +177,38 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, if (ID == 0) ID = NextAnonGlobalID++; // Must mangle the global into a unique ID. - return makeNameProper("__unnamed_" + utostr(ID) + Suffix, PrefixTy); + makeNameProper(Result, "__unnamed_" + utostr(ID) + Suffix, PrefixTy); + return Result.str().str(); +} + +/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix +/// and the specified name as the global variable name. GVName must not be +/// empty. 
+void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, + const Twine &GVName, ManglerPrefixTy PrefixTy) { + SmallString<256> TmpData; + StringRef Name = GVName.toStringRef(TmpData); + assert(!Name.empty() && "getNameWithPrefix requires non-empty name"); + + // If the global name is not led with \1, add the appropriate prefixes. + if (Name[0] != '\1') { + if (PrefixTy == Mangler::Private) + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + else if (PrefixTy == Mangler::LinkerPrivate) + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); + + if (Prefix[0] == 0) + ; // Common noop, no prefix. + else if (Prefix[1] == 0) + OutName.push_back(Prefix[0]); // Common, one character prefix. + else + OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary prefix. + } else { + Name = Name.substr(1); + } + + OutName.append(Name.begin(), Name.end()); } @@ -167,33 +218,28 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, bool isImplicitlyPrivate) { - - // If the global is anonymous or not led with \1, then add the appropriate - // prefix. - if (!GV->hasName() || GV->getName()[0] != '\1') { - if (GV->hasPrivateLinkage() || isImplicitlyPrivate) - OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); - else if (GV->hasLinkerPrivateLinkage()) - OutName.append(LinkerPrivatePrefix, - LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));; - OutName.append(Prefix, Prefix+strlen(Prefix)); - } - - // If the global has a name, just append it now. + // If this global has a name, handle it simply. if (GV->hasName()) { - StringRef Name = GV->getName(); + ManglerPrefixTy PrefixTy = Mangler::Default; + if (GV->hasPrivateLinkage() || isImplicitlyPrivate) + PrefixTy = Mangler::Private; + else if (GV->hasLinkerPrivateLinkage()) + PrefixTy = Mangler::LinkerPrivate; - // Strip off the prefix marker if present. - if (Name[0] != '\1') - OutName.append(Name.begin(), Name.end()); - else - OutName.append(Name.begin()+1, Name.end()); - return; + return getNameWithPrefix(OutName, GV->getName(), PrefixTy); } // If the global variable doesn't have a name, return a unique name for the // global based on a numbering. + // Anonymous names always get prefixes. + if (GV->hasPrivateLinkage() || isImplicitlyPrivate) + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + else if (GV->hasLinkerPrivateLinkage()) + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));; + OutName.append(Prefix, Prefix+strlen(Prefix)); + // Get the ID for the global, assigning a new one if we haven't got one // already. 
unsigned &ID = AnonGlobalIDs[GV]; diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 8e9aab93a14a..7988b446fb64 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -18,6 +18,7 @@ #include "llvm/Instruction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/SmallString.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/Support/ValueHandle.h" using namespace llvm; @@ -31,7 +32,7 @@ MDString::MDString(LLVMContext &C, StringRef S) MDString *MDString::get(LLVMContext &Context, StringRef Str) { LLVMContextImpl *pImpl = Context.pImpl; - StringMapEntry &Entry = + StringMapEntry &Entry = pImpl->MDStringCache.GetOrCreateValue(Str); MDString *&S = Entry.getValue(); if (!S) S = new MDString(Context, Entry.getKey()); @@ -40,7 +41,7 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) { MDString *MDString::get(LLVMContext &Context, const char *Str) { LLVMContextImpl *pImpl = Context.pImpl; - StringMapEntry &Entry = + StringMapEntry &Entry = pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef()); MDString *&S = Entry.getValue(); if (!S) S = new MDString(Context, Entry.getKey()); @@ -58,11 +59,11 @@ class MDNodeOperand : public CallbackVH { public: MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {} ~MDNodeOperand() {} - + void set(Value *V) { setValPtr(V); } - + virtual void deleted(); virtual void allUsesReplacedWith(Value *NV); }; @@ -94,7 +95,7 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, bool isFunctionLocal) : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { NumOperands = NumVals; - + if (isFunctionLocal) setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit); @@ -107,19 +108,82 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, /// ~MDNode - Destroy MDNode. MDNode::~MDNode() { - assert((getSubclassDataFromValue() & DestroyFlag) != 0 && + assert((getSubclassDataFromValue() & DestroyFlag) != 0 && "Not being destroyed through destroy()?"); if (!isNotUniqued()) { LLVMContextImpl *pImpl = getType()->getContext().pImpl; pImpl->MDNodeSet.RemoveNode(this); } - + // Destroy the operands. for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; Op != E; ++Op) Op->~MDNodeOperand(); } +#ifndef NDEBUG +static Function *assertLocalFunction(const MDNode *N, + SmallPtrSet &Visited) { + Function *F = NULL; + // Only visit each MDNode once. + if (!Visited.insert(N)) return F; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + Value *V = N->getOperand(i); + Function *NewF = NULL; + if (!V) continue; + if (Instruction *I = dyn_cast(V)) + NewF = I->getParent()->getParent(); + else if (BasicBlock *BB = dyn_cast(V)) + NewF = BB->getParent(); + else if (Argument *A = dyn_cast(V)) + NewF = A->getParent(); + else if (MDNode *MD = dyn_cast(V)) + if (MD->isFunctionLocal()) + NewF = assertLocalFunction(MD, Visited); + if (F && NewF) assert(F == NewF && "inconsistent function-local metadata"); + if (!F) F = NewF; + } + return F; +} +#endif + +static Function *getFunctionHelper(const MDNode *N, + SmallPtrSet &Visited) { + assert(N->isFunctionLocal() && "Should only be called on function-local MD"); +#ifndef NDEBUG + return assertLocalFunction(N, Visited); +#endif + Function *F = NULL; + // Only visit each MDNode once. 
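+  // (Editor's note: SmallPtrSet::insert returns false when N is already in
+  //  the set, so the check that follows is what keeps this recursion from
+  //  looping forever on cyclic function-local metadata graphs.)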
+  if (!Visited.insert(N)) return F;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    Value *V = N->getOperand(i);
+    if (!V) continue;
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      F = I->getParent()->getParent();
+    else if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+      F = BB->getParent();
+    else if (Argument *A = dyn_cast<Argument>(V))
+      F = A->getParent();
+    else if (MDNode *MD = dyn_cast<MDNode>(V))
+      if (MD->isFunctionLocal())
+        F = getFunctionHelper(MD, Visited);
+    if (F) break;
+  }
+  return F;
+}
+
+// getFunction - If this metadata is function-local and recursively has a
+// function-local operand, return the first such operand's parent function.
+// Otherwise, return null.
+Function *MDNode::getFunction() const {
+  if (!isFunctionLocal()) return NULL;
+  SmallPtrSet<const MDNode *, 32> Visited;
+  return getFunctionHelper(this, Visited);
+}
+
 // destroy - Delete this node.  Only when there are no uses.
 void MDNode::destroy() {
   setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
@@ -128,9 +192,8 @@ void MDNode::destroy() {
   free(this);
 }

-
-MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals,
-                    bool isFunctionLocal) {
+MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
+                          unsigned NumVals, FunctionLocalness FL) {
   LLVMContextImpl *pImpl = Context.pImpl;
   FoldingSetNodeID ID;
   for (unsigned i = 0; i != NumVals; ++i)
@@ -139,16 +202,46 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals,
   void *InsertPoint;
   MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
   if (!N) {
+    bool isFunctionLocal = false;
+    switch (FL) {
+    case FL_Unknown:
+      for (unsigned i = 0; i != NumVals; ++i) {
+        Value *V = Vals[i];
+        if (!V) continue;
+        if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+            (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) {
+          isFunctionLocal = true;
+          break;
+        }
+      }
+      break;
+    case FL_No:
+      isFunctionLocal = false;
+      break;
+    case FL_Yes:
+      isFunctionLocal = true;
+      break;
+    }
+
     // Coallocate space for the node and Operands together, then placement new.
     void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
     N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal);
-    
+
     // InsertPoint will have been set by the FindNodeOrInsertPos call.
     pImpl->MDNodeSet.InsertNode(N, InsertPoint);
   }

   return N;
 }

+MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) {
+  return getMDNode(Context, Vals, NumVals, FL_Unknown);
+}
+
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value*const* Vals,
+                                      unsigned NumVals, bool isFunctionLocal) {
+  return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No);
+}
+
 /// getOperand - Return specified operand.
 Value *MDNode::getOperand(unsigned i) const {
   return *getOperandPtr(const_cast<MDNode*>(this), i);
 }
@@ -163,7 +256,7 @@ void MDNode::Profile(FoldingSetNodeID &ID) const {
 // Replace value from this node's operand list.
 void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
   Value *From = *Op;
-  
+
   if (From == To)
     return;
@@ -173,7 +266,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
   // If this node is already not being uniqued (because one of the operands
   // already went to null), then there is nothing else to do here.
   if (isNotUniqued()) return;
-  
+
   LLVMContextImpl *pImpl = getType()->getContext().pImpl;

   // Remove "this" from the context map.  FoldingSet doesn't have to reprofile
@@ -187,7 +280,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
     setIsNotUniqued();
     return;
   }
-  
+
   // Now that the node is out of the folding set, get ready to reinsert it.
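// Editor's sketch, not from this patch: the uniquing idiom getMDNode uses
// above, reduced to a toy FoldingSet node. Profile the key, probe with
// FindNodeOrInsertPos, and only allocate on a miss. ToyNode is hypothetical.
#include "llvm/ADT/FoldingSet.h"

struct ToyNode : llvm::FoldingSetNode {
  int Key;
  explicit ToyNode(int K) : Key(K) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Key); }
};

static ToyNode *getToyNode(llvm::FoldingSet<ToyNode> &Set, int Key) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(Key);
  void *InsertPos;
  if (ToyNode *N = Set.FindNodeOrInsertPos(ID, InsertPos))
    return N;                          // hit: return the uniqued node
  ToyNode *N = new ToyNode(Key);       // miss: create the node...
  Set.InsertNode(N, InsertPos);        // ...and insert at the saved position
  return N;
}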
// First, check to see if another node with the same operands already exists // in the set. If it doesn't exist, this returns the position to insert it. @@ -210,21 +303,40 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { //===----------------------------------------------------------------------===// // NamedMDNode implementation. // -static SmallVector, 4> &getNMDOps(void *Operands) { - return *(SmallVector, 4>*)Operands; + +namespace llvm { +// SymbolTableListTraits specialization for MDSymbolTable. +void ilist_traits +::addNodeToList(NamedMDNode *N) { + assert(N->getParent() == 0 && "Value already in a container!!"); + Module *Owner = getListOwner(); + N->setParent(Owner); + MDSymbolTable &ST = Owner->getMDSymbolTable(); + ST.insert(N->getName(), N); +} + +void ilist_traits::removeNodeFromList(NamedMDNode *N) { + N->setParent(0); + Module *Owner = getListOwner(); + MDSymbolTable &ST = Owner->getMDSymbolTable(); + ST.remove(N->getName()); +} +} + +static SmallVector &getNMDOps(void *Operands) { + return *(SmallVector*)Operands; } NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N, - MetadataBase *const *MDs, + MDNode *const *MDs, unsigned NumMDs, Module *ParentModule) - : MetadataBase(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) { + : Value(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) { setName(N); - - Operands = new SmallVector, 4>(); - - SmallVector, 4> &Node = getNMDOps(Operands); + Operands = new SmallVector(); + + SmallVector &Node = getNMDOps(Operands); for (unsigned i = 0; i != NumMDs; ++i) - Node.push_back(TrackingVH(MDs[i])); + Node.push_back(WeakVH(MDs[i])); if (ParentModule) ParentModule->getNamedMDList().push_back(this); @@ -232,9 +344,9 @@ NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N, NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) { assert(NMD && "Invalid source NamedMDNode!"); - SmallVector Elems; + SmallVector Elems; Elems.reserve(NMD->getNumOperands()); - + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) Elems.push_back(NMD->getOperand(i)); return new NamedMDNode(NMD->getContext(), NMD->getName().data(), @@ -252,14 +364,14 @@ unsigned NamedMDNode::getNumOperands() const { } /// getOperand - Return specified operand. -MetadataBase *NamedMDNode::getOperand(unsigned i) const { +MDNode *NamedMDNode::getOperand(unsigned i) const { assert(i < getNumOperands() && "Invalid Operand number!"); - return getNMDOps(Operands)[i]; + return dyn_cast_or_null(getNMDOps(Operands)[i]); } /// addOperand - Add metadata Operand. -void NamedMDNode::addOperand(MetadataBase *M) { - getNMDOps(Operands).push_back(TrackingVH(M)); +void NamedMDNode::addOperand(MDNode *M) { + getNMDOps(Operands).push_back(WeakVH(M)); } /// eraseFromParent - Drop all references and remove the node from parent @@ -273,6 +385,26 @@ void NamedMDNode::dropAllReferences() { getNMDOps(Operands).clear(); } +/// setName - Set the name of this named metadata. +void NamedMDNode::setName(const Twine &NewName) { + assert (!NewName.isTriviallyEmpty() && "Invalid named metadata name!"); + + SmallString<256> NameData; + StringRef NameRef = NewName.toStringRef(NameData); + + // Name isn't changing? + if (getName() == NameRef) + return; + + Name = NameRef.str(); + if (Parent) + Parent->getMDSymbolTable().insert(NameRef, this); +} + +/// getName - Return a constant reference to this named metadata's name. 
+StringRef NamedMDNode::getName() const { + return StringRef(Name); +} //===----------------------------------------------------------------------===// // LLVMContext MDKind naming implementation. @@ -299,9 +431,9 @@ static bool isValidName(StringRef MDName) { /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. unsigned LLVMContext::getMDKindID(StringRef Name) const { assert(isValidName(Name) && "Invalid MDNode name"); - + unsigned &Entry = pImpl->CustomMDKindNames[Name]; - + // If this is new, assign it its ID. if (Entry == 0) Entry = pImpl->CustomMDKindNames.size(); return Entry; @@ -313,7 +445,7 @@ void LLVMContext::getMDKindNames(SmallVectorImpl &Names) const { Names.resize(pImpl->CustomMDKindNames.size()+1); Names[0] = ""; for (StringMap::const_iterator I = pImpl->CustomMDKindNames.begin(), - E = pImpl->CustomMDKindNames.end(); I != E; ++I) + E = pImpl->CustomMDKindNames.end(); I != E; ++I) // MD Handlers are numbered from 1. Names[I->second] = I->first(); } @@ -336,7 +468,7 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const { /// Node is null. void Instruction::setMetadata(unsigned KindID, MDNode *Node) { if (Node == 0 && !hasMetadata()) return; - + // Handle the case when we're adding/updating metadata on an instruction. if (Node) { LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; @@ -351,24 +483,24 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { return; } } - + // No replacement, just add it to the list. Info.push_back(std::make_pair(KindID, Node)); return; } - + // Otherwise, we're removing metadata from an instruction. assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) && "HasMetadata bit out of date!"); LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; - + // Common case is removing the only entry. if (Info.size() == 1 && Info[0].first == KindID) { getContext().pImpl->MetadataStore.erase(this); setHasMetadata(false); return; } - + // Handle replacement of an existing value. for (unsigned i = 0, e = Info.size(); i != e; ++i) if (Info[i].first == KindID) { @@ -383,7 +515,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { MDNode *Instruction::getMetadataImpl(unsigned KindID) const { LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; assert(hasMetadata() && !Info.empty() && "Shouldn't have called this"); - + for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end(); I != E; ++I) if (I->first == KindID) @@ -398,10 +530,10 @@ void Instruction::getAllMetadataImpl(SmallVectorImplMetadataStore.find(this)->second; assert(!Info.empty() && "Shouldn't have called this"); - + Result.clear(); Result.append(Info.begin(), Info.end()); - + // Sort the resulting array so it is stable. if (Result.size() > 1) array_pod_sort(Result.begin(), Result.end()); diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index a7f503bacb99..503e70891721 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -59,6 +59,7 @@ Module::Module(StringRef MID, LLVMContext& C) : Context(C), ModuleID(MID), DataLayout("") { ValSymTab = new ValueSymbolTable(); TypeSymTab = new TypeSymbolTable(); + NamedMDSymTab = new MDSymbolTable(); } Module::~Module() { @@ -70,15 +71,17 @@ Module::~Module() { NamedMDList.clear(); delete ValSymTab; delete TypeSymTab; + delete NamedMDSymTab; } /// Target endian information... 
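// Editor's sketch, not from this patch: typical use of the kind-ID API
// above. The kind string and function are hypothetical; IDs are small
// per-context integers, with custom kinds numbered from 1.
#include "llvm/Instruction.h"
#include "llvm/LLVMContext.h"

static void tagInstruction(llvm::Instruction *I, llvm::MDNode *N) {
  unsigned Kind = I->getContext().getMDKindID("my.annotation");
  I->setMetadata(Kind, N);   // attach; passing 0 instead would detach
}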
 Module::Endianness Module::getEndianness() const {
-  std::string temp = DataLayout;
+  StringRef temp = DataLayout;
   Module::Endianness ret = AnyEndianness;

   while (!temp.empty()) {
-    std::string token = getToken(temp, "-");
+    StringRef token = DataLayout;
+    tie(token, temp) = getToken(temp, "-");

     if (token[0] == 'e') {
       ret = LittleEndian;
@@ -92,15 +95,17 @@
 /// Target Pointer Size information...
 Module::PointerSize Module::getPointerSize() const {
-  std::string temp = DataLayout;
+  StringRef temp = DataLayout;
   Module::PointerSize ret = AnyPointerSize;

   while (!temp.empty()) {
-    std::string token = getToken(temp, "-");
-    char signal = getToken(token, ":")[0];
+    StringRef token, signalToken;
+    tie(token, temp) = getToken(temp, "-");
+    tie(signalToken, token) = getToken(token, ":");

-    if (signal == 'p') {
-      int size = atoi(getToken(token, ":").c_str());
+    if (signalToken[0] == 'p') {
+      int size = 0;
+      getToken(token, ":").first.getAsInteger(10, size);
       if (size == 32)
         ret = Pointer32;
       else if (size == 64)
@@ -307,15 +312,14 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
 /// specified name. This method returns null if a NamedMDNode with the
 /// specified name is not found.
 NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
-  return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
+  return NamedMDSymTab->lookup(Name);
 }

 /// getOrInsertNamedMetadata - Return the first named MDNode in the module
 /// with the specified name. This method returns a new NamedMDNode if a
 /// NamedMDNode with the specified name is not found.
 NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
-  NamedMDNode *NMD =
-    dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
+  NamedMDNode *NMD = NamedMDSymTab->lookup(Name);
   if (!NMD)
     NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this);
   return NMD;
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 6bea7a8b088a..39da8fbe87ac 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ModuleProvider.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Atomic.h"
@@ -51,7 +52,7 @@ bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {

 // dumpPassStructure - Implement the -debug-passes=Structure option
 void Pass::dumpPassStructure(unsigned Offset) {
-  errs().indent(Offset*2) << getPassName() << "\n";
+  dbgs().indent(Offset*2) << getPassName() << "\n";
 }

 /// getPassName - Return a nice clean name for a pass.
This usually @@ -95,7 +96,7 @@ void Pass::print(raw_ostream &O,const Module*) const { // dump - call print(cerr); void Pass::dump() const { - print(errs(), 0); + print(dbgs(), 0); } //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index d68838584029..b37b2aeda824 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -15,6 +15,7 @@ #include "llvm/PassManagers.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Module.h" #include "llvm/ModuleProvider.h" @@ -132,7 +133,7 @@ class BBPassManager : public PMDataManager, public FunctionPass { // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::errs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n"; + llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); BP->dumpPassStructure(Offset + 1); @@ -272,7 +273,7 @@ class MPPassManager : public Pass, public PMDataManager { // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::errs() << std::string(Offset*2, ' ') << "ModulePass Manager\n"; + llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { ModulePass *MP = getContainedPass(Index); MP->dumpPassStructure(Offset + 1); @@ -595,11 +596,11 @@ void PMTopLevelManager::dumpArguments() const { if (PassDebugging < Arguments) return; - errs() << "Pass Arguments: "; + dbgs() << "Pass Arguments: "; for (SmallVector::const_iterator I = PassManagers.begin(), E = PassManagers.end(); I != E; ++I) (*I)->dumpPassArguments(); - errs() << "\n"; + dbgs() << "\n"; } void PMTopLevelManager::initializeAllAnalysisInfo() { @@ -718,8 +719,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { // Remove this analysis if (PassDebugging >= Details) { Pass *S = Info->second; - errs() << " -- '" << P->getPassName() << "' is not preserving '"; - errs() << S->getPassName() << "'\n"; + dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; + dbgs() << S->getPassName() << "'\n"; } AvailableAnalysis.erase(Info); } @@ -742,8 +743,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { // Remove this analysis if (PassDebugging >= Details) { Pass *S = Info->second; - errs() << " -- '" << P->getPassName() << "' is not preserving '"; - errs() << S->getPassName() << "'\n"; + dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; + dbgs() << S->getPassName() << "'\n"; } InheritedAnalysis[Index]->erase(Info); } @@ -764,9 +765,9 @@ void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg, TPM->collectLastUses(DeadPasses, P); if (PassDebugging >= Details && !DeadPasses.empty()) { - errs() << " -*- '" << P->getPassName(); - errs() << "' is the last user of following pass instances."; - errs() << " Free these instances\n"; + dbgs() << " -*- '" << P->getPassName(); + dbgs() << "' is the last user of following pass instances."; + dbgs() << " Free these instances\n"; } for (SmallVector::iterator I = DeadPasses.begin(), @@ -959,7 +960,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{ for (SmallVector::iterator I = LUses.begin(), E = LUses.end(); I != E; ++I) { - llvm::errs() << "--" << std::string(Offset*2, ' '); + 
llvm::dbgs() << "--" << std::string(Offset*2, ' '); (*I)->dumpPassStructure(0); } } @@ -972,7 +973,7 @@ void PMDataManager::dumpPassArguments() const { else if (const PassInfo *PI = (*I)->getPassInfo()) if (!PI->isAnalysisGroup()) - errs() << " -" << PI->getPassArgument(); + dbgs() << " -" << PI->getPassArgument(); } } @@ -981,35 +982,35 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1, StringRef Msg) { if (PassDebugging < Executions) return; - errs() << (void*)this << std::string(getDepth()*2+1, ' '); + dbgs() << (void*)this << std::string(getDepth()*2+1, ' '); switch (S1) { case EXECUTION_MSG: - errs() << "Executing Pass '" << P->getPassName(); + dbgs() << "Executing Pass '" << P->getPassName(); break; case MODIFICATION_MSG: - errs() << "Made Modification '" << P->getPassName(); + dbgs() << "Made Modification '" << P->getPassName(); break; case FREEING_MSG: - errs() << " Freeing Pass '" << P->getPassName(); + dbgs() << " Freeing Pass '" << P->getPassName(); break; default: break; } switch (S2) { case ON_BASICBLOCK_MSG: - errs() << "' on BasicBlock '" << Msg << "'...\n"; + dbgs() << "' on BasicBlock '" << Msg << "'...\n"; break; case ON_FUNCTION_MSG: - errs() << "' on Function '" << Msg << "'...\n"; + dbgs() << "' on Function '" << Msg << "'...\n"; break; case ON_MODULE_MSG: - errs() << "' on Module '" << Msg << "'...\n"; + dbgs() << "' on Module '" << Msg << "'...\n"; break; case ON_LOOP_MSG: - errs() << "' on Loop '" << Msg << "'...\n"; + dbgs() << "' on Loop '" << Msg << "'...\n"; break; case ON_CG_MSG: - errs() << "' on Call Graph Nodes '" << Msg << "'...\n"; + dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n"; break; default: break; @@ -1039,12 +1040,12 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, assert(PassDebugging >= Details); if (Set.empty()) return; - errs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; + dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; for (unsigned i = 0; i != Set.size(); ++i) { - if (i) errs() << ','; - errs() << ' ' << Set[i]->getPassName(); + if (i) dbgs() << ','; + dbgs() << ' ' << Set[i]->getPassName(); } - errs() << '\n'; + dbgs() << '\n'; } /// Add RequiredPass into list of lower level passes required by pass P. @@ -1067,8 +1068,8 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { // checks whether any lower level manager will be able to provide this // analysis info on demand or not. 
#ifndef NDEBUG - errs() << "Unable to schedule '" << RequiredPass->getPassName(); - errs() << "' required by '" << P->getPassName() << "'\n"; + dbgs() << "Unable to schedule '" << RequiredPass->getPassName(); + dbgs() << "' required by '" << P->getPassName() << "'\n"; #endif llvm_unreachable("Unable to schedule pass"); } @@ -1300,7 +1301,7 @@ bool FunctionPassManagerImpl::run(Function &F) { char FPPassManager::ID = 0; /// Print passes managed by this manager void FPPassManager::dumpPassStructure(unsigned Offset) { - llvm::errs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n"; + llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { FunctionPass *FP = getContainedPass(Index); FP->dumpPassStructure(Offset + 1); @@ -1698,19 +1699,19 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) { return wrap(new FunctionPassManager(unwrap(P))); } -int LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { +LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { return unwrap(PM)->run(*unwrap(M)); } -int LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) { +LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) { return unwrap(FPM)->doInitialization(); } -int LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) { +LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) { return unwrap(FPM)->run(*unwrap(F)); } -int LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { +LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { return unwrap(FPM)->doFinalization(); } diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp index 3d4f19df05d8..f0f6e7a9efe7 100644 --- a/lib/VMCore/PrintModulePass.cpp +++ b/lib/VMCore/PrintModulePass.cpp @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -26,7 +27,7 @@ namespace { bool DeleteStream; // Delete the ostream in our dtor? public: static char ID; - PrintModulePass() : ModulePass(&ID), Out(&errs()), + PrintModulePass() : ModulePass(&ID), Out(&dbgs()), DeleteStream(false) {} PrintModulePass(raw_ostream *o, bool DS) : ModulePass(&ID), Out(o), DeleteStream(DS) {} @@ -51,7 +52,7 @@ namespace { bool DeleteStream; // Delete the ostream in our dtor? public: static char ID; - PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&errs()), + PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&dbgs()), DeleteStream(false) {} PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS) : FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {} diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index fd46aa1f41d1..044de4fb393f 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -124,6 +124,11 @@ const Type *Type::getScalarType() const { return this; } +/// isInteger - Return true if this is an IntegerType of the specified width. +bool Type::isInteger(unsigned Bitwidth) const { + return isInteger() && cast(this)->getBitWidth() == Bitwidth; +} + /// isIntOrIntVector - Return true if this is an integer type or a vector of /// integer types. /// @@ -280,7 +285,7 @@ std::string Type::getDescription() const { bool StructType::indexValid(const Value *V) const { // Structure indexes require 32-bit integer constants. 
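// Editor's sketch, not from this patch: driving the re-typed C bindings
// above from C++. LLVMBool is a typedef for int, so existing call sites keep
// compiling; the handles below are assumed to be created elsewhere.
#include "llvm-c/Core.h"

static void runFunctionPasses(LLVMPassManagerRef FPM, LLVMValueRef Fn) {
  LLVMInitializeFunctionPassManager(FPM);
  if (LLVMRunFunctionPassManager(FPM, Fn)) {
    // nonzero return: at least one pass modified Fn
  }
  LLVMFinalizeFunctionPassManager(FPM);
}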
- if (V->getType() == Type::getInt32Ty(V->getContext())) + if (V->getType()->isInteger(32)) if (const ConstantInt *CU = dyn_cast(V)) return indexValid(CU->getZExtValue()); return false; @@ -487,7 +492,7 @@ PointerType::PointerType(const Type *E, unsigned AddrSpace) OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) { setAbstract(true); #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *this << "\n"); + DEBUG(dbgs() << "Derived new type: " << *this << "\n"); #endif } @@ -782,7 +787,7 @@ const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) { pImpl->IntegerTypes.add(IVT, ITy); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *ITy << "\n"); + DEBUG(dbgs() << "Derived new type: " << *ITy << "\n"); #endif return ITy; } @@ -825,7 +830,7 @@ FunctionType *FunctionType::get(const Type *ReturnType, } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << FT << "\n"); + DEBUG(dbgs() << "Derived new type: " << FT << "\n"); #endif return FT; } @@ -846,7 +851,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) { pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements)); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *AT << "\n"); + DEBUG(dbgs() << "Derived new type: " << *AT << "\n"); #endif return AT; } @@ -870,7 +875,7 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements)); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *PT << "\n"); + DEBUG(dbgs() << "Derived new type: " << *PT << "\n"); #endif return PT; } @@ -902,7 +907,7 @@ StructType *StructType::get(LLVMContext &Context, pImpl->StructTypes.add(STV, ST); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *ST << "\n"); + DEBUG(dbgs() << "Derived new type: " << *ST << "\n"); #endif return ST; } @@ -946,7 +951,7 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) { pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace)); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *PT << "\n"); + DEBUG(dbgs() << "Derived new type: " << *PT << "\n"); #endif return PT; } @@ -1009,13 +1014,13 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const { AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i); #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << " remAbstractTypeUser[" << (void*)this << ", " + DEBUG(dbgs() << " remAbstractTypeUser[" << (void*)this << ", " << *this << "][" << i << "] User = " << U << "\n"); #endif if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "DELETEing unused abstract type: <" << *this + DEBUG(dbgs() << "DELETEing unused abstract type: <" << *this << ">[" << (void*)this << "]" << "\n"); #endif @@ -1041,7 +1046,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) { pImpl->AbstractTypeDescriptions.clear(); #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "REFINING abstract type [" << (void*)this << " " + DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " " << *this << "] to [" << (void*)NewType << " " << *NewType << "]!\n"); #endif @@ -1078,7 +1083,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) { unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize; #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << " REFINING user " << OldSize-1 << "[" << (void*)User + DEBUG(dbgs() << " REFINING 
user " << OldSize-1 << "[" << (void*)User << "] of abstract type [" << (void*)this << " " << *this << "] to [" << (void*)NewTy.get() << " " << *NewTy << "]!\n"); @@ -1109,7 +1114,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) { // void DerivedType::notifyUsesThatTypeBecameConcrete() { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n"); + DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n"); #endif unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize; diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp index 0d0cdf5dbc2f..b4daf0f63144 100644 --- a/lib/VMCore/TypeSymbolTable.cpp +++ b/lib/VMCore/TypeSymbolTable.cpp @@ -15,6 +15,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include @@ -58,7 +59,7 @@ Type* TypeSymbolTable::remove(iterator Entry) { #if DEBUG_SYMBOL_TABLE dump(); - errs() << " Removing Value: " << Result->getDescription() << "\n"; + dbgs() << " Removing Value: " << Result->getDescription() << "\n"; #endif tmap.erase(Entry); @@ -67,7 +68,7 @@ Type* TypeSymbolTable::remove(iterator Entry) { // list... if (Result->isAbstract()) { #if DEBUG_ABSTYPE - errs() << "Removing abstract type from symtab" + dbgs() << "Removing abstract type from symtab" << Result->getDescription() << "\n"; #endif @@ -87,7 +88,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) { #if DEBUG_SYMBOL_TABLE dump(); - errs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n"; + dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n"; #endif } else { // If there is a name conflict... @@ -99,7 +100,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) { #if DEBUG_SYMBOL_TABLE dump(); - errs() << " Inserting type: " << UniqueName << ": " + dbgs() << " Inserting type: " << UniqueName << ": " << T->getDescription() << "\n"; #endif @@ -111,7 +112,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) { if (T->isAbstract()) { cast(T)->addAbstractTypeUser(this); #if DEBUG_ABSTYPE - errs() << "Added abstract type to ST: " << T->getDescription() << "\n"; + dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n"; #endif } } @@ -127,14 +128,14 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType, for (iterator I = begin(), E = end(); I != E; ++I) { if (I->second == (Type*)OldType) { // FIXME when Types aren't const. 
#if DEBUG_ABSTYPE - errs() << "Removing type " << OldType->getDescription() << "\n"; + dbgs() << "Removing type " << OldType->getDescription() << "\n"; #endif OldType->removeAbstractTypeUser(this); I->second = (Type*)NewType; // TODO FIXME when types aren't const if (NewType->isAbstract()) { #if DEBUG_ABSTYPE - errs() << "Added type " << NewType->getDescription() << "\n"; + dbgs() << "Added type " << NewType->getDescription() << "\n"; #endif cast(NewType)->addAbstractTypeUser(this); } @@ -154,13 +155,13 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) { } static void DumpTypes(const std::pair& T ) { - errs() << " '" << T.first << "' = "; + dbgs() << " '" << T.first << "' = "; T.second->dump(); - errs() << "\n"; + dbgs() << "\n"; } void TypeSymbolTable::dump() const { - errs() << "TypeSymbolPlane: "; + dbgs() << "TypeSymbolPlane: "; for_each(tmap.begin(), tmap.end(), DumpTypes); } diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index e7950bd211ff..93a801b9f660 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -302,7 +302,7 @@ class TypeMap : public TypeMapBase { void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType, const Type *NewType) { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType + DEBUG(dbgs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType << "], " << (void*)NewType << " [" << *NewType << "])\n"); #endif @@ -408,11 +408,11 @@ class TypeMap : public TypeMapBase { void print(const char *Arg) const { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "TypeMap<>::" << Arg << " table contents:\n"); + DEBUG(dbgs() << "TypeMap<>::" << Arg << " table contents:\n"); unsigned i = 0; for (typename std::map::const_iterator I = Map.begin(), E = Map.end(); I != E; ++I) - DEBUG(errs() << " " << (++i) << ". " << (void*)I->second.get() << " " + DEBUG(dbgs() << " " << (++i) << ". " << (void*)I->second.get() << " " << *I->second.get() << "\n"); #endif } diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index fe1219f9a88f..40679bfc2904 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -44,14 +44,12 @@ Value::Value(const Type *ty, unsigned scid) SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)), UseList(0), Name(0) { if (isa(this) || isa(this)) - assert((VTy->isFirstClassType() || - VTy == Type::getVoidTy(ty->getContext()) || + assert((VTy->isFirstClassType() || VTy->isVoidTy() || isa(ty) || VTy->getTypeID() == Type::StructTyID) && "invalid CallInst type!"); else if (!isa(this) && !isa(this)) - assert((VTy->isFirstClassType() || - VTy == Type::getVoidTy(ty->getContext()) || - isa(ty)) && + assert((VTy->isFirstClassType() || VTy->isVoidTy() || + isa(ty)) && "Cannot create non-first-class values except for constants!"); } @@ -68,9 +66,9 @@ Value::~Value() { // a // if (!use_empty()) { - errs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n"; + dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n"; for (use_iterator I = use_begin(), E = use_end(); I != E; ++I) - errs() << "Use still stuck around after Def is destroyed:" + dbgs() << "Use still stuck around after Def is destroyed:" << **I << "\n"; } #endif @@ -172,17 +170,13 @@ void Value::setName(const Twine &NewName) { return; SmallString<256> NameData; - NewName.toVector(NameData); - - const char *NameStr = NameData.data(); - unsigned NameLen = NameData.size(); + StringRef NameRef = NewName.toStringRef(NameData); // Name isn't changing? 
- if (getName() == StringRef(NameStr, NameLen)) + if (getName() == NameRef) return; - assert(getType() != Type::getVoidTy(getContext()) && - "Cannot assign a name to void values!"); + assert(!getType()->isVoidTy() && "Cannot assign a name to void values!"); // Get the symbol table to update for this object. ValueSymbolTable *ST; @@ -190,7 +184,7 @@ void Value::setName(const Twine &NewName) { return; // Cannot set a name on this value (e.g. constant). if (!ST) { // No symbol table to update? Just do the change. - if (NameLen == 0) { + if (NameRef.empty()) { // Free the name for this value. Name->Destroy(); Name = 0; @@ -204,7 +198,7 @@ void Value::setName(const Twine &NewName) { // then reallocated. // Create the new name. - Name = ValueName::Create(NameStr, NameStr+NameLen); + Name = ValueName::Create(NameRef.begin(), NameRef.end()); Name->setValue(this); return; } @@ -217,12 +211,12 @@ void Value::setName(const Twine &NewName) { Name->Destroy(); Name = 0; - if (NameLen == 0) + if (NameRef.empty()) return; } // Name is changing to something new. - Name = ST->createValueName(StringRef(NameStr, NameLen), this); + Name = ST->createValueName(NameRef, this); } @@ -522,7 +516,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) { // All callbacks, weak references, and assertingVHs should be dropped by now. if (V->HasValueHandle) { #ifndef NDEBUG // Only in +Asserts mode... - errs() << "While deleting: " << *V->getType() << " %" << V->getNameStr() + dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr() << "\n"; if (pImpl->ValueHandles[V]->getKind() == Assert) llvm_unreachable("An asserting value handle still pointed to this" diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp index 9d39a50d27af..d30a9d6e37b3 100644 --- a/lib/VMCore/ValueSymbolTable.cpp +++ b/lib/VMCore/ValueSymbolTable.cpp @@ -24,7 +24,7 @@ using namespace llvm; ValueSymbolTable::~ValueSymbolTable() { #ifndef NDEBUG // Only do this in -g mode... for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI) - errs() << "Value still in symbol table! Type = '" + dbgs() << "Value still in symbol table! Type = '" << VI->getValue()->getType()->getDescription() << "' Name = '" << VI->getKeyData() << "'\n"; assert(vmap.empty() && "Values remain in symbol table!"); @@ -38,7 +38,7 @@ void ValueSymbolTable::reinsertValue(Value* V) { // Try inserting the name, assuming it won't conflict. if (vmap.insert(V->Name)) { - //DEBUG(errs() << " Inserted value: " << V->Name << ": " << *V << "\n"); + //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n"); return; } @@ -62,14 +62,14 @@ void ValueSymbolTable::reinsertValue(Value* V) { // Newly inserted name. Success! NewName.setValue(V); V->Name = &NewName; - //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); + //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); return; } } } void ValueSymbolTable::removeValueName(ValueName *V) { - //DEBUG(errs() << " Removing Value: " << V->getKeyData() << "\n"); + //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n"); // Remove the value from the symbol table. 
   vmap.remove(V);
 }

@@ -82,7 +82,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
   ValueName &Entry = vmap.GetOrCreateValue(Name);
   if (Entry.getValue() == 0) {
     Entry.setValue(V);
-    //DEBUG(errs() << " Inserted value: " << Entry.getKeyData() << ": "
+    //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
     //           << *V << "\n");
     return &Entry;
   }
@@ -102,7 +102,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
     if (NewName.getValue() == 0) {
       // Newly inserted name.  Success!
       NewName.setValue(V);
-      //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+      //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
       return &NewName;
     }
   }
@@ -112,10 +112,12 @@
 // dump - print out the symbol table
 //
 void ValueSymbolTable::dump() const {
-  //DEBUG(errs() << "ValueSymbolTable:\n");
+  //DEBUG(dbgs() << "ValueSymbolTable:\n");
   for (const_iterator I = begin(), E = end(); I != E; ++I) {
-    //DEBUG(errs() << "  '" << I->getKeyData() << "' = ");
+    //DEBUG(dbgs() << "  '" << I->getKeyData() << "' = ");
     I->getValue()->dump();
-    //DEBUG(errs() << "\n");
+    //DEBUG(dbgs() << "\n");
   }
 }
+
+MDSymbolTable::~MDSymbolTable() { }
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 30528bfebde7..ec475e472979 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -56,6 +56,7 @@
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -85,9 +86,9 @@ namespace {  // Anonymous namespace for class
       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
         if (I->empty() || !I->back().isTerminator()) {
-          errs() << "Basic Block does not have terminator!\n";
-          WriteAsOperand(errs(), I, true);
-          errs() << "\n";
+          dbgs() << "Basic Block does not have terminator!\n";
+          WriteAsOperand(dbgs(), I, true);
+          dbgs() << "\n";
           Broken = true;
         }
       }
@@ -262,12 +263,12 @@
       default: llvm_unreachable("Unknown action");
       case AbortProcessAction:
         MessagesStr << "compilation aborted!\n";
-        errs() << MessagesStr.str();
+        dbgs() << MessagesStr.str();
         // Client should choose different reaction if abort is not desired
         abort();
       case PrintMessageAction:
         MessagesStr << "verification continues.\n";
-        errs() << MessagesStr.str();
+        dbgs() << MessagesStr.str();
         return false;
       case ReturnStatusAction:
         MessagesStr << "compilation terminated.\n";
@@ -1589,9 +1590,10 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
   default:
     break;
   case Intrinsic::dbg_declare:  // llvm.dbg.declare
-    if (Constant *C = dyn_cast<Constant>(CI.getOperand(1)))
-      Assert1(C && !isa<ConstantPointerNull>(C),
-              "invalid llvm.dbg.declare intrinsic call", &CI);
+    if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(1)))
+      if (Constant *C = dyn_cast<Constant>(MD->getOperand(0)))
+        Assert1(C && !isa<ConstantPointerNull>(C),
+                "invalid llvm.dbg.declare intrinsic call", &CI);
     break;
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
diff --git a/runtime/libprofile/exported_symbols.lst b/runtime/libprofile/exported_symbols.lst
index 45c6d5efe5d5..aafafb68a030 100644
--- a/runtime/libprofile/exported_symbols.lst
+++ b/runtime/libprofile/exported_symbols.lst
@@ -1,6 +1,4 @@
-llvm_start_func_profiling
-llvm_start_block_profiling
 llvm_start_edge_profiling
 llvm_start_opt_edge_profiling
 llvm_start_basic_block_tracing
diff --git
a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll index f0f1535da746..f699ba2911c5 100644 --- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll +++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll @@ -23,9 +23,9 @@ entry: cond_true34: ; preds = %entry %tmp631 = getelementptr %struct.usb_hcd* %hcd, i32 0, i32 2, i64 2305843009213693950 ; [#uses=1] - %tmp70 = bitcast i64* %tmp631 to %struct.device** ; + %tmp70 = bitcast i64* %tmp631 to %struct.device** - %tmp71 = load %struct.device** %tmp70, align 8 ; + %tmp71 = load %struct.device** %tmp70, align 8 ret i32 undef diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll index 7f82ea435791..ba57662a81d9 100644 --- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll +++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output \ -; RUN: -scalar-evolution-max-iterations=0 | grep {Loop bb: backedge-taken count is 100} +; RUN: -scalar-evolution-max-iterations=0 | grep {Loop %bb: backedge-taken count is 100} ; PR1533 @array = weak global [101 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll index f623da1b2757..ce8f72511f9c 100644 --- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll +++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)} ; PR1597 define i32 @f(i32 %x, i32 %y) { diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll index c8e483e7d50f..6685778d5551 100644 --- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll +++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %header: backedge-taken count is (0 smax %n)} define void @foo(i32 %n) { entry: diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll index cb9a1829eb7c..addf346825ef 100644 --- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll +++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %loop: backedge-taken count is (100 + (-100 smax %n))} ; PR2002 define void @foo(i8 %n) { diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll index daeb26a202e3..f9dd40f8b5cc 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output |& \ -; RUN: grep {Loop bb: backedge-taken count is (7 + (-1 \\* %argc))} +; RUN: grep {Loop %bb: 
backedge-taken count is (7 + (-1 \\* %argc))} ; XFAIL: * define i32 @main(i32 %argc, i8** %argv) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll index 9dda78b21f7d..9ee781fba770 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output \ -; RUN: | grep {Loop bb: Unpredictable backedge-taken count\\.} +; RUN: | grep {Loop %bb: Unpredictable backedge-taken count\\.} ; ScalarEvolution can't compute a trip count because it doesn't know if ; dividing by the stride will have a remainder. This could theoretically diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll index b733d6acb504..55d3bd588e8d 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %bb3: backedge-taken count is (-1 + %n)} ; We don't want to use a max in the trip count expression in ; this testcase. diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index 506401dafea5..a4fdcd0b6d83 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output \ -; RUN: | grep {\{%d,+,\[^\{\}\]\*\}} +; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<%bb>} ; ScalarEvolution should be able to understand the loop and eliminate the casts. 
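// Editor's sketch, not from this patch: why the expected strings in these
// tests grew a "<%bb>" suffix. SCEV addrecs now print with the name of the
// loop they belong to, so a recurrence renders as, e.g., "{0,+,2}<%bb>". A
// hypothetical C++ driver that would produce such output:
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"

static void dumpRecurrence(llvm::ScalarEvolution &SE, llvm::Value *IndVar) {
  SE.getSCEV(IndVar)->print(llvm::dbgs());  // e.g. "{0,+,2}<%bb>"
  llvm::dbgs() << "\n";
}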
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll index 1e165bf62226..fd0dfe66aee6 100644 --- a/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -18,11 +18,11 @@ bb: ; preds = %bb.nph, %bb1 %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; [#uses=5] ; CHECK: %1 = sext i32 %i.01 to i64 -; CHECK: --> {0,+,2} +; CHECK: --> {0,+,2}<%bb> %1 = sext i32 %i.01 to i64 ; [#uses=1] ; CHECK: %2 = getelementptr inbounds double* %d, i64 %1 -; CHECK: --> {%d,+,16} +; CHECK: --> {%d,+,16}<%bb> %2 = getelementptr inbounds double* %d, i64 %1 ; [#uses=1] %3 = load double* %2, align 8 ; [#uses=1] @@ -32,11 +32,11 @@ bb: ; preds = %bb.nph, %bb1 %7 = or i32 %i.01, 1 ; [#uses=1] ; CHECK: %8 = sext i32 %7 to i64 -; CHECK: --> {1,+,2} +; CHECK: --> {1,+,2}<%bb> %8 = sext i32 %7 to i64 ; [#uses=1] ; CHECK: %9 = getelementptr inbounds double* %q, i64 %8 -; CHECK: {(8 + %q),+,16} +; CHECK: {(8 + %q),+,16}<%bb> %9 = getelementptr inbounds double* %q, i64 %8 ; [#uses=1] ; Artificially repeat the above three instructions, this time using @@ -44,11 +44,11 @@ bb: ; preds = %bb.nph, %bb1 %t7 = add nsw i32 %i.01, 1 ; [#uses=1] ; CHECK: %t8 = sext i32 %t7 to i64 -; CHECK: --> {1,+,2} +; CHECK: --> {1,+,2}<%bb> %t8 = sext i32 %t7 to i64 ; [#uses=1] ; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8 -; CHECK: {(8 + %q),+,16} +; CHECK: {(8 + %q),+,16}<%bb> %t9 = getelementptr inbounds double* %q, i64 %t8 ; [#uses=1] %10 = load double* %9, align 8 ; [#uses=1] @@ -72,5 +72,5 @@ return: ; preds = %bb1.return_crit_edg ret void } -; CHECK: Loop bb: backedge-taken count is ((-1 + %n) /u 2) -; CHECK: Loop bb: max backedge-taken count is 1073741823 +; CHECK: Loop %bb: backedge-taken count is ((-1 + %n) /u 2) +; CHECK: Loop %bb: max backedge-taken count is 1073741823 diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll index c31edabf38ee..e4f2b29677c8 100644 --- a/test/Analysis/ScalarEvolution/nsw.ll +++ b/test/Analysis/ScalarEvolution/nsw.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}} | count 8 +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}<%bb>} | count 8 ; The addrecs in this loop are analyzable only by using nsw information. 
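The nsw tests above rest on one property: when the induction variable is advanced with "add nsw", ScalarEvolution may assume no signed overflow and fold a sign extension of the variable into the recurrence itself, which is what lets nsw-offset.ll expect plain {0,+,2}<%bb> forms rather than sext-wrapped ones. A small sketch under that assumption (hypothetical names); with the nsw dropped from the increment, the analysis would instead have to keep a form like (sext i32 {0,+,1}<%loop> to i64):

define void @store_zeros(double* %p, i32 %n) nounwind {
entry:
  %pos = icmp sgt i32 %n, 0
  br i1 %pos, label %loop, label %exit

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %idx = sext i32 %i to i64        ; foldable to {0,+,1}<%loop> thanks to nsw
  %slot = getelementptr double* %p, i64 %idx
  store double 0.000000e+00, double* %slot
  %i.next = add nsw i32 %i, 1
  %more = icmp slt i32 %i.next, %n
  br i1 %more, label %loop, label %exit

exit:
  ret void
}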
diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll index 16128354aeb4..4487822541c2 100644 --- a/test/Analysis/ScalarEvolution/sext-inreg.ll +++ b/test/Analysis/ScalarEvolution/sext-inreg.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output > %t -; RUN: grep {sext i57 \{0,+,199\} to i64} %t | count 1 -; RUN: grep {sext i59 \{0,+,199\} to i64} %t | count 1 +; RUN: grep {sext i57 \{0,+,199\}<%bb> to i64} %t | count 1 +; RUN: grep {sext i59 \{0,+,199\}<%bb> to i64} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll index 8f887c4a57eb..05983c1ad0b3 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-0.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -disable-output -scalar-evolution -analyze \ -; RUN: | grep { --> \{-128,+,1\} Exits: 127} | count 5 +; RUN: | grep { --> \{-128,+,1\}<%bb1> Exits: 127} | count 5 ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the ; trip count is within range where this is safe. diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll index 02c3206c6fe7..0bf51d9ba1b5 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-1.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -disable-output -scalar-evolution -analyze \ -; RUN: | grep { --> (sext i. \{.\*,+,.\*\} to i64)} | count 5 +; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<%bb1> to i64)} | count 5 ; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases ; where the trip count is not within range. diff --git a/test/Analysis/ScalarEvolution/sext-iv-2.ll b/test/Analysis/ScalarEvolution/sext-iv-2.ll index b25c237958c0..fc39cae005b6 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-2.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-2.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output | FileCheck %s ; CHECK: %tmp3 = sext i8 %tmp2 to i32 -; CHECK: --> (sext i8 {0,+,1} to i32) Exits: -1 +; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32) Exits: -1 ; CHECK: %tmp4 = mul i32 %tmp3, %i.02 -; CHECK: --> ((sext i8 {0,+,1} to i32) * {0,+,1}) Exits: {0,+,-1} +; CHECK: --> ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<%bb>) Exits: {0,+,-1}<%bb> ; These sexts are not foldable. diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll index 240983178b40..7d8e0c6d59ef 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze -disable-output \ -; RUN: | grep {Loop bb3\\.i: Unpredictable backedge-taken count\\.} +; RUN: | grep {Loop %bb3\\.i: Unpredictable backedge-taken count\\.} ; ScalarEvolution can't compute a trip count because it doesn't know if ; dividing by the stride will have a remainder. 
This could theoretically diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll index 0cd8d7c4a9a3..74c856feea4d 100644 --- a/test/Analysis/ScalarEvolution/trip-count7.ll +++ b/test/Analysis/ScalarEvolution/trip-count7.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output \ -; RUN: | grep {Loop bb7.i: Unpredictable backedge-taken count\\.} +; RUN: | grep {Loop %bb7.i: Unpredictable backedge-taken count\\.} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll index c49f5ceea704..5063342f178b 100644 --- a/test/Analysis/ScalarEvolution/trip-count8.ll +++ b/test/Analysis/ScalarEvolution/trip-count8.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution -disable-output \ -; RUN: | grep {Loop for\\.body: backedge-taken count is (-1 + \[%\]ecx)} +; RUN: | grep {Loop %for\\.body: backedge-taken count is (-1 + \[%\]ecx)} ; PR4599 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Analysis/ScalarEvolution/zext-wrap.ll b/test/Analysis/ScalarEvolution/zext-wrap.ll index 9ff99be736a0..c4ac5decf3ef 100644 --- a/test/Analysis/ScalarEvolution/zext-wrap.ll +++ b/test/Analysis/ScalarEvolution/zext-wrap.ll @@ -11,7 +11,7 @@ bb.i: ; preds = %bb1.i, %bb.nph ; This cast shouldn't be folded into the addrec. ; CHECK: %tmp = zext i8 %l_95.0.i1 to i16 -; CHECK: --> (zext i8 {0,+,-1} to i16) Exits: 2 +; CHECK: --> (zext i8 {0,+,-1}<%bb.i> to i16) Exits: 2 %tmp = zext i8 %l_95.0.i1 to i16 diff --git a/test/Assembler/functionlocal-metadata.ll b/test/Assembler/functionlocal-metadata.ll new file mode 100644 index 000000000000..8265aa1d3b10 --- /dev/null +++ b/test/Assembler/functionlocal-metadata.ll @@ -0,0 +1,35 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +define void @Foo(i32 %a, i32 %b) { +entry: + %0 = add i32 %a, 1 ; [#uses=1] + %two = add i32 %b, %0 ; [#uses=0] + %1 = alloca i32 ; [#uses=1] + + call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !{i32* %1}) +; CHECK: metadata !{i32* %1}, metadata !{i32* %1} + call void @llvm.dbg.declare(metadata !{i32 %two}, metadata !{i32 %0}) + call void @llvm.dbg.declare(metadata !{i32 %0}, metadata !{i32* %1, i32 %0}) + call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !{i32 %b, i32 %0}) + call void @llvm.dbg.declare(metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"}) +; CHECK: metadata !{i32 %a, metadata !"foo"} + call void @llvm.dbg.declare(metadata !{i32 %b}, metadata !{metadata !0, i32 %two}) + + call void @llvm.dbg.value(metadata !{ i32 %a }, i64 0, metadata !1) + call void @llvm.dbg.value(metadata !{ i32 %0 }, i64 25, metadata !0) + call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !"foo") +; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !"foo") + call void @llvm.dbg.value(metadata !"foo", i64 12, metadata !"bar") + + ret void, !foo !0, !bar !1 +; CHECK: ret void, !foo !0, !bar !1 +} + +!0 = metadata !{i32 662302, i32 26, metadata !1, null} +!1 = metadata !{i32 4, metadata !"foo"} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!foo = !{ !0 } +!bar = !{ !1 } diff --git a/test/Assembler/vector-cmp.ll 
b/test/Assembler/vector-cmp.ll index e4d35d9c9828..688369bb62b4 100644 --- a/test/Assembler/vector-cmp.ll +++ b/test/Assembler/vector-cmp.ll @@ -9,8 +9,8 @@ entry: ret <4 x i1> %cmp } -global <4 x i1> icmp slt ( <4 x i32> , <4 x i32> ) ; +global <4 x i1> icmp slt ( <4 x i32> , <4 x i32> ) -@B = external global i32; +@B = external global i32 -global <4 x i1> icmp slt ( <4 x i32> , <4 x i32> ) ; +global <4 x i1> icmp slt ( <4 x i32> , <4 x i32> ) diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 8b56f1321280..cd16084dbdb3 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -55,6 +55,6 @@ L1: ; preds = %L2, %bb2 store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } -; ARM: .long LBA4__foo__L5-(LPC{{.*}}+8) -; THUMB: .long LBA4__foo__L5-(LPC{{.*}}+4) -; THUMB2: .long LBA4__foo__L5 +; ARM: .long L_BA4__foo_L5-(LPC{{.*}}+8) +; THUMB: .long L_BA4__foo_L5-(LPC{{.*}}+4) +; THUMB2: .long L_BA4__foo_L5 diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll index 03376a4c61b7..fba56b4ffeeb 100644 --- a/test/CodeGen/ARM/private.ll +++ b/test/CodeGen/ARM/private.ll @@ -12,7 +12,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll index 1a867a9d8f8e..17c8baedbfa8 100644 --- a/test/CodeGen/ARM/tail-opts.ll +++ b/test/CodeGen/ARM/tail-opts.ll @@ -9,7 +9,7 @@ declare i1 @qux() @GHJK = global i32 0 -declare i8* @choose(i8*, i8*); +declare i8* @choose(i8*, i8*) ; BranchFolding should tail-duplicate the indirect jump to avoid ; redundant branching. diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll index 96ab4eb400ea..26076e0f8d3c 100644 --- a/test/CodeGen/Alpha/private.ll +++ b/test/CodeGen/Alpha/private.ll @@ -12,7 +12,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll index f21da52315fa..b6cd2d40d1af 100644 --- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll +++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -march=bfin -verify-machineinstrs -; XFAIL: * ; An undef argument causes a setugt node to escape instruction selection. 
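The label renames in the ARM indirectbr test above (LBA4__foo__L5 becoming L_BA4__foo_L5) concern the private symbols the asm printer generates for blockaddress constants on Darwin. A minimal sketch of the construct involved (hypothetical names), in the same style as that test; each basic block whose address is taken gets one such L_BA symbol:

@nextaddr = global i8* blockaddress(@g, %dest)

define i32 @g() nounwind {
entry:
  %t = load i8** @nextaddr
  indirectbr i8* %t, [label %dest]

dest:
  ret i32 1
}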
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll index e9b66ebe5772..363286d4b2f5 100644 --- a/test/CodeGen/Blackfin/ct32.ll +++ b/test/CodeGen/Blackfin/ct32.ll @@ -6,15 +6,15 @@ declare i32 @llvm.ctpop.i32(i32) define i32 @ctlztest(i32 %B) { %b = call i32 @llvm.ctlz.i32( i32 %B ) - ret i32 %b; + ret i32 %b } define i32 @cttztest(i32 %B) { %b = call i32 @llvm.cttz.i32( i32 %B ) - ret i32 %b; + ret i32 %b } define i32 @ctpoptest(i32 %B) { %b = call i32 @llvm.ctpop.i32( i32 %B ) - ret i32 %b; + ret i32 %b } diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll index ac4bdcffbe95..75024343ea4e 100644 --- a/test/CodeGen/Blackfin/ct64.ll +++ b/test/CodeGen/Blackfin/ct64.ll @@ -6,15 +6,15 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctlztest(i64 %B) { %b = call i64 @llvm.ctlz.i64( i64 %B ) - ret i64 %b; + ret i64 %b } define i64 @cttztest(i64 %B) { %b = call i64 @llvm.cttz.i64( i64 %B ) - ret i64 %b; + ret i64 %b } define i64 @ctpoptest(i64 %B) { %b = call i64 @llvm.ctpop.i64( i64 %B ) - ret i64 %b; + ret i64 %b } diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll index 56a65c05853e..eb4af232cfef 100644 --- a/test/CodeGen/Blackfin/ctlz16.ll +++ b/test/CodeGen/Blackfin/ctlz16.ll @@ -4,15 +4,15 @@ declare i16 @llvm.ctlz.i16(i16) define i16 @ctlztest(i16 %B) { %b = call i16 @llvm.ctlz.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } define i16 @ctlztest_z(i16 zeroext %B) { %b = call i16 @llvm.ctlz.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } define i16 @ctlztest_s(i16 signext %B) { %b = call i16 @llvm.ctlz.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll index cbbb3d9831a8..8b6c07ef28a8 100644 --- a/test/CodeGen/Blackfin/ctpop16.ll +++ b/test/CodeGen/Blackfin/ctpop16.ll @@ -4,15 +4,15 @@ declare i16 @llvm.ctpop.i16(i16) define i16 @ctpoptest(i16 %B) { %b = call i16 @llvm.ctpop.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } define i16 @ctpoptest_z(i16 zeroext %B) { %b = call i16 @llvm.ctpop.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } define i16 @ctpoptest_s(i16 signext %B) { %b = call i16 @llvm.ctpop.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll index 05fe9bfd4469..510882ad41fa 100644 --- a/test/CodeGen/Blackfin/cttz16.ll +++ b/test/CodeGen/Blackfin/cttz16.ll @@ -4,15 +4,15 @@ declare i16 @llvm.cttz.i16(i16) define i16 @cttztest(i16 %B) { %b = call i16 @llvm.cttz.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } define i16 @cttztest_z(i16 zeroext %B) { %b = call i16 @llvm.cttz.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } define i16 @cttztest_s(i16 signext %B) { %b = call i16 @llvm.cttz.i16( i16 %B ) ; [#uses=1] - ret i16 %b; + ret i16 %b } diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll index c247aca0a5b0..46da56681d4f 100644 --- a/test/CodeGen/Blackfin/promote-logic.ll +++ b/test/CodeGen/Blackfin/promote-logic.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=bfin > %t +; XFAIL: * ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR ; operation after LegalizeOps. 
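The `ret i32 %b;` to `ret i32 %b` edits running through the Blackfin tests above are purely cosmetic: in LLVM assembly, ';' starts a comment that runs to the end of the line, so the trailing semicolons were empty comments rather than statement terminators. Both forms below assemble to identical functions:

define i32 @with_semi(i32 %x) {
  ret i32 %x;
}

define i32 @without_semi(i32 %x) {
  ret i32 %x
}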
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll index b0a372beba0d..66bff3eb7835 100644 --- a/test/CodeGen/CellSPU/dp_farith.ll +++ b/test/CodeGen/CellSPU/dp_farith.ll @@ -83,7 +83,7 @@ define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { ; FNMS: - (a * b - c) => c - (a * b) define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { %A = fmul <2 x double> %arg1, %arg2 - %B = fsub <2 x double> %arg3, %A ; + %B = fsub <2 x double> %arg3, %A ret <2 x double> %B } diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll index 031d6c37ce70..1e28fc7a918d 100644 --- a/test/CodeGen/CellSPU/mul_ops.ll +++ b/test/CodeGen/CellSPU/mul_ops.ll @@ -11,7 +11,6 @@ ; RUN: grep shli %t1.s | count 4 ; RUN: grep shlhi %t1.s | count 4 ; RUN: grep ila %t1.s | count 2 -; RUN: grep xsbh %t1.s | count 4 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll index 7452276ccc8c..56f72e75b16a 100644 --- a/test/CodeGen/CellSPU/private.ll +++ b/test/CodeGen/CellSPU/private.ll @@ -13,7 +13,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll index 9a9c1a110d61..45b561affffa 100644 --- a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll +++ b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -soft-float ; PR3899 -@m = external global <2 x double>; +@m = external global <2 x double> define double @vector_ex() nounwind { %v = load <2 x double>* @m diff --git a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll index 577b547007d0..b62f811e8d16 100644 --- a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll +++ b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s ; rdar://6836460 +; rdar://7516906 +; PR5963 define i32 @test(i128* %P) nounwind { entry: @@ -14,3 +16,17 @@ if.then50: ; preds = %if.then20 if.end61: ; preds = %if.then50, %if.then20, %entry ret i32 123 } + +define i32 @test2(i320* %P) nounwind { +entry: + %tmp48 = load i320* %P + %and49 = and i320 %tmp48, 25108406941546723055343157692830665664409421777856138051584 + %tobool = icmp ne i320 %and49, 0 ; [#uses=1] + br i1 %tobool, label %if.then50, label %if.end61 + +if.then50: ; preds = %if.then20 + ret i32 1241 + +if.end61: ; preds = %if.then50, %if.then20, %entry + ret i32 123 +} diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll index 2c7836661ff6..0dc21584e8cf 100644 --- a/test/CodeGen/MSP430/bit.ll +++ b/test/CodeGen/MSP430/bit.ll @@ -1,9 +1,10 @@ ; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s +; XFAIL: * target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32" target triple = "msp430-generic-generic" -@foo8 = external global i8; -@bar8 = external global i8; +@foo8 = external global i8 +@bar8 = external global i8 define i8 @bitbrr(i8 %a, i8 %b) nounwind { %t1 = and i8 %a, %b @@ -83,8 +84,8 @@ define i8 @bitbmm() nounwind { ; CHECK: bitbmm: ; CHECK: bit.b &bar8, &foo8 -@foo16 = external global i16; -@bar16 = external global i16; +@foo16 = external global i16 +@bar16 = external global i16 define 
i16 @bitwrr(i16 %a, i16 %b) nounwind { %t1 = and i16 %a, %b diff --git a/test/CodeGen/MSP430/setcc.ll b/test/CodeGen/MSP430/setcc.ll index 971d1b5be8cf..ecf066154fc3 100644 --- a/test/CodeGen/MSP430/setcc.ll +++ b/test/CodeGen/MSP430/setcc.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=msp430 < %s | FileCheck %s +; XFAIL: * target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32" target triple = "msp430-generic-generic" diff --git a/test/CodeGen/MSP430/shifts.ll b/test/CodeGen/MSP430/shifts.ll new file mode 100644 index 000000000000..b5b3054b9621 --- /dev/null +++ b/test/CodeGen/MSP430/shifts.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8:16" +target triple = "msp430-elf" + +define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { +entry: +; CHECK: lshr8: +; CHECK: rrc.b + %shr = lshr i8 %a, %cnt + ret i8 %shr +} + +define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone { +entry: +; CHECK: ashr8: +; CHECK: rra.b + %shr = ashr i8 %a, %cnt + ret i8 %shr +} + +define zeroext i8 @shl8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { +entry: +; CHECK: shl8 +; CHECK: rla.b + %shl = shl i8 %a, %cnt + ret i8 %shl +} + +define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { +entry: +; CHECK: lshr16: +; CHECK: rrc.w + %shr = lshr i16 %a, %cnt + ret i16 %shr +} + +define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone { +entry: +; CHECK: ashr16: +; CHECK: rra.w + %shr = ashr i16 %a, %cnt + ret i16 %shr +} + +define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { +entry: +; CHECK: shl16: +; CHECK: rla.w + %shl = shl i16 %a, %cnt + ret i16 %shl +} diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll index a1b45c2a63e1..34b75477b689 100644 --- a/test/CodeGen/Mips/private.ll +++ b/test/CodeGen/Mips/private.ll @@ -12,7 +12,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll index 1b302e41c09b..fbc7bd2264b6 100644 --- a/test/CodeGen/PowerPC/indirectbr.ll +++ b/test/CodeGen/PowerPC/indirectbr.ll @@ -43,12 +43,12 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; [#uses=1] -; PIC: addis r4, r2, ha16(LBA4__foo__L5-"L1$pb") -; PIC: li r5, lo16(LBA4__foo__L5-"L1$pb") +; PIC: addis r4, r2, ha16(L_BA4__foo_L5-"L1$pb") +; PIC: li r5, lo16(L_BA4__foo_L5-"L1$pb") ; PIC: add r4, r4, r5 ; PIC: stw r4 -; STATIC: li r2, lo16(LBA4__foo__L5) -; STATIC: addis r2, r2, ha16(LBA4__foo__L5) +; STATIC: li r2, lo16(L_BA4__foo_L5) +; STATIC: addis r2, r2, ha16(L_BA4__foo_L5) ; STATIC: stw r2 store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 diff --git a/test/CodeGen/PowerPC/private.ll b/test/CodeGen/PowerPC/private.ll index d6e67708ac25..f9405f6af2ff 100644 --- a/test/CodeGen/PowerPC/private.ll +++ b/test/CodeGen/PowerPC/private.ll @@ -15,7 +15,7 @@ define private void @foo() nounwind { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() nounwind { call void @foo() diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll index 8fa3e7e52d8d..f091aa63d70d 100644 --- a/test/CodeGen/SPARC/private.ll +++ b/test/CodeGen/SPARC/private.ll @@ -12,7 +12,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private 
global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll index 3317864c0147..07a164d42645 100644 --- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll +++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll @@ -5,8 +5,8 @@ target triple = "s390x-linux" define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: - %shl = shl i32 %x, 0 ; [#uses=1] - %sub = sub i32 32, 0 ; [#uses=1] + %shl = shl i32 %x, 1 ; [#uses=1] + %sub = sub i32 32, 1 ; [#uses=1] %shr = lshr i32 %x, %sub ; [#uses=1] %or = or i32 %shr, %shl ; [#uses=1] ret i32 %or diff --git a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll new file mode 100644 index 000000000000..d730beca245b --- /dev/null +++ b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16-n32:64" +target triple = "s390x-elf" + +@REGISTER = external global [10 x i32] ; <[10 x i32]*> [#uses=2] + +define void @DIVR_P(i32 signext %PRINT_EFFECT) nounwind { +entry: + %REG1 = alloca i32, align 4 ; [#uses=2] + %REG2 = alloca i32, align 4 ; [#uses=2] + %call = call signext i32 (...)* @FORMAT2(i32* %REG1, i32* %REG2) nounwind ; [#uses=0] + %tmp = load i32* %REG1 ; [#uses=1] + %idxprom = sext i32 %tmp to i64 ; [#uses=1] + %arrayidx = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom ; [#uses=2] + %tmp1 = load i32* %arrayidx ; [#uses=2] + %tmp2 = load i32* %REG2 ; [#uses=1] + %idxprom3 = sext i32 %tmp2 to i64 ; [#uses=1] + %arrayidx4 = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom3 ; [#uses=3] + %tmp5 = load i32* %arrayidx4 ; [#uses=3] + %cmp6 = icmp sgt i32 %tmp5, 8388607 ; [#uses=1] + %REG2_SIGN.0 = select i1 %cmp6, i32 -1, i32 1 ; [#uses=2] + %cmp10 = icmp eq i32 %REG2_SIGN.0, 1 ; [#uses=1] + %not.cmp = icmp slt i32 %tmp1, 8388608 ; [#uses=2] + %or.cond = and i1 %cmp10, %not.cmp ; [#uses=1] + br i1 %or.cond, label %if.then13, label %if.end25 + +if.then13: ; preds = %entry + %div = sdiv i32 %tmp5, %tmp1 ; [#uses=2] + store i32 %div, i32* %arrayidx4 + br label %if.end25 + +if.end25: ; preds = %if.then13, %entry + %tmp35 = phi i32 [ %div, %if.then13 ], [ %tmp5, %entry ] ; [#uses=1] + %cmp27 = icmp eq i32 %REG2_SIGN.0, -1 ; [#uses=1] + %or.cond46 = and i1 %cmp27, %not.cmp ; [#uses=1] + br i1 %or.cond46, label %if.then31, label %if.end45 + +if.then31: ; preds = %if.end25 + %sub = sub i32 16777216, %tmp35 ; [#uses=1] + %tmp39 = load i32* %arrayidx ; [#uses=1] + %div40 = udiv i32 %sub, %tmp39 ; [#uses=1] + %sub41 = sub i32 16777216, %div40 ; [#uses=1] + store i32 %sub41, i32* %arrayidx4 + ret void + +if.end45: ; preds = %if.end25 + ret void +} + +declare signext i32 @FORMAT2(...) diff --git a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll new file mode 100644 index 000000000000..6a05df111472 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll @@ -0,0 +1,89 @@ +; RUN: llc -relocation-model=pic -pre-regalloc-taildup < %s | grep {:$} | sort | uniq -d | count 0 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This function produces a duplicate LPC label unless special care is taken when duplicating a t2LDRpci_pic instruction. 
+ +%struct.PlatformMutex = type { i32, [40 x i8] } +%struct.SpinLock = type { %struct.PlatformMutex } +%"struct.WTF::TCMalloc_ThreadCache" = type { i32, %struct._opaque_pthread_t*, i8, [68 x %"struct.WTF::TCMalloc_ThreadCache_FreeList"], i32, i32, %"struct.WTF::TCMalloc_ThreadCache"*, %"struct.WTF::TCMalloc_ThreadCache"* } +%"struct.WTF::TCMalloc_ThreadCache_FreeList" = type { i8*, i16, i16 } +%struct.__darwin_pthread_handler_rec = type { void (i8*)*, i8*, %struct.__darwin_pthread_handler_rec* } +%struct._opaque_pthread_t = type { i32, %struct.__darwin_pthread_handler_rec*, [596 x i8] } + +@_ZN3WTFL8heap_keyE = internal global i32 0 ; [#uses=1] +@_ZN3WTFL10tsd_initedE.b = internal global i1 false ; [#uses=2] +@_ZN3WTFL13pageheap_lockE = internal global %struct.SpinLock { %struct.PlatformMutex { i32 850045863, [40 x i8] zeroinitializer } } ; <%struct.SpinLock*> [#uses=1] +@_ZN3WTFL12thread_heapsE = internal global %"struct.WTF::TCMalloc_ThreadCache"* null ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (%"struct.WTF::TCMalloc_ThreadCache"* ()* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define arm_apcscc %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv() nounwind { +entry: + %0 = tail call arm_apcscc i32 @pthread_mutex_lock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind + %.b24 = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; [#uses=1] + br i1 %.b24, label %bb5, label %bb6 + +bb5: ; preds = %entry + %1 = tail call arm_apcscc %struct._opaque_pthread_t* @pthread_self() nounwind + br label %bb6 + +bb6: ; preds = %bb5, %entry + %me.0 = phi %struct._opaque_pthread_t* [ %1, %bb5 ], [ null, %entry ] ; <%struct._opaque_pthread_t*> [#uses=2] + br label %bb11 + +bb7: ; preds = %bb11 + %2 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 1 + %3 = load %struct._opaque_pthread_t** %2, align 4 + %4 = tail call arm_apcscc i32 @pthread_equal(%struct._opaque_pthread_t* %3, %struct._opaque_pthread_t* %me.0) nounwind + %5 = icmp eq i32 %4, 0 + br i1 %5, label %bb10, label %bb14 + +bb10: ; preds = %bb7 + %6 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 6 + br label %bb11 + +bb11: ; preds = %bb10, %bb6 + %h.0.in = phi %"struct.WTF::TCMalloc_ThreadCache"** [ @_ZN3WTFL12thread_heapsE, %bb6 ], [ %6, %bb10 ] ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1] + %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4] + %7 = icmp eq %"struct.WTF::TCMalloc_ThreadCache"* %h.0, null + br i1 %7, label %bb13, label %bb7 + +bb13: ; preds = %bb11 + %8 = tail call arm_apcscc %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache7NewHeapEP17_opaque_pthread_t(%struct._opaque_pthread_t* %me.0) nounwind + br label %bb14 + +bb14: ; preds = %bb13, %bb7 + %heap.1 = phi %"struct.WTF::TCMalloc_ThreadCache"* [ %8, %bb13 ], [ %h.0, %bb7 ] ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4] + %9 = tail call arm_apcscc i32 @pthread_mutex_unlock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind + %10 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %heap.1, i32 0, i32 2 + %11 = load i8* %10, align 4 + %toBool15not = icmp eq i8 %11, 0 ; [#uses=1] + br i1 %toBool15not, label %bb19, label 
%bb22 + +bb19: ; preds = %bb14 + %.b = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; [#uses=1] + br i1 %.b, label %bb21, label %bb22 + +bb21: ; preds = %bb19 + store i8 1, i8* %10, align 4 + %12 = load i32* @_ZN3WTFL8heap_keyE, align 4 + %13 = bitcast %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 to i8* + %14 = tail call arm_apcscc i32 @pthread_setspecific(i32 %12, i8* %13) nounwind + ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 + +bb22: ; preds = %bb19, %bb14 + ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 +} + +declare arm_apcscc i32 @pthread_mutex_lock(%struct.PlatformMutex*) + +declare arm_apcscc i32 @pthread_mutex_unlock(%struct.PlatformMutex*) + +declare hidden arm_apcscc %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache7NewHeapEP17_opaque_pthread_t(%struct._opaque_pthread_t*) nounwind + +declare arm_apcscc i32 @pthread_setspecific(i32, i8*) + +declare arm_apcscc %struct._opaque_pthread_t* @pthread_self() + +declare arm_apcscc i32 @pthread_equal(%struct._opaque_pthread_t*, %struct._opaque_pthread_t*) + diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll index d42ea7138e46..5e25cf64bccb 100644 --- a/test/CodeGen/Thumb2/thumb2-add.ll +++ b/test/CodeGen/Thumb2/thumb2-add.ll @@ -8,43 +8,43 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8 define i32 @t2ADDrc_255(i32 %lhs) { - %Rd = add i32 %lhs, 255; + %Rd = add i32 %lhs, 255 ret i32 %Rd } define i32 @t2ADDrc_256(i32 %lhs) { - %Rd = add i32 %lhs, 256; + %Rd = add i32 %lhs, 256 ret i32 %Rd } define i32 @t2ADDrc_257(i32 %lhs) { - %Rd = add i32 %lhs, 257; + %Rd = add i32 %lhs, 257 ret i32 %Rd } define i32 @t2ADDrc_4094(i32 %lhs) { - %Rd = add i32 %lhs, 4094; + %Rd = add i32 %lhs, 4094 ret i32 %Rd } define i32 @t2ADDrc_4095(i32 %lhs) { - %Rd = add i32 %lhs, 4095; + %Rd = add i32 %lhs, 4095 ret i32 %Rd } define i32 @t2ADDrc_4096(i32 %lhs) { - %Rd = add i32 %lhs, 4096; + %Rd = add i32 %lhs, 4096 ret i32 %Rd } define i32 @t2ADDrr(i32 %lhs, i32 %rhs) { - %Rd = add i32 %lhs, %rhs; + %Rd = add i32 %lhs, %rhs ret i32 %Rd } define i32 @t2ADDrs(i32 %lhs, i32 %rhs) { %tmp = shl i32 %rhs, 8 - %Rd = add i32 %lhs, %tmp; + %Rd = add i32 %lhs, %tmp ret i32 %Rd } diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index 89b127cccf82..bdbe713a2956 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -10,9 +10,8 @@ entry: cond_true: ; preds = %cond_true, %entry %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; [#uses=2] %tmp. 
= shl i32 %indvar, 2 ; [#uses=1] - %tmp.10 = add i32 %tmp., 1 ; [#uses=2] - %k.0.0 = bitcast i32 %tmp.10 to i32 ; [#uses=2] - %tmp31 = add i32 %k.0.0, -1 ; [#uses=4] + %tmp.10 = add nsw i32 %tmp., 1 ; [#uses=2] + %tmp31 = add nsw i32 %tmp.10, -1 ; [#uses=4] %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; [#uses=1] %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; [#uses=1] %tmp = load <16 x i8>* %tmp34, align 1 @@ -37,14 +36,13 @@ cond_true: ; preds = %cond_true, %entry %tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1] %tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1] %tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1] - %gep.upgrd.6 = zext i32 %tmp.10 to i64 ; [#uses=1] - %tmp137 = getelementptr i32* %mc, i64 %gep.upgrd.6 ; [#uses=1] + %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; [#uses=1] %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 - %tmp147 = add i32 %k.0.0, 8 ; [#uses=1] - %tmp.upgrd.8 = icmp sgt i32 %tmp147, %M ; [#uses=1] + %tmp147 = add nsw i32 %tmp.10, 8 ; [#uses=1] + %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; [#uses=1] %indvar.next = add i32 %indvar, 1 ; [#uses=1] - br i1 %tmp.upgrd.8, label %return, label %cond_true + br i1 %tmp.upgrd.8, label %cond_true, label %return return: ; preds = %cond_true, %entry ret void diff --git a/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll similarity index 100% rename from test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll rename to test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll index 93e880854985..10bbe7442007 100644 --- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll +++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=x86 | grep {leal 3(,%eax,8)} ;; This example can't fold the or into an LEA. -define i32 @test(float ** %tmp2, i32 %tmp12) { +define i32 @test(float ** %tmp2, i32 %tmp12) nounwind { %tmp3 = load float** %tmp2 %tmp132 = shl i32 %tmp12, 2 ; [#uses=1] %tmp4 = bitcast float* %tmp3 to i8* ; [#uses=1] @@ -14,7 +14,7 @@ define i32 @test(float ** %tmp2, i32 %tmp12) { ;; This can! 
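;; A sketch of why: "shl i32 %a, 3" leaves the low three bits of %c known zero,
;; so "or i32 %c, 3" only sets bits the shift cleared and is equivalent to an
;; add, which lets instruction selection match the whole expression as the
;; single "leal 3(,%eax,8)" that the RUN line above greps for.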
-define i32 @test2(i32 %a, i32 %b) { +define i32 @test2(i32 %a, i32 %b) nounwind { %c = shl i32 %a, 3 %d = or i32 %c, 3 ret i32 %d diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index cb1b1efae3e2..a4d642b40354 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -6,13 +6,13 @@ target triple = "x86_64-apple-darwin10.0" %struct.__Rec = type opaque %struct.__vv = type { } -define %struct.__vv* @t(%struct.Key* %desc) nounwind ssp { +define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { entry: br label %bb4 bb4: ; preds = %bb.i, %bb26, %bb4, %entry %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; [#uses=0] - %ins = or i64 0, 0 ; [#uses=1] + %ins = or i64 %p, 2097152 ; [#uses=1] %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; [#uses=1] %cond = icmp eq i32 %1, 1 ; [#uses=1] br i1 %cond, label %bb26, label %bb4 diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll index 8a0b244a23fa..3cd54169745d 100644 --- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll +++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll @@ -1,12 +1,18 @@ -; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4 +; RUN: llc < %s | FileCheck %s + ; Check that the shr(shl X, 56), 48) is not mistakenly turned into ; a shr (X, -8) that gets subsequently "optimized away" as undef ; PR4254 + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" define i64 @foo(i64 %b) nounwind readnone { entry: +; CHECK: foo: +; CHECK: shlq $56, %rdi +; CHECK: sarq $48, %rdi +; CHECK: leaq 1(%rdi), %rax %shl = shl i64 %b, 56 ; [#uses=1] %shr = ashr i64 %shl, 48 ; [#uses=1] %add5 = or i64 %shr, 1 ; [#uses=1] diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll index a7c202076da8..8f274df918d1 100644 --- a/test/CodeGen/X86/2009-11-16-MachineLICM.ll +++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll @@ -10,7 +10,7 @@ entry: br i1 %0, label %bb.nph, label %return bb.nph: ; preds = %entry -; CHECK: movq _g@GOTPCREL(%rip), %rcx +; CHECK: movq _g@GOTPCREL(%rip), [[REG:%[a-z]+]] %tmp = zext i32 %n to i64 ; [#uses=1] br label %bb diff --git a/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll new file mode 100644 index 000000000000..e7004e28752e --- /dev/null +++ b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 +; +target triple = "i686-apple-darwin8" + +declare void @func2(i16 zeroext) + +define void @func1() nounwind { +entry: + %t1 = icmp ne i8 undef, 0 + %t2 = icmp eq i8 undef, 14 + %t3 = and i1 %t1, %t2 + %t4 = select i1 %t3, i16 0, i16 128 + call void @func2(i16 zeroext %t4) nounwind + ret void +} diff --git a/test/CodeGen/X86/2010-01-07-ISelBug.ll b/test/CodeGen/X86/2010-01-07-ISelBug.ll new file mode 100644 index 000000000000..081fab7facfe --- /dev/null +++ b/test/CodeGen/X86/2010-01-07-ISelBug.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; rdar://r7519827 + +define i32 @t() nounwind ssp { +entry: + br label %if.end.i11 + +if.end.i11: ; preds = %lor.lhs.false.i10, %lor.lhs.false.i10, %lor.lhs.false.i10 + br i1 undef, label %for.body161, label %for.end197 + +for.body161: ; preds = %if.end.i11 + br label %for.end197 + +for.end197: ; preds = 
%for.body161, %if.end.i11 + %mlucEntry.4 = phi i96 [ undef, %for.body161 ], [ undef, %if.end.i11 ] ; [#uses=2] + store i96 %mlucEntry.4, i96* undef, align 8 + %tmp172 = lshr i96 %mlucEntry.4, 64 ; [#uses=1] + %tmp173 = trunc i96 %tmp172 to i32 ; [#uses=1] + %tmp1.i1.i = call i32 @llvm.bswap.i32(i32 %tmp173) nounwind ; [#uses=1] + store i32 %tmp1.i1.i, i32* undef, align 8 + unreachable + +if.then283: ; preds = %lor.lhs.false.i10, %do.end105, %for.end + ret i32 undef +} + +declare i32 @llvm.bswap.i32(i32) nounwind readnone diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll new file mode 100644 index 000000000000..3728f15d969c --- /dev/null +++ b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll @@ -0,0 +1,11 @@ +; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s +; CHECK: addps ( + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind { + %A = load <4 x float>* %P, align 4 + %B = add <4 x float> %A, %In + ret <4 x float> %B +} diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll new file mode 100644 index 000000000000..172e1c73d568 --- /dev/null +++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; rdar://r7512579 + +; PHI defs in the atomic loop should be used by the add / adc +; instructions. They should not be dead. + +define void @t(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: t: +; CHECK: movl $1 +; CHECK: movl (%ebp), %eax +; CHECK: movl 4(%ebp), %edx +; CHECK: LBB1_1: +; CHECK-NOT: movl $1 +; CHECK-NOT: movl $0 +; CHECK: addl +; CHECK: adcl +; CHECK: lock +; CHECK: cmpxchg8b +; CHECK: jne + tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; [#uses=0] + tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind diff --git a/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll new file mode 100644 index 000000000000..db98eef30e1e --- /dev/null +++ b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll @@ -0,0 +1,97 @@ +; RUN: llc -verify-machineinstrs < %s +; +; The lowering of a switch combined with constant folding would leave spurious extra arguments on a PHI instruction. 
+; +target triple = "x86_64-apple-darwin10" + +define void @foo() { + br label %cond_true813.i + +cond_true813.i: ; preds = %0 + br i1 false, label %cond_true818.i, label %cond_next1146.i + +cond_true818.i: ; preds = %cond_true813.i + br i1 false, label %recog_memoized.exit52, label %cond_next1146.i + +recog_memoized.exit52: ; preds = %cond_true818.i + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb857.i: ; preds = %bb886.i, %bb866.i + %tmp862.i494.24 = phi i8* [ null, %bb866.i ], [ %tmp862.i494.26, %bb886.i ] ; [#uses=1] + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb866.i.loopexit: ; preds = %bb874.i + br label %bb866.i + +bb866.i.loopexit31: ; preds = %cond_true903.i + br label %bb866.i + +bb866.i: ; preds = %bb866.i.loopexit31, %bb866.i.loopexit, %bb857.i, %recog_memoized.exit52 + br i1 false, label %bb907.i, label %bb857.i + +bb874.i.preheader.loopexit: ; preds = %cond_true903.i, %cond_true903.i + ret void + +bb874.i: ; preds = %bb857.i, %bb857.i, %recog_memoized.exit52, %recog_memoized.exit52 + switch i32 0, label %bb886.i.preheader.loopexit [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit + i32 103, label %bb874.i.backedge + i32 114, label %bb874.i.backedge + ] + +bb874.i.backedge: ; preds = %bb874.i, %bb874.i + ret void + +bb886.i.preheader.loopexit: ; preds = %bb874.i + ret void + +bb886.i.preheader: ; preds = %bb857.i, %recog_memoized.exit52 + %tmp862.i494.26 = phi i8* [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.24, %bb857.i ] ; [#uses=1] + br label %bb886.i + +bb886.i: ; preds = %cond_true903.i, %bb886.i.preheader + br i1 false, label %bb857.i, label %cond_true903.i + +cond_true903.i: ; preds = %bb886.i + switch i32 0, label %bb886.i [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit31 + i32 103, label %bb874.i.preheader.loopexit + i32 114, label %bb874.i.preheader.loopexit + ] + +bb907.i: ; preds = %cond_true903.i, %bb874.i, %bb866.i, %bb857.i, %recog_memoized.exit52 + br i1 false, label %cond_next1146.i, label %cond_true910.i + +cond_true910.i: ; preds = %bb907.i + ret void + +cond_next1146.i: ; preds = %bb907.i, %cond_true818.i, %cond_true813.i + ret void + +bb2060.i: ; No predecessors! + br i1 false, label %cond_true2064.i, label %bb2067.i + +cond_true2064.i: ; preds = %bb2060.i + unreachable + +bb2067.i: ; preds = %bb2060.i + ret void + +cond_next3473: ; No predecessors! + ret void + +cond_next3521: ; No predecessors! 
+ ret void +} diff --git a/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/test/CodeGen/X86/2010-01-13-OptExtBug.ll new file mode 100644 index 000000000000..d49e2a8d0798 --- /dev/null +++ b/test/CodeGen/X86/2010-01-13-OptExtBug.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu +; PR6027 + +%class.OlsonTimeZone = type { i16, i32*, i8*, i16 } + +define void @XX(%class.OlsonTimeZone* %this) align 2 { +entry: + %call = tail call i8* @_Z15uprv_malloc_4_2v() + %0 = bitcast i8* %call to double* + %tmp = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 3 + %tmp2 = load i16* %tmp + %tmp525 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0 + %tmp626 = load i16* %tmp525 + %cmp27 = icmp slt i16 %tmp2, %tmp626 + br i1 %cmp27, label %bb.nph, label %for.end + +for.cond: + %tmp6 = load i16* %tmp5 + %cmp = icmp slt i16 %inc, %tmp6 + %indvar.next = add i32 %indvar, 1 + br i1 %cmp, label %for.body, label %for.end + +bb.nph: + %tmp10 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 2 + %tmp17 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 1 + %tmp5 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0 + %tmp29 = sext i16 %tmp2 to i32 + %tmp31 = add i16 %tmp2, 1 + %tmp32 = zext i16 %tmp31 to i32 + br label %for.body + +for.body: + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %for.cond ] + %tmp30 = add i32 %indvar, %tmp29 + %tmp33 = add i32 %indvar, %tmp32 + %inc = trunc i32 %tmp33 to i16 + %tmp11 = load i8** %tmp10 + %arrayidx = getelementptr i8* %tmp11, i32 %tmp30 + %tmp12 = load i8* %arrayidx + br label %for.cond + +for.end: + ret void +} + +declare i8* @_Z15uprv_malloc_4_2v() diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll new file mode 100644 index 000000000000..30a1f36850de --- /dev/null +++ b/test/CodeGen/X86/3addr-or.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7527734 + +define i32 @test(i32 %x) nounwind readnone ssp { +entry: +; CHECK: test: +; CHECK: leal 3(%rdi), %eax + %0 = shl i32 %x, 5 ; [#uses=1] + %1 = or i32 %0, 3 ; [#uses=1] + ret i32 %1 +} + +define i64 @test2(i8 %A, i8 %B) nounwind { +; CHECK: test2: +; CHECK: shrq $4 +; CHECK-NOT: movq +; CHECK-NOT: orq +; CHECK: leaq +; CHECK: ret + %C = zext i8 %A to i64 ; [#uses=1] + %D = shl i64 %C, 4 ; [#uses=1] + %E = and i64 %D, 48 ; [#uses=1] + %F = zext i8 %B to i64 ; [#uses=1] + %G = lshr i64 %F, 4 ; [#uses=1] + %H = or i64 %G, %E ; [#uses=1] + ret i64 %H +} diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll new file mode 100644 index 000000000000..547d6b57657a --- /dev/null +++ b/test/CodeGen/X86/addr-label-difference.ll @@ -0,0 +1,22 @@ +; RUN: llc %s -o - | grep {__TEXT,__const} +; PR5929 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +; This array should go into the __TEXT,__const section, not into the +; __DATA,__const section, because the elements don't need relocations. 
+@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1] + +define void @test(i32 %i) nounwind ssp { +entry: + br label %foo + +foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto + br label %bar + +bar: ; preds = %foo, %indirectgoto + br label %hack + +hack: ; preds = %bar, %indirectgoto + ret void +} diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll index b5ac23b24128..38db88af12c2 100644 --- a/test/CodeGen/X86/and-su.ll +++ b/test/CodeGen/X86/and-su.ll @@ -1,16 +1,53 @@ -; RUN: llc < %s -march=x86 | grep {(%} | count 1 +; RUN: llc < %s -march=x86 | FileCheck %s ; Don't duplicate the load. define fastcc i32 @foo(i32* %p) nounwind { +; CHECK: foo: +; CHECK: andl $10, %eax +; CHECK: je %t0 = load i32* %p %t2 = and i32 %t0, 10 %t3 = icmp ne i32 %t2, 0 br i1 %t3, label %bb63, label %bb76 - bb63: ret i32 %t2 - bb76: ret i32 0 } + +define fastcc double @bar(i32 %hash, double %x, double %y) nounwind { +entry: +; CHECK: bar: + %0 = and i32 %hash, 15 + %1 = icmp ult i32 %0, 8 + br i1 %1, label %bb11, label %bb10 + +bb10: +; CHECK: bb10 +; CHECK: testb $1 + %2 = and i32 %hash, 1 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %bb13, label %bb11 + +bb11: + %4 = fsub double -0.000000e+00, %x + br label %bb13 + +bb13: +; CHECK: bb13 +; CHECK: testb $2 + %iftmp.9.0 = phi double [ %4, %bb11 ], [ %x, %bb10 ] + %5 = and i32 %hash, 2 + %6 = icmp eq i32 %5, 0 + br i1 %6, label %bb16, label %bb14 + +bb14: + %7 = fsub double -0.000000e+00, %y + br label %bb16 + +bb16: + %iftmp.10.0 = phi double [ %7, %bb14 ], [ %y, %bb13 ] + %8 = fadd double %iftmp.9.0, %iftmp.10.0 + ret double %8 +} diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll deleted file mode 100644 index 0cf169eb28d8..000000000000 --- a/test/CodeGen/X86/anyext-uses.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: grep mov %t | count 8 -; RUN: not grep implicit %t - -; Avoid partial register updates; don't define an i8 register and read -; the i32 super-register. 
- -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-apple-darwin9.6" - %struct.RC4_KEY = type { i8, i8, [256 x i8] } - -define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind { -entry: - br label %bb24 - -bb24: ; preds = %bb24, %entry - %0 = load i8* null, align 1 ; [#uses=1] - %1 = zext i8 %0 to i64 ; [#uses=1] - %2 = shl i64 %1, 32 ; [#uses=1] - %3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; [#uses=1] - %4 = load i8* %3, align 1 ; [#uses=2] - %5 = add i8 %4, 0 ; [#uses=2] - %6 = zext i8 %5 to i64 ; [#uses=0] - %7 = load i8* null, align 1 ; [#uses=1] - %8 = zext i8 %4 to i32 ; [#uses=1] - %9 = zext i8 %7 to i32 ; [#uses=1] - %10 = add i32 %9, %8 ; [#uses=1] - %11 = and i32 %10, 255 ; [#uses=1] - %12 = zext i32 %11 to i64 ; [#uses=1] - %13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; [#uses=1] - %14 = load i8* %13, align 1 ; [#uses=1] - %15 = zext i8 %14 to i64 ; [#uses=1] - %16 = shl i64 %15, 48 ; [#uses=1] - %17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; [#uses=1] - %18 = load i8* %17, align 1 ; [#uses=2] - %19 = add i8 %18, %5 ; [#uses=1] - %20 = zext i8 %19 to i64 ; [#uses=1] - %21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; [#uses=1] - store i8 %18, i8* %21, align 1 - %22 = or i64 0, %2 ; [#uses=1] - %23 = or i64 %22, 0 ; [#uses=1] - %24 = or i64 %23, %16 ; [#uses=1] - %25 = or i64 %24, 0 ; [#uses=1] - %26 = xor i64 %25, 0 ; [#uses=1] - store i64 %26, i64* null, align 8 - br label %bb24 -} diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll new file mode 100644 index 000000000000..8af3bd1bc229 --- /dev/null +++ b/test/CodeGen/X86/br-fold.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; CHECK: orq +; CHECK-NEXT: jne + +@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1] +@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1] + +define fastcc void @foo() { +entry: + br i1 icmp eq (i64 or (i64 ptrtoint ([33 x i16]* @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE to i64), + i64 ptrtoint ([9 x i16]* @_ZN11xercesc_2_56XMLUni16fgNotationStringE to i64)), i64 0), + label %bb8.i329, label %bb4.i.i318.preheader + +bb4.i.i318.preheader: ; preds = %bb6 + unreachable + +bb8.i329: ; preds = %bb6 + unreachable +} diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll new file mode 100644 index 000000000000..130483ad8410 --- /dev/null +++ b/test/CodeGen/X86/brcond.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s +; rdar://7475489 + +define i32 @test1(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: test1: +; CHECK: xorb +; CHECK-NOT: andb +; CHECK-NOT: shrb +; CHECK: testb $64 + %0 = and i32 %a, 16384 + %1 = icmp ne i32 %0, 0 + %2 = and i32 %b, 16384 + %3 = icmp ne i32 %2, 0 + %4 = xor i1 %1, %3 + br i1 %4, label %bb1, label %bb + +bb: ; preds = %entry + %5 = tail call i32 (...)* @foo() nounwind ; [#uses=1] + ret i32 %5 + +bb1: ; preds = %entry + %6 = tail call i32 (...)* @bar() nounwind ; [#uses=1] + ret i32 %6 +} + +declare i32 @foo(...) + +declare i32 @bar(...) 
+ + + +; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0 +define i32 @test2(i32* %P, i32* %Q) nounwind ssp { +entry: + %a = icmp eq i32* %P, null ; [#uses=1] + %b = icmp eq i32* %Q, null ; [#uses=1] + %c = and i1 %a, %b + br i1 %c, label %bb1, label %return + +bb1: ; preds = %entry + ret i32 4 + +return: ; preds = %entry + ret i32 192 +; CHECK: test2: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: orl 8(%esp), %eax +; CHECK-NEXT: jne LBB2_2 +} + +; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0 +define i32 @test3(i32* %P, i32* %Q) nounwind ssp { +entry: + %a = icmp ne i32* %P, null ; [#uses=1] + %b = icmp ne i32* %Q, null ; [#uses=1] + %c = or i1 %a, %b + br i1 %c, label %bb1, label %return + +bb1: ; preds = %entry + ret i32 4 + +return: ; preds = %entry + ret i32 192 +; CHECK: test3: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: orl 8(%esp), %eax +; CHECK-NEXT: je LBB3_2 +} diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll index a3c1e6f0c554..a9573cfc6a2a 100644 --- a/test/CodeGen/X86/darwin-bzero.ll +++ b/test/CodeGen/X86/darwin-bzero.ll @@ -3,6 +3,6 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) define void @foo(i8* %p, i32 %len) { - call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1); + call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1) ret void } diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll index 12a2ef30e17e..d1ba9a845800 100644 --- a/test/CodeGen/X86/extractelement-shuffle.ll +++ b/test/CodeGen/X86/extractelement-shuffle.ll @@ -6,8 +6,8 @@ ; through the 3rd mask element, which doesn't exist. define i32 @update(<2 x i64> %val1, <2 x i64> %val2) nounwind readnone { entry: - %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> ; - %bit = bitcast <2 x i64> %shuf to <4 x i32>; - %res = extractelement <4 x i32> %bit, i32 3; - ret i32 %res; -} \ No newline at end of file + %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> + %bit = bitcast <2 x i64> %shuf to <4 x i32> + %res = extractelement <4 x i32> %bit, i32 3 + ret i32 %res +} diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll index 3dcd736a1404..84b3fd7caf3a 100644 --- a/test/CodeGen/X86/fast-isel.ll +++ b/test/CodeGen/X86/fast-isel.ll @@ -14,7 +14,7 @@ fast: %t1 = mul i32 %t0, %s %t2 = sub i32 %t1, %s %t3 = and i32 %t2, %s - %t4 = or i32 %t3, %s + %t4 = xor i32 %t3, 3 %t5 = xor i32 %t4, %s %t6 = add i32 %t5, 2 %t7 = getelementptr i32* %y, i32 1 diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index eb182da10129..5525af25270f 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -1,11 +1,12 @@ -; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86 | FileCheck %s %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 } @stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1] -define void @expand_start_bindings() { +; This should just not crash. 
+define void @test1() nounwind { entry: - br i1 false, label %cond_true, label %cond_next + br i1 true, label %cond_true, label %cond_next cond_true: ; preds = %entry %new_size.0.i = select i1 false, i32 0, i32 0 ; [#uses=1] @@ -25,3 +26,22 @@ cond_false30.i: ; preds = %cond_true cond_next: ; preds = %entry ret void } + + + +define i32 @test2(i16* %P, i16* %Q) nounwind { + %A = load i16* %P, align 4 ; [#uses=11] + %C = zext i16 %A to i32 ; [#uses=1] + %D = and i32 %C, 255 ; [#uses=1] + br label %L +L: + + store i16 %A, i16* %Q + ret i32 %D + +; CHECK: test2: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: movzwl (%eax), %ecx + +} + diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll index 40589892bb6f..1f3b59a905b9 100644 --- a/test/CodeGen/X86/lsr-sort.ll +++ b/test/CodeGen/X86/lsr-sort.ll @@ -4,7 +4,7 @@ @X = common global i16 0 ; [#uses=1] -define void @foo(i32 %N) nounwind { +define i32 @foo(i32 %N) nounwind { entry: %0 = icmp sgt i32 %N, 0 ; [#uses=1] br i1 %0, label %bb, label %return @@ -18,5 +18,6 @@ bb: ; preds = %bb, %entry br i1 %exitcond, label %return, label %bb return: ; preds = %bb, %entry - ret void + %h = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] + ret i32 %h } diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll index eca9e6f436c2..069737d4d10d 100644 --- a/test/CodeGen/X86/mul-legalize.ll +++ b/test/CodeGen/X86/mul-legalize.ll @@ -19,6 +19,6 @@ return: ret void } -declare i1 @report__equal(i32 %x, i32 %y) nounwind; +declare i1 @report__equal(i32 %x, i32 %y) nounwind declare void @abort() diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll index 22b6f35a70ef..f52f8c7af8c1 100644 --- a/test/CodeGen/X86/private.ll +++ b/test/CodeGen/X86/private.ll @@ -11,7 +11,7 @@ define private void @foo() { ret void } -@baz = private global i32 4; +@baz = private global i32 4 define i32 @bar() { call void @foo() diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll new file mode 100644 index 000000000000..c4f768ca529b --- /dev/null +++ b/test/CodeGen/X86/remat-mov-0.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4 + +; CodeGen should remat the zero instead of spilling it. 
+
+declare void @foo(i64 %p)
+
+define void @bar() nounwind {
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  ret void
+}
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
new file mode 100644
index 000000000000..b2b9f8121fd6
--- /dev/null
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7529457
+
+define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+; CHECK: t:
+; CHECK: movslq %e{{.*}}, %rax
+; CHECK: movq %rax
+; CHECK: movl %eax
+  %C = add i64 %A, %B
+  %D = trunc i64 %C to i32
+  volatile store i32 %D, i32* %P
+  %E = shl i64 %C, 32
+  %F = ashr i64 %E, 32
+  volatile store i64 %F, i64 *%P2
+  volatile store i32 %D, i32* %P
+  ret i64 undef
+}
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index d7623920ff28..7d85818d46b9 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 14
 
   type { [62 x %struct.Bitvec*] }  ; type %0
   type { i8* }  ; type %1
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index 7aae9eb1ab96..f4847a31c81f 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -7,6 +7,7 @@
 @B = external global [1000 x i8], align 32
 @A = external global [1000 x i8], align 32
 @P = external global [1000 x i8], align 32
+@Q = external global [1000 x i8], align 32
 
 define void @foo(i32 %m, i32 %p) nounwind {
 entry:
@@ -24,6 +25,8 @@ bb:
   %tmp0 = add i32 %tmp8, %p
   %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0
   store i8 17, i8* %tmp10, align 4
+  %tmp11 = getelementptr [1000 x i8]* @Q, i32 0, i32 %tmp0
+  store i8 19, i8* %tmp11, align 4
   %indvar.next = add i32 %i.019.0, 1
   %exitcond = icmp eq i32 %indvar.next, %m
   br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 8c3cae9e8d4c..c5dbb04a051b 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -55,7 +55,7 @@ altret:
   ret void
 }
 
-declare i8* @choose(i8*, i8*);
+declare i8* @choose(i8*, i8*)
 
 ; BranchFolding should tail-duplicate the indirect jump to avoid
 ; redundant branching.
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
new file mode 100644
index 000000000000..8ddc4054ca88
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
+
+declare fastcc i32 @callee(i32 %arg)
+define fastcc i32 @directcall(i32 %arg) {
+entry:
+; This is the large code model, so &callee may not fit into the jmp
+; instruction. Instead, stick it into a register.
+; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]]
+; CHECK: jmpq *[[REGISTER]] # TAILCALL
+  %res = tail call fastcc i32 @callee(i32 %arg)
+  ret i32 %res
+}
+
+; Check that the register used for an indirect tail call doesn't
+; clobber any of the arguments.
+define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) {
+; Adjust the stack to enter the function.
+; (The amount of the adjustment may change in the future, in which case the
+; location of the stack argument and the return adjustment will change too.)
+; CHECK: subq $8, %rsp
+; Put the call target into R11, which won't be clobbered while restoring
+; callee-saved registers and won't be used for passing arguments.
+; CHECK: movq %rdi, %r11
+; Pass the stack argument.
+; CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+; CHECK: movl $1, %edi
+; CHECK: movl $2, %esi
+; CHECK: movl $3, %edx
+; CHECK: movl $4, %ecx
+; CHECK: movl $5, %r8d
+; CHECK: movl $6, %r9d
+; Adjust the stack to "return".
+; CHECK: addq $8, %rsp
+; And tail-call to the target.
+; CHECK: jmpq *%r11 # TAILCALL
+  %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
+                                      i32 6, i32 7)
+  ret i32 %res
+}
+
+; Check that the register used for a direct tail call doesn't clobber
+; any of the arguments.
+declare fastcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32)
+define fastcc i32 @direct_manyargs() {
+; Adjust the stack to enter the function. (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+; CHECK: subq $8, %rsp
+; Pass the stack argument.
+; CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+; CHECK: movl $1, %edi
+; CHECK: movl $2, %esi
+; CHECK: movl $3, %edx
+; CHECK: movl $4, %ecx
+; CHECK: movl $5, %r8d
+; CHECK: movl $6, %r9d
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction. Put it into R11, which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+; CHECK: movabsq $manyargs_callee, %r11
+; Adjust the stack to "return".
+; CHECK: addq $8, %rsp
+; And tail-call to the target.
+; CHECK: jmpq *%r11 # TAILCALL
+  %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
+                                               i32 5, i32 6, i32 7)
+  ret i32 %res
+}
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index 772ff6c3e766..f1063dcabf4f 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -1,22 +1,35 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
 ; rdar://5752025
 
-; We don't want to fold the and into the test, because the and clobbers its
-; input forcing a copy.
-; We want:
-;      movl $15, %ecx
-;      andl 4(%esp), %ecx
-;      testl %ecx, %ecx
+; We want:
+; CHECK: movl 4(%esp), %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: ret
+;
+; We don't want:
+;      movl 4(%esp), %eax
+;      movl %eax, %ecx        # bad: extra copy
+;      andl $15, %ecx
+;      testl $15, %eax        # bad: peep obstructed
 ;      movl $42, %eax
-;      cmove %ecx, %eax
+;      cmovel %ecx, %eax
 ;      ret
 ;
-; Not:
-;      movl 4(%esp), %eax
-;      movl %eax, %ecx
-;      andl $15, %ecx
-;      testl $15, %eax
+; We also don't want:
+;      movl $15, %ecx         # bad: larger encoding
+;      andl 4(%esp), %ecx
 ;      movl $42, %eax
-;      cmove %ecx, %eax
+;      cmovel %ecx, %eax
+;      ret
+;
+; We also don't want:
+;      movl 4(%esp), %ecx
+;      andl $15, %ecx
+;      testl %ecx, %ecx       # bad: unnecessary test
+;      movl $42, %eax
+;      cmovel %ecx, %eax
 ;      ret
 
 define i32 @t1(i32 %X) nounwind {
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
new file mode 100644
index 000000000000..a245ed7caa84
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -0,0 +1,24 @@
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y. Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered. However, this copy is not needed if the register
+;; allocator turns the shift into an LEA. This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   not grep {mov E.X, E.X}
+
+@G = external global i32  ; <i32*> [#uses=3]
+
+define i32 @test1(i32 %X, i32 %Y) {
+  %Z = add i32 %X, %Y  ; <i32> [#uses=1]
+  volatile store i32 %Y, i32* @G
+  volatile store i32 %Z, i32* @G
+  ret i32 %X
+}
+
+define i32 @test2(i32 %X) {
+  %Z = add i32 %X, 1  ; <i32> [#uses=1]
+  volatile store i32 %Z, i32* @G
+  ret i32 %X
+}
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
new file mode 100644
index 000000000000..2dd2a4adac55
--- /dev/null
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
+
+; Reuse the flags value from the add instructions instead of emitting separate
+; testl instructions.
+
+; Use the flags on the add.
+
+; CHECK: add_zf:
+; CHECK: addl (%rdi), %esi
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: ret
+
+define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
+  %tmp2 = load i32* %x, align 4  ; <i32> [#uses=1]
+  %tmp4 = add i32 %tmp2, %y  ; <i32> [#uses=1]
+  %tmp5 = icmp slt i32 %tmp4, 0  ; <i1> [#uses=1]
+  %tmp.0 = select i1 %tmp5, i32 %a, i32 %b  ; <i32> [#uses=1]
+  ret i32 %tmp.0
+}
+
+declare void @foo(i32)
+
+; Don't use the flags result of the and here, since the and has no
+; other use. A simple test is better.
+
+; CHECK: bar:
+; CHECK: testb $16, %dil
+
+define void @bar(i32 %x) nounwind {
+  %y = and i32 %x, 16
+  %t = icmp eq i32 %y, 0
+  br i1 %t, label %true, label %false
+true:
+  call void @foo(i32 %x)
+  ret void
+false:
+  ret void
+}
+
+; Do use the flags result of the and here, since the and has another use.
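;
; Editorial sketch, not part of the original patch: when the and'd value has
; a second use, the andl must be kept anyway, and its flags can feed the
; branch directly, so no separate testl is needed, roughly:
;
;   andl $16, %edi          ; keeps %y and sets ZF as a side effect
;   jne ...                 ; branches on the and's flags (no testl)
;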
+
+; CHECK: qux:
+; CHECK: andl $16, %edi
+; CHECK-NEXT: jne
+
+define void @qux(i32 %x) nounwind {
+  %y = and i32 %x, 16
+  %t = icmp eq i32 %y, 0
+  br i1 %t, label %true, label %false
+true:
+  call void @foo(i32 %y)
+  ret void
+false:
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
new file mode 100644
index 000000000000..1f899b3c20aa
--- /dev/null
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -disable-mmx
+
+define <8 x i32> @a(<8 x i16> %a) nounwind {
+  %c = sext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @b(<3 x i16> %a) nounwind {
+  %c = sext <3 x i16> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @c(<1 x i16> %a) nounwind {
+  %c = sext <1 x i16> %a to <1 x i32>
+  ret <1 x i32> %c
+}
+
+define <8 x i32> @d(<8 x i16> %a) nounwind {
+  %c = zext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @e(<3 x i16> %a) nounwind {
+  %c = zext <3 x i16> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @f(<1 x i16> %a) nounwind {
+  %c = zext <1 x i16> %a to <1 x i32>
+  ret <1 x i32> %c
+}
+
+; TODO: Legalize doesn't yet handle this.
+;define <8 x i16> @g(<8 x i32> %a) nounwind {
+;  %c = trunc <8 x i32> %a to <8 x i16>
+;  ret <8 x i16> %c
+;}
+
+define <3 x i16> @h(<3 x i32> %a) nounwind {
+  %c = trunc <3 x i32> %a to <3 x i16>
+  ret <3 x i16> %c
+}
+
+define <1 x i16> @i(<1 x i32> %a) nounwind {
+  %c = trunc <1 x i32> %a to <1 x i16>
+  ret <1 x i16> %c
+}
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
index 02b16a79f4a0..8d2a3c31aedf 100644
--- a/test/CodeGen/X86/vec_ext_inreg.ll
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -disable-mmx
 
 define <8 x i32> @a(<8 x i32> %a) nounwind {
   %b = trunc <8 x i32> %a to <8 x i16>
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
index 1cf37d4b9ba9..6807e4d63909 100644
--- a/test/CodeGen/X86/vec_shuffle-22.ll
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -9,7 +9,7 @@ define <4 x float> @t1(<4 x float> %a) nounwind {
 define <4 x i32> @t2(<4 x i32>* %a) nounwind {
 ; CHECK: pshufd
 ; CHECK: ret
-  %tmp1 = load <4 x i32>* %a;
+  %tmp1 = load <4 x i32>* %a
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >  ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %tmp2
 }
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
index 2aa2d252849c..d9b2388809aa 100644
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -19,16 +19,16 @@ entry:
 	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >  ; <<4 x float>> [#uses=2]
 	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >  ; <<4 x float>> [#uses=2]
 	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >  ; <<4 x float>> [#uses=1]
-	%unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
+	%unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >  ; <<4 x float>> [#uses=1]
-	%unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
-	%r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>;
+	%unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >  ; <<4 x float>> [#uses=1]
-	%unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
-	%r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>;
+	%unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >  ; <<4 x float>> [#uses=1]
-	%unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
-	%r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>;
-	%r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>;
-	ret <8 x float> %r4;
+	%unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+	%r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+	ret <8 x float> %r4
 }
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index 8cc15d1e7c27..086af6bb114b 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -20,10 +20,10 @@ entry:
 	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >  ; <<4 x float>> [#uses=2]
 	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >  ; <<4 x float>> [#uses=1]
 	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >  ; <<4 x float>> [#uses=1]
-	%r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >;
+	%r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
 	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >  ; <<4 x float>> [#uses=1]
 	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >  ; <<4 x float>> [#uses=1]
-	%r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >;
+	%r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
 ;	%r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >;
-	ret <8 x float> %r2;
+	ret <8 x float> %r2
 }
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index 4154433fa704..d9de892933e0 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -6,7 +6,7 @@
 define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
 entry:
 	%x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
-	%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >;
+	%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	store <6 x i32> %val, <6 x i32>* %dst.addr
 	ret void
 }
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index dd02241c1dd6..47dba4b4a04b 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -7,7 +7,7 @@
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
-	%val = fadd <3 x float> %x, %src2;
+	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
 }
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index d097e4142bcc..9374a028631d 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -7,7 +7,7 @@
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
-	%val = fadd <3 x float> %x, %src2;
+	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
 }
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 3c7389111267..2465f23a7689 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,12 +1,49 @@
-; RUN: llc < %s | grep {movl.*%edi, %eax}
-; This should be a single mov, not a load of immediate + andq.
+; RUN: llc < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
 
-define i64 @test(i64 %x) nounwind {
+; This should be a single mov, not a load of immediate + andq.
+; CHECK: test:
+; CHECK: movl %edi, %eax
+
+define i64 @test(i64 %x) nounwind {
 entry:
   %tmp123 = and i64 %x, 4294967295  ; <i64> [#uses=1]
   ret i64 %tmp123
 }
 
+; This copy can't be coalesced away because it needs the implicit zero-extend.
+; CHECK: bbb:
+; CHECK: movl %edi, %edi
+
+define void @bbb(i64 %x) nounwind {
+  %t = and i64 %x, 4294967295
+  call void @foo(i64 %t)
+  ret void
+}
+
+; This should use a 32-bit and with implicit zero-extension, not a 64-bit and
+; with a separate mov to materialize the mask.
+; rdar://7527390
+; CHECK: ccc:
+; CHECK: andl $-1048593, %edi
+
+declare void @foo(i64 %x) nounwind
+
+define void @ccc(i64 %x) nounwind {
+  %t = and i64 %x, 4293918703
+  call void @foo(i64 %t)
+  ret void
+}
+
+; This requires a mov and a 64-bit and.
+; CHECK: ddd:
+; CHECK: movabsq $4294967296, %rax
+; CHECK: andq %rax, %rdi
+
+define void @ddd(i64 %x) nounwind {
+  %t = and i64 %x, 4294967296
+  call void @foo(i64 %t)
+  ret void
+}
diff --git a/test/CodeGen/X86/x86-64-jumps.ll b/test/CodeGen/X86/x86-64-jumps.ll
index 5ed6a23ef876..11b40c897618 100644
--- a/test/CodeGen/X86/x86-64-jumps.ll
+++ b/test/CodeGen/X86/x86-64-jumps.ll
@@ -14,3 +14,32 @@ bb6:  ; preds = %entry
   ret i8 2
 }
+
+
+; PR5930 - Trunc of block address differences.
+@test.array = internal constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %bar) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %hack) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32)]  ; <[3 x i32]*> [#uses=1]
+
+define void @test2(i32 %i) nounwind ssp {
+entry:
+  %i.addr = alloca i32  ; <i32*> [#uses=2]
+  store i32 %i, i32* %i.addr
+  %tmp = load i32* %i.addr  ; <i32> [#uses=1]
+  %idxprom = sext i32 %tmp to i64  ; <i64> [#uses=1]
+  %arrayidx = getelementptr inbounds i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom  ; <i32*> [#uses=1]
+  %tmp1 = load i32* %arrayidx  ; <i32> [#uses=1]
+  %idx.ext = sext i32 %tmp1 to i64  ; <i64> [#uses=1]
+  %add.ptr = getelementptr i8* blockaddress(@test2, %foo), i64 %idx.ext  ; <i8*> [#uses=1]
+  br label %indirectgoto
+
+foo:  ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto
+  br label %bar
+
+bar:  ; preds = %foo, %indirectgoto
+  br label %hack
+
+hack:  ; preds = %bar, %indirectgoto
+  ret void
+
+indirectgoto:  ; preds = %entry
+  %indirect.goto.dest = phi i8* [ %add.ptr, %entry ]  ; <i8*> [#uses=1]
+  indirectbr i8* %indirect.goto.dest, [label %foo, label %foo, label %bar, label %foo, label %hack, label %foo, label %foo]
+}
diff --git a/test/CodeGen/X86/brcond-srl.ll b/test/CodeGen/X86/xor-icmp.ll
similarity index 64%
rename from test/CodeGen/X86/brcond-srl.ll
rename to test/CodeGen/X86/xor-icmp.ll
index 12674e91a0bd..a6bdb13ec6b4 100644
--- a/test/CodeGen/X86/brcond-srl.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -1,13 +1,20 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
-; rdar://7475489
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
 
 define i32 @t(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: t:
-; CHECK: xorb
-; CHECK-NOT: andb
-; CHECK-NOT: shrb
-; CHECK: testb $64
+; X32: t:
+; X32: xorb
+; X32-NOT: andb
+; X32-NOT: shrb
+; X32: testb $64
+; X32: jne
+
+; X64: t:
+; X64-NOT: setne
+; X64: xorl
+; X64: testb $64
+; X64: jne
   %0 = and i32 %a, 16384
   %1 = icmp ne i32 %0, 0
   %2 = and i32 %b, 16384
diff --git a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
index a6b9699987eb..e35a36a8a159 100644
--- a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
+++ b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
@@ -2,5 +2,5 @@
 ; PR3898
 
 define i32 @vector_param(<2 x double> %x) nounwind {
-  ret i32 1;
+  ret i32 1
 }
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 9a2f5b32dc39..c595a6df4950 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -12,7 +12,7 @@ define private void @foo() {
   ret void
 }
 
-@baz = private global i32 4;
+@baz = private global i32 4
 
 define i32 @bar() {
   call void @foo()
diff --git a/test/DebugInfo/2009-10-16-Scope.ll b/test/DebugInfo/2009-10-16-Scope.ll
index ea43249668a4..9f9fa65d5b79 100644
--- a/test/DebugInfo/2009-10-16-Scope.ll
+++ b/test/DebugInfo/2009-10-16-Scope.ll
@@ -9,8 +9,7 @@ entry:
   br label %do.body, !dbg !0
 
 do.body:  ; preds = %entry
-  %0 = bitcast i32* %count_ to { }*  ; <{ }*> [#uses=1]
-  call void @llvm.dbg.declare({ }* %0, metadata !4)
+  call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4)
   %conv = ptrtoint i32* %count_ to i32, !dbg !0  ; <i32> [#uses=1]
   %call = call i32 @foo(i32 %conv) ssp, !dbg !0  ; <i32> [#uses=0]
   br label %do.end, !dbg !0
@@ -19,7 +18,7 @@ do.end:  ; preds = %do.body
   ret void, !dbg !7
 }
 
-declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 declare i32 @foo(i32) ssp
diff --git a/test/DebugInfo/2009-12-01-CurrentFn.ll b/test/DebugInfo/2009-12-01-CurrentFn.ll
deleted file mode 100644
index 6fc538e43027..000000000000
--- a/test/DebugInfo/2009-12-01-CurrentFn.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s | grep "func_end1:" | count 1
-; XFAIL: powerpc-apple-darwin
-declare void @foo()
-
-define void @bar(i32 %i) nounwind ssp {
-entry:
-  tail call void @foo() nounwind, !dbg !0
-  ret void, !dbg !0
-}
-
-!0 = metadata !{i32 9, i32 0, metadata !1, null}
-!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !3, i1 true, i1 true}; [DW_TAG_subprogram ]
-!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"2007-12-VarArrayDebug.c", metadata !"/Volumes/Data/ddunbar/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}; [DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 458788, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-
diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll b/test/DebugInfo/2010-01-05-DbgScope.ll
new file mode 100644
index 000000000000..8cf20e3146ac
--- /dev/null
+++ b/test/DebugInfo/2010-01-05-DbgScope.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -o /dev/null
+; PR 5942
+define i8* @foo() nounwind {
+entry:
+  %0 = load i32* undef, align 4, !dbg !0  ; <i32> [#uses=1]
+  %1 = inttoptr i32 %0 to i8*, !dbg !0  ; <i8*> [#uses=1]
+  ret i8* %1, !dbg !10
+
+}
+
+!0 = metadata !{i32 571, i32 3, metadata !1, null}
+!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 561, metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 458788, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 588, i32 1, metadata !2, null}
diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll
index c5fe7ad6c803..e19395b0df1f 100644
--- a/test/DebugInfo/printdbginfo2.ll
+++ b/test/DebugInfo/printdbginfo2.ll
@@ -9,32 +9,26 @@
 define i32 @main() nounwind {
 entry:
-; CHECK:; (x.c:6:3)
   %retval = alloca i32  ; <i32*> [#uses=3]
   %b = alloca %struct.foo, align 4  ; <%struct.foo*> [#uses=2]
 ; CHECK:; %b is variable b of type foo declared at x.c:7
   %a = alloca [4 x i32], align 4  ; <[4 x i32]*> [#uses=1]
 ; CHECK:; %a is variable a of type declared at x.c:8
   call void @llvm.dbg.func.start(metadata !3)
-; CHECK:; fully qualified function name: main return type: int at line 5
   store i32 0, i32* %retval
   call void @llvm.dbg.stoppoint(i32 6, i32 3, metadata !1)
-; CHECK:; x.c:7:3
   call void @llvm.dbg.stoppoint(i32 7, i32 3, metadata !1)
   %0 = bitcast %struct.foo* %b to { }*  ; <{ }*> [#uses=1]
-  call void @llvm.dbg.declare({ }* %0, metadata !4)
+  call void @llvm.dbg.declare(metadata !{%struct.foo* %b}, metadata !4)
 ; CHECK:; %0 is variable b of type foo declared at x.c:7
   call void @llvm.dbg.stoppoint(i32 8, i32 3, metadata !1)
-; CHECK:; x.c:8:3
   %1 = bitcast [4 x i32]* %a to { }*  ; <{ }*> [#uses=1]
-  call void @llvm.dbg.declare({ }* %1, metadata !8)
+  call void @llvm.dbg.declare(metadata !{[4 x i32]* %a}, metadata !8)
 ; CHECK:; %1 is variable a of type declared at x.c:8
   call void @llvm.dbg.stoppoint(i32 9, i32 3, metadata !1)
-; CHECK:; x.c:9:3
   %tmp = getelementptr inbounds %struct.foo* %b, i32 0, i32 0  ; <i32*> [#uses=1]
 ; CHECK:; %tmp is variable b of type foo declared at x.c:7
   store i32 5, i32* %tmp
-; CHECK:; x.c:10:3
   call void @llvm.dbg.stoppoint(i32 10, i32 3, metadata !1)
   %tmp1 = load i32* @main.c  ; <i32> [#uses=1]
 ; CHECK:; @main.c is variable c of type int declared at x.c:6
@@ -43,7 +37,6 @@ entry:
 ;