2014-03-18 22:07:45 +00:00

Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):

  Implement atomicrmw operations in 32 and 64 bits for SPARCv9.

  These all use the compare-and-swap CASA/CASXA instructions.

2014-05-24 22:27:31 +00:00

Introduced here: http://svnweb.freebsd.org/changeset/base/262261

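The mechanics are the usual compare-and-swap retry loop. As a minimal sketch
(not the committed code), the C below shows what the emitted CASA/CASXA loop
computes for the add flavor; the helper name is hypothetical, and the
GCC/Clang builtin __sync_val_compare_and_swap stands in for the cas
instruction:

    #include <stdint.h>

    /* Hypothetical sketch of the CAS loop emitted for "atomicrmw add i32". */
    static uint32_t atomic_add_32_sketch(uint32_t *p, uint32_t v) {
      uint32_t old = *p;                 /* initial plain load */
      for (;;) {
        uint32_t upd = old + v;          /* the "op" step; add here */
        /* cas: store upd iff *p still equals old; returns the observed value */
        uint32_t seen = __sync_val_compare_and_swap(p, old, upd);
        if (seen == old)
          return old;                    /* swap took effect; return old value */
        old = seen;                      /* lost a race; retry with fresh value */
      }
    }

Under seq_cst ordering, SelectionDAG also places memory barriers on both
sides of the loop, which is why the tests below check for membar around each
cas/casx.
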
Index: test/CodeGen/SPARC/atomics.ll
===================================================================
--- test/CodeGen/SPARC/atomics.ll
+++ test/CodeGen/SPARC/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: test_atomic_i32
 ; CHECK: ld [%o0]
@@ -61,3 +61,84 @@ entry:
   %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
   ret i32 %b
 }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: membar
+; CHECK: add
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: membar
+; CHECK: sub
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_xor_32
+; CHECK: membar
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw xor i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: membar
+; CHECK: and
+; CHECK-NOT: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_nand_32
+; CHECK: membar
+; CHECK: and
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw nand i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_max_64
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movg %xcc
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw max i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_umin_32
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movleu %icc
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw umin i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
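The cmp/movg and cmp/movleu patterns above reflect that the min/max flavors
compute the update value with a compare followed by a conditional move
instead of a single ALU instruction. A minimal C sketch of the update step
for the unsigned-min case (helper name hypothetical):

    #include <stdint.h>

    /* Hypothetical sketch: the value fed to cas for "atomicrmw umin". */
    static uint32_t umin_update_sketch(uint32_t old, uint32_t v) {
      return old < v ? old : v;   /* compare, then conditional select */
    }
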
Index: lib/Target/Sparc/SparcInstr64Bit.td
===================================================================
--- lib/Target/Sparc/SparcInstr64Bit.td
+++ lib/Target/Sparc/SparcInstr64Bit.td
@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
 
 } // Predicates = [Is64Bit]
 
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
+    Defs = [ICC] in
+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
+
+  def _32 : Pseudo<(outs IntRegs:$rd),
+                   (ins ptr_rc:$addr, IntRegs:$rs2), "",
+                   [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
+
+  let Predicates = [Is64Bit] in
+  def _64 : Pseudo<(outs I64Regs:$rd),
+                   (ins ptr_rc:$addr, I64Regs:$rs2), "",
+                   [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
+}
+
+defm ATOMIC_LOAD_ADD : AtomicRMW<atomic_load_add_32, atomic_load_add_64>;
+defm ATOMIC_LOAD_SUB : AtomicRMW<atomic_load_sub_32, atomic_load_sub_64>;
+defm ATOMIC_LOAD_AND : AtomicRMW<atomic_load_and_32, atomic_load_and_64>;
+defm ATOMIC_LOAD_OR : AtomicRMW<atomic_load_or_32, atomic_load_or_64>;
+defm ATOMIC_LOAD_XOR : AtomicRMW<atomic_load_xor_32, atomic_load_xor_64>;
+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
+defm ATOMIC_LOAD_MIN : AtomicRMW<atomic_load_min_32, atomic_load_min_64>;
+defm ATOMIC_LOAD_MAX : AtomicRMW<atomic_load_max_32, atomic_load_max_64>;
+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
+
 // Global addresses, constant pool entries
 let Predicates = [Is64Bit] in {
 
Index: lib/Target/Sparc/SparcISelLowering.cpp
===================================================================
--- lib/Target/Sparc/SparcISelLowering.cpp
+++ lib/Target/Sparc/SparcISelLowering.cpp
@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
 MachineBasicBlock *
 SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
-  unsigned BROpcode;
-  unsigned CC;
-  DebugLoc dl = MI->getDebugLoc();
-  // Figure out the conditional branch opcode to use for this select_cc.
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SELECT_CC!");
   case SP::SELECT_CC_Int_ICC:
@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   case SP::SELECT_CC_FP_ICC:
   case SP::SELECT_CC_DFP_ICC:
   case SP::SELECT_CC_QFP_ICC:
-    BROpcode = SP::BCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::BCOND);
   case SP::SELECT_CC_Int_FCC:
   case SP::SELECT_CC_FP_FCC:
   case SP::SELECT_CC_DFP_FCC:
   case SP::SELECT_CC_QFP_FCC:
-    BROpcode = SP::FBCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::FBCOND);
+
+  case SP::ATOMIC_LOAD_ADD_32:
+    return expandAtomicRMW(MI, BB, SP::ADDrr);
+  case SP::ATOMIC_LOAD_ADD_64:
+    return expandAtomicRMW(MI, BB, SP::ADDXrr);
+  case SP::ATOMIC_LOAD_SUB_32:
+    return expandAtomicRMW(MI, BB, SP::SUBrr);
+  case SP::ATOMIC_LOAD_SUB_64:
+    return expandAtomicRMW(MI, BB, SP::SUBXrr);
+  case SP::ATOMIC_LOAD_AND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_AND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+  case SP::ATOMIC_LOAD_OR_32:
+    return expandAtomicRMW(MI, BB, SP::ORrr);
+  case SP::ATOMIC_LOAD_OR_64:
+    return expandAtomicRMW(MI, BB, SP::ORXrr);
+  case SP::ATOMIC_LOAD_XOR_32:
+    return expandAtomicRMW(MI, BB, SP::XORrr);
+  case SP::ATOMIC_LOAD_XOR_64:
+    return expandAtomicRMW(MI, BB, SP::XORXrr);
+  case SP::ATOMIC_LOAD_NAND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_NAND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+
+  case SP::ATOMIC_LOAD_MAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_MIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_UMAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
+  case SP::ATOMIC_LOAD_UMIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
   }
+}
 
-  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+MachineBasicBlock*
+SparcTargetLowering::expandSelectCC(MachineInstr *MI,
+                                    MachineBasicBlock *BB,
+                                    unsigned BROpcode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
 
   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
   // control-flow pattern. The incoming instruction knows the destination vreg
@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   return BB;
 }
 
+MachineBasicBlock*
+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     unsigned Opcode,
+                                     unsigned CondCode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // MI is an atomic read-modify-write instruction of the form:
+  //
+  //   rd = atomicrmw<op> addr, rs2
+  //
+  // All three operands are registers.
+  unsigned DestReg = MI->getOperand(0).getReg();
+  unsigned AddrReg = MI->getOperand(1).getReg();
+  unsigned Rs2Reg = MI->getOperand(2).getReg();
+
+  // SelectionDAG has already inserted memory barriers before and after MI, so
+  // we simply have to implement the operation in terms of compare-and-swap.
+  //
+  //   %val0 = load %addr
+  // loop:
+  //   %val = phi %val0, %dest
+  //   %upd = op %val, %rs2
+  //   %dest = cas %addr, %upd, %val
+  //   cmp %val, %dest
+  //   bne loop
+  // done:
+  //
+  bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
+  const TargetRegisterClass *ValueRC =
+    is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+  unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
+    .addReg(AddrReg).addImm(0);
+
+  // Split the basic block MBB before MI and insert the loop block in the hole.
+  MachineFunction::iterator MFI = MBB;
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  ++MFI;
+  MF->insert(MFI, LoopMBB);
+  MF->insert(MFI, DoneMBB);
+
+  // Move MI and following instructions to DoneMBB.
+  DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
+  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Connect the CFG again.
+  MBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+
+  // Build the loop block.
+  unsigned ValReg = MRI.createVirtualRegister(ValueRC);
+  unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
+    .addReg(Val0Reg).addMBB(MBB)
+    .addReg(DestReg).addMBB(LoopMBB);
+
+  if (CondCode) {
+    // This is one of the min/max operations. We need a CMPrr followed by a
+    // MOVXCC/MOVICC.
+    BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
+  } else {
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg);
+  }
+
+  if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
+      MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
+    unsigned TmpReg = UpdReg;
+    UpdReg = MRI.createVirtualRegister(ValueRC);
+    BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
+  }
+
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
+    .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
+    .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
 //===----------------------------------------------------------------------===//
 // Sparc Inline Assembly Support
 //===----------------------------------------------------------------------===//
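One detail worth noting from expandAtomicRMW above: NAND has no single SPARC
ALU instruction, so the NAND cases reuse the AND opcode and the expansion
appends an XORri with immediate -1 to invert the result before the cas. A
minimal C sketch of that update computation (helper name hypothetical):

    #include <stdint.h>

    /* Hypothetical sketch: the value fed to cas for "atomicrmw nand". */
    static uint32_t nand_update_sketch(uint32_t old, uint32_t v) {
      uint32_t tmp = old & v;   /* ANDrr */
      return tmp ^ ~0u;         /* XORri with -1, i.e. ~(old & v) */
    }
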
Index: lib/Target/Sparc/SparcISelLowering.h
===================================================================
--- lib/Target/Sparc/SparcISelLowering.h
+++ lib/Target/Sparc/SparcISelLowering.h
@@ -165,6 +165,13 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N,
                                     SmallVectorImpl<SDValue>& Results,
                                     SelectionDAG &DAG) const;
+
+    MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
+                                      unsigned BROpcode) const;
+    MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
+                                       MachineBasicBlock *BB,
+                                       unsigned Opcode,
+                                       unsigned CondCode = 0) const;
   };
 } // end namespace llvm
 