freebsd-dev/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "AMDGPUtti"

// Fill in the loop-unrolling preferences for loop L.
//
// UP always receives a threshold of 300, an unbounded MaxCount and partial
// unrolling enabled. If the loop contains at least one GEP in the private
// address space whose underlying object is an alloca, the threshold is raised
// to 800 instead.
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
                                            TTI::UnrollingPreferences &UP) {
  UP.Threshold = 300; // Twice the default.
  UP.MaxCount = UINT_MAX;
  UP.Partial = true;

  // TODO: Do we want runtime unrolling?

  for (const BasicBlock *BB : L->getBlocks()) {
    const DataLayout &DL = BB->getModule()->getDataLayout();
    for (const Instruction &I : *BB) {
      // Only address calculations on private memory are of interest.
      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
      if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
        continue;

      const Value *Ptr = GEP->getPointerOperand();
      const AllocaInst *Alloca =
          dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
      if (!Alloca)
        continue;

      // We want to do whatever we can to limit the number of alloca
      // instructions that make it through to the code generator.  allocas
      // require us to use indirect addressing, which is slow and prone to
      // compiler bugs.  If this loop does an address calculation on an
      // alloca ptr, then we want to use a higher than normal loop unroll
      // threshold. This will give SROA a better chance to eliminate these
      // allocas.
      //
      // Don't use the maximum allowed value here as it will make some
      // programs way too big.
      UP.Threshold = 800;

      // Nothing later in the loop can change the threshold again, so there is
      // no point in scanning the remaining instructions and blocks.
      return;
    }
  }
}

// Report how many registers are available. A request for vector registers
// (Vec == true) always yields 0; otherwise the count depends on the subtarget
// generation.
unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
  if (Vec) {
    return 0;
  }

  const bool IsSouthernIslandsOrLater =
      ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS;

  // 256 is the number of VGPRs on SI. Older generations report 4 * 128
  // (4 channels).
  // XXX - Should the 4 channels count as vector instead?
  return IsSouthernIslandsOrLater ? 256 : 4 * 128;
}

// Report the register bit width. The unnamed boolean argument is ignored, so
// the answer is 32 for every query.
unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) {
  const unsigned RegisterBits = 32;
  return RegisterBits;
}

// Report the maximum interleave factor. VF is not consulted; the same
// semi-arbitrary large amount is returned for every call.
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  const unsigned SemiArbitraryLargeFactor = 64;
  return SemiArbitraryLargeFactor;
}
Vendor import of llvm release_34 branch r197841 (effectively, 3.4 RC3): https://llvm.org/svn/llvm-project/llvm/branches/release_34@197841 2013-12-22 00:04:03 +00:00			`//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// \file`
			`// This file implements a TargetTransformInfo analysis pass specific to the`
			`// AMDGPU target machine. It uses the target's detailed information to provide`
			`// more precise answers to certain TTI queries, while letting the target`
			`// independent and default TTI implementations handle the rest.`
			`//`
			`//===----------------------------------------------------------------------===//`

Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`#include "AMDGPUTargetTransformInfo.h"`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`#include "llvm/Analysis/LoopInfo.h"`
Vendor import of llvm release_34 branch r197841 (effectively, 3.4 RC3): https://llvm.org/svn/llvm-project/llvm/branches/release_34@197841 2013-12-22 00:04:03 +00:00			`#include "llvm/Analysis/TargetTransformInfo.h"`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`#include "llvm/Analysis/ValueTracking.h"`
Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`#include "llvm/CodeGen/BasicTTIImpl.h"`
			`#include "llvm/IR/Module.h"`
Vendor import of llvm release_34 branch r197841 (effectively, 3.4 RC3): https://llvm.org/svn/llvm-project/llvm/branches/release_34@197841 2013-12-22 00:04:03 +00:00			`#include "llvm/Support/Debug.h"`
			`#include "llvm/Target/CostTable.h"`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`#include "llvm/Target/TargetLowering.h"`
Vendor import of llvm release_34 branch r197841 (effectively, 3.4 RC3): https://llvm.org/svn/llvm-project/llvm/branches/release_34@197841 2013-12-22 00:04:03 +00:00			`using namespace llvm;`

Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`#define DEBUG_TYPE "AMDGPUtti"`

Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,`
			`TTI::UnrollingPreferences &UP) {`
Vendor import of llvm RELEASE_360/rc1 tag r226102 (effectively, 3.6.0 RC1): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc1@226102 2015-01-18 16:17:27 +00:00			`UP.Threshold = 300; // Twice the default.`
Vendor import of llvm RELEASE_360/rc3 tag r229040 (effectively, 3.6.0 RC3): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc3@229040 2015-02-14 12:17:42 +00:00			`UP.MaxCount = UINT_MAX;`
Vendor import of llvm RELEASE_360/rc1 tag r226102 (effectively, 3.6.0 RC1): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc1@226102 2015-01-18 16:17:27 +00:00			`UP.Partial = true;`

			`// TODO: Do we want runtime unrolling?`

Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`for (const BasicBlock *BB : L->getBlocks()) {`
Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`const DataLayout &DL = BB->getModule()->getDataLayout();`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`for (const Instruction &I : *BB) {`
			`const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);`
			`if (!GEP \|\| GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)`
			`continue;`

			`const Value *Ptr = GEP->getPointerOperand();`
Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`const AllocaInst *Alloca =`
			`dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`if (Alloca) {`
			`// We want to do whatever we can to limit the number of alloca`
			`// instructions that make it through to the code generator. allocas`
			`// require us to use indirect addressing, which is slow and prone to`
			`// compiler bugs. If this loop does an address calculation on an`
			`// alloca ptr, then we want to use a higher than normal loop unroll`
			`// threshold. This will give SROA a better chance to eliminate these`
			`// allocas.`
			`//`
			`// Don't use the maximum allowed value here as it will make some`
			`// programs way too big.`
Vendor import of llvm RELEASE_360/rc1 tag r226102 (effectively, 3.6.0 RC1): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc1@226102 2015-01-18 16:17:27 +00:00			`UP.Threshold = 800;`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`}`
			`}`
			`}`
			`}`

Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`if (Vec)`
			`return 0;`

			`// Number of VGPRs on SI.`
			`if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)`
			`return 256;`

			`return 4 * 128; // XXX - 4 channels. Should these count as vector instead?`
			`}`

Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00
Vendor import of llvm trunk r238337: https://llvm.org/svn/llvm-project/llvm/trunk@238337 2015-05-27 18:44:32 +00:00			`unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {`
Vendor import of llvm RELEASE_350/final tag r216957 (effectively, 3.5.0 release): https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final@216957 2014-11-24 09:08:18 +00:00			`// Semi-arbitrary large amount.`
			`return 64;`
			`}`