Update LLVM to r86140.
parent ee2025263d
commit ded64d5d34

@@ -54,6 +54,12 @@ number of detected available CPUs.
Search for I<NAME.cfg> and I<NAME.site.cfg> when searching for test suites,
instead of I<lit.cfg> and I<lit.site.cfg>.

=item B<--param> I<NAME>, B<--param> I<NAME>=I<VALUE>

Add a user defined parameter I<NAME> with the given I<VALUE> (or the empty
string if not given). The meaning and use of these parameters is test suite
dependent.

=back

=head1 OUTPUT OPTIONS

@@ -81,8 +81,11 @@ void BrainF::header(LLVMContext& C) {
ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal));
BasicBlock* BB = builder->GetInsertBlock();
const Type* IntPtrTy = IntegerType::getInt32Ty(C);
ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, IntegerType::getInt8Ty(C),
val_mem, NULL, "arr");
const Type* Int8Ty = IntegerType::getInt8Ty(C);
Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty);
allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem,
NULL, "arr");
BB->getInstList().push_back(cast<Instruction>(ptr_arr));

//call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1)

@@ -50,13 +50,17 @@ const CallInst* isArrayMalloc(const Value* I, LLVMContext &Context,
const TargetData* TD);

/// getMallocType - Returns the PointerType resulting from the malloc call.
/// This PointerType is the result type of the call's only bitcast use.
/// If there is no unique bitcast use, then return NULL.
/// The PointerType depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the malloc calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
const PointerType* getMallocType(const CallInst* CI);

/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
/// Type is the result type of the call's only bitcast use. If there is no
/// unique bitcast use, then return NULL.
/// getMallocAllocatedType - Returns the Type allocated by malloc call.
/// The Type depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the malloc calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
const Type* getMallocAllocatedType(const CallInst* CI);

/// getMallocArraySize - Returns the array size of a malloc call.  If the
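
A note on the 0/1/>1 rule documented above, from a caller's point of view — a minimal sketch, assuming a CallInst that already satisfies isMalloc(CI) (inspectMalloc is a hypothetical helper):

    // Sketch only: CI is assumed to be a malloc call (isMalloc(CI) is true).
    void inspectMalloc(const CallInst *CI) {
      if (const PointerType *PT = getMallocType(CI)) {
        // PT is the raw i8* return type (0 bitcast uses) or the unique
        // bitcast's destination type (exactly 1 bitcast use).
        const Type *AllocTy = getMallocAllocatedType(CI); // PT's pointee type
        (void)AllocTy;
      } else {
        // >1 distinct bitcast uses: no unique type can be determined.
      }
    }
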
@@ -374,8 +374,10 @@ namespace llvm {
/// printImplicitDef - This method prints the specified machine instruction
/// that is an implicit def.
virtual void printImplicitDef(const MachineInstr *MI) const;

/// printKill - This method prints the specified kill machine instruction.
virtual void printKill(const MachineInstr *MI) const;

/// printPICJumpTableSetLabel - This method prints a set label for the
/// specified MachineBasicBlock for a jumptable entry.
virtual void printPICJumpTableSetLabel(unsigned uid,

@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"

namespace llvm {

@@ -38,14 +39,35 @@ namespace llvm {
class IndexListEntry {
private:

static std::auto_ptr<IndexListEntry> emptyKeyEntry,
tombstoneKeyEntry;
typedef enum { EMPTY_KEY, TOMBSTONE_KEY } ReservedEntryType;
static const unsigned EMPTY_KEY_INDEX = ~0U & ~3U,
TOMBSTONE_KEY_INDEX = ~0U & ~7U;

IndexListEntry *next, *prev;
MachineInstr *mi;
unsigned index;

// This constructor is only to be used by getEmptyKeyEntry
// & getTombstoneKeyEntry. It sets index to the given
// value and mi to zero.
IndexListEntry(ReservedEntryType r) : mi(0) {
switch(r) {
case EMPTY_KEY: index = EMPTY_KEY_INDEX; break;
case TOMBSTONE_KEY: index = TOMBSTONE_KEY_INDEX; break;
default: assert(false && "Invalid value for constructor.");
}
}

public:

IndexListEntry(MachineInstr *mi, unsigned index)
: mi(mi), index(index) {}
IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {
if (index == EMPTY_KEY_INDEX || index == TOMBSTONE_KEY_INDEX) {
llvm_report_error("Attempt to create invalid index. "
"Available indexes may have been exhausted?.");
}
}

MachineInstr* getInstr() const { return mi; }
void setInstr(MachineInstr *mi) { this->mi = mi; }

@@ -60,6 +82,24 @@ namespace llvm {
IndexListEntry* getPrev() { return prev; }
const IndexListEntry* getPrev() const { return prev; }
void setPrev(IndexListEntry *prev) { this->prev = prev; }

// This function returns the index list entry that is to be used for empty
// SlotIndex keys.
static IndexListEntry* getEmptyKeyEntry() {
if (emptyKeyEntry.get() == 0) {
emptyKeyEntry.reset(new IndexListEntry(EMPTY_KEY));
}
return emptyKeyEntry.get();
}

// This function returns the index list entry that is to be used for
// tombstone SlotIndex keys.
static IndexListEntry* getTombstoneKeyEntry() {
if (tombstoneKeyEntry.get() == 0) {
tombstoneKeyEntry.reset(new IndexListEntry(TOMBSTONE_KEY));
}
return tombstoneKeyEntry.get();
}
};

// Specialize PointerLikeTypeTraits for IndexListEntry.

@@ -81,10 +121,6 @@ namespace llvm {
friend class DenseMapInfo<SlotIndex>;

private:

// FIXME: Is there any way to statically allocate these things and have
// them 8-byte aligned?
static std::auto_ptr<IndexListEntry> emptyKeyPtr, tombstoneKeyPtr;
static const unsigned PHI_BIT = 1 << 2;

PointerIntPair<IndexListEntry*, 3, unsigned> lie;

@@ -95,7 +131,6 @@ namespace llvm {
}

IndexListEntry& entry() const {
assert(lie.getPointer() != 0 && "Use of invalid index.");
return *lie.getPointer();
}

@@ -116,25 +151,15 @@ namespace llvm {
enum Slot { LOAD, USE, DEF, STORE, NUM };

static inline SlotIndex getEmptyKey() {
// FIXME: How do we guarantee these numbers don't get allocated to
// legit indexes?
if (emptyKeyPtr.get() == 0)
emptyKeyPtr.reset(new IndexListEntry(0, ~0U & ~3U));

return SlotIndex(emptyKeyPtr.get(), 0);
return SlotIndex(IndexListEntry::getEmptyKeyEntry(), 0);
}

static inline SlotIndex getTombstoneKey() {
// FIXME: How do we guarantee these numbers don't get allocated to
// legit indexes?
if (tombstoneKeyPtr.get() == 0)
tombstoneKeyPtr.reset(new IndexListEntry(0, ~0U & ~7U));

return SlotIndex(tombstoneKeyPtr.get(), 0);
return SlotIndex(IndexListEntry::getTombstoneKeyEntry(), 0);
}

/// Construct an invalid index.
SlotIndex() : lie(&getEmptyKey().entry(), 0) {}
SlotIndex() : lie(IndexListEntry::getEmptyKeyEntry(), 0) {}

// Construct a new slot index from the given one, set the phi flag on the
// new index to the value of the phi parameter.
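
For context on the empty/tombstone machinery being moved into IndexListEntry: DenseMap stores keys inline and needs two key values that can never equal a real key — one marks never-used buckets (empty), one marks erased buckets (tombstone). A simplified sketch of the trait shape (not the actual LLVM definition, which is DenseMapInfo<SlotIndex>):

    // Simplified sketch of why reserved keys exist.
    template <typename T> struct SketchDenseMapInfo {
      static T getEmptyKey();     // here: SlotIndex(IndexListEntry::getEmptyKeyEntry(), 0)
      static T getTombstoneKey(); // here: SlotIndex(IndexListEntry::getTombstoneKeyEntry(), 0)
      static unsigned getHashValue(const T &Val);
      static bool isEqual(const T &LHS, const T &RHS);
    };

Centralizing the two sentinel entries behind getEmptyKeyEntry()/getTombstoneKeyEntry() gives every user the same singletons instead of the per-class auto_ptr pair the old SlotIndex code kept.
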
@@ -899,11 +899,12 @@ public:
///    3. Bitcast the result of the malloc call to the specified type.
static Instruction *CreateMalloc(Instruction *InsertBefore,
const Type *IntPtrTy, const Type *AllocTy,
Value *ArraySize = 0,
Value *AllocSize, Value *ArraySize = 0,
const Twine &Name = "");
static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
const Type *IntPtrTy, const Type *AllocTy,
Value *ArraySize = 0, Function* MallocF = 0,
Value *AllocSize, Value *ArraySize = 0,
Function* MallocF = 0,
const Twine &Name = "");
/// CreateFree - Generate the IR for a call to the builtin free function.
static void CreateFree(Value* Source, Instruction *InsertBefore);
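
With the new signature, the caller supplies the allocation size explicitly instead of CreateMalloc computing it. The call sites updated elsewhere in this commit (BrainF, LLParser, BitcodeReader) all follow the same pattern — a sketch, with AllocTy and ArraySize standing in for whatever the caller already has in hand:

    // Sketch of the new calling convention: compute sizeof(AllocTy) as a
    // constant, truncate/bitcast it to the intptr type, pass it as AllocSize.
    const Type *IntPtrTy = Type::getInt32Ty(Context);
    Constant *AllocSize = ConstantExpr::getSizeOf(AllocTy);
    AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
    Instruction *Malloc =
        CallInst::CreateMalloc(BB, IntPtrTy, AllocTy, AllocSize, ArraySize);
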
@@ -18,6 +18,8 @@
#define LLVM_SUPPORT_CONSTANTFOLDER_H

#include "llvm/Constants.h"
#include "llvm/Instruction.h"
#include "llvm/InstrTypes.h"

namespace llvm {

@@ -23,6 +23,7 @@
#ifndef LLVM_SUPPORT_FORMAT_H
#define LLVM_SUPPORT_FORMAT_H

#include <cassert>
#include <cstdio>
#ifdef WIN32
#define snprintf _snprintf

@@ -26,6 +26,7 @@

namespace llvm {

class LLVMContext;
class Value;

struct LeakDetector {

@@ -14,6 +14,7 @@
#ifndef LLVM_SUPPORT_OUTPUTBUFFER_H
#define LLVM_SUPPORT_OUTPUTBUFFER_H

#include <cassert>
#include <string>
#include <vector>

@@ -25,6 +25,7 @@

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Pass.h"
#include <algorithm>
#include <cstring>

@@ -41,7 +41,7 @@ public:
/// SubClass. The storage may be either newly allocated or recycled.
///
template<class SubClass>
SubClass *Allocate() { return Base.Allocate<SubClass>(Allocator); }
SubClass *Allocate() { return Base.template Allocate<SubClass>(Allocator); }

T *Allocate() { return Base.Allocate(Allocator); }
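
The last change above is a required C++ disambiguation rather than a behavior change: inside a template, Base has a dependent type, so the parser cannot know that Allocate is itself a member template; without the template keyword the < would be parsed as less-than. A distilled, self-contained illustration (not LLVM code):

    struct Pool {
      template <class T> T *Allocate() { return new T(); }
    };

    template <class Base> struct Wrapper {
      Base B;
      int *get() {
        // return B.Allocate<int>();       // ill-formed: '<' parsed as less-than
        return B.template Allocate<int>(); // OK: template argument list
      }
    };

    int main() { Wrapper<Pool> W; delete W.get(); return 0; }
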
@@ -20,6 +20,8 @@
#define LLVM_SUPPORT_TARGETFOLDER_H

#include "llvm/Constants.h"
#include "llvm/Instruction.h"
#include "llvm/InstrTypes.h"
#include "llvm/Analysis/ConstantFolding.h"

namespace llvm {

@@ -14,6 +14,8 @@
#ifndef LLVM_TARGET_TARGETINTRINSICINFO_H
#define LLVM_TARGET_TARGETINTRINSICINFO_H

#include <string>

namespace llvm {

class Function;

@@ -32,7 +34,13 @@ public:
virtual ~TargetIntrinsicInfo();

/// Return the name of a target intrinsic, e.g. "llvm.bfin.ssync".
virtual const char *getName(unsigned IntrID) const =0;
/// The Tys and numTys parameters are for intrinsics with overloaded types
/// (e.g., those using iAny or fAny). For a declaration for an overloaded
/// intrinsic, Tys should point to an array of numTys pointers to Type,
/// and must provide exactly one type for each overloaded type in the
/// intrinsic.
virtual std::string getName(unsigned IID, const Type **Tys = 0,
unsigned numTys = 0) const = 0;

/// Look up target intrinsic by name. Return intrinsic ID or 0 for unknown
/// names.

@@ -40,6 +48,15 @@ public:

/// Return the target intrinsic ID of a function, or 0.
virtual unsigned getIntrinsicID(Function *F) const;

/// Returns true if the intrinsic can be overloaded.
virtual bool isOverloaded(unsigned IID) const = 0;

/// Create or insert an LLVM Function declaration for an intrinsic,
/// and return it. The Tys and numTys are for intrinsics with overloaded
/// types. See above for more information.
virtual Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
unsigned numTys = 0) const = 0;
};

} // End llvm namespace

@@ -401,12 +401,18 @@ bool DIVariable::Verify() const {
/// getOriginalTypeSize - If this type is derived from a base type then
/// return base type size.
uint64_t DIDerivedType::getOriginalTypeSize() const {
DIType BT = getTypeDerivedFrom();
if (!BT.isNull() && BT.isDerivedType())
return DIDerivedType(BT.getNode()).getOriginalTypeSize();
if (BT.isNull())
return getSizeInBits();
return BT.getSizeInBits();
unsigned Tag = getTag();
if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
DIType BaseType = getTypeDerivedFrom();
if (BaseType.isDerivedType())
return DIDerivedType(BaseType.getNode()).getOriginalTypeSize();
else
return BaseType.getSizeInBits();
}

return getSizeInBits();
}

/// describes - Return true if this subprogram provides debugging

@@ -17,6 +17,7 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

//===----------------------------------------------------------------------===//

@@ -96,45 +97,47 @@ static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
if (!CI)
return NULL;

// Type must be known to determine array size.
// The size of the malloc's result type must be known to determine array size.
const Type *T = getMallocAllocatedType(CI);
if (!T)
if (!T || !T->isSized() || !TD)
return NULL;

Value *MallocArg = CI->getOperand(1);
const Type *ArgType = MallocArg->getType();
ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg);
BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg);

Constant *ElementSize = ConstantExpr::getSizeOf(T);
ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
MallocArg->getType());
Constant *FoldedElementSize =
ConstantFoldConstantExpression(cast<ConstantExpr>(ElementSize), Context, TD);
unsigned ElementSizeInt = TD->getTypeAllocSize(T);
if (const StructType *ST = dyn_cast<StructType>(T))
ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes();
Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt);

// First, check if CI is a non-array malloc.
if (CO && ((CO == ElementSize) ||
(FoldedElementSize && (CO == FoldedElementSize))))
if (CO && CO == ElementSize)
// Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
return ConstantInt::get(MallocArg->getType(), 1);
return ConstantInt::get(ArgType, 1);

// Second, check if CI is an array malloc whose array size can be determined.
if (isConstantOne(ElementSize) ||
(FoldedElementSize && isConstantOne(FoldedElementSize)))
if (isConstantOne(ElementSize))
return MallocArg;

if (ConstantInt *CInt = dyn_cast<ConstantInt>(MallocArg))
if (CInt->getZExtValue() % ElementSizeInt == 0)
return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt);

if (!CO && !BO)
return NULL;

Value *Op0 = NULL;
Value *Op1 = NULL;
unsigned Opcode = 0;
if (CO && ((CO->getOpcode() == Instruction::Mul) ||
if (CO && ((CO->getOpcode() == Instruction::Mul) ||
(CO->getOpcode() == Instruction::Shl))) {
Op0 = CO->getOperand(0);
Op1 = CO->getOperand(1);
Opcode = CO->getOpcode();
}
if (BO && ((BO->getOpcode() == Instruction::Mul) ||
if (BO && ((BO->getOpcode() == Instruction::Mul) ||
(BO->getOpcode() == Instruction::Shl))) {
Op0 = BO->getOperand(0);
Op1 = BO->getOperand(1);

@@ -144,12 +147,10 @@ static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
// Determine array size if malloc's argument is the product of a mul or shl.
if (Op0) {
if (Opcode == Instruction::Mul) {
if ((Op1 == ElementSize) ||
(FoldedElementSize && (Op1 == FoldedElementSize)))
if (Op1 == ElementSize)
// ArraySize * ElementSize
return Op0;
if ((Op0 == ElementSize) ||
(FoldedElementSize && (Op0 == FoldedElementSize)))
if (Op0 == ElementSize)
// ElementSize * ArraySize
return Op1;
}

@@ -161,11 +162,10 @@ static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
Value *Op1Pow = ConstantInt::get(Context,
APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
if (Op0 == ElementSize || (FoldedElementSize && Op0 == FoldedElementSize))
if (Op0 == ElementSize)
// ArraySize << log2(ElementSize)
return Op1Pow;
if (Op1Pow == ElementSize ||
(FoldedElementSize && Op1Pow == FoldedElementSize))
if (Op1Pow == ElementSize)
// ElementSize << log2(ArraySize)
return Op0;
}

@@ -205,35 +205,41 @@ const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context,
}

/// getMallocType - Returns the PointerType resulting from the malloc call.
/// This PointerType is the result type of the call's only bitcast use.
/// If there is no unique bitcast use, then return NULL.
/// The PointerType depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
const PointerType *llvm::getMallocType(const CallInst *CI) {
assert(isMalloc(CI) && "GetMallocType and not malloc call");

const BitCastInst *BCI = NULL;

const PointerType *MallocType = NULL;
unsigned NumOfBitCastUses = 0;

// Determine if CallInst has a bitcast use.
for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
UI != E; )
if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
break;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
MallocType = cast<PointerType>(BCI->getDestTy());
NumOfBitCastUses++;
}

// Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
// destination type.
if (BCI && CI->hasOneUse())
return cast<PointerType>(BCI->getDestTy());
// Malloc call has 1 bitcast use, so type is the bitcast's destination type.
if (NumOfBitCastUses == 1)
return MallocType;

// Malloc call was not bitcast, so type is the malloc function's return type.
if (!BCI)
if (NumOfBitCastUses == 0)
return cast<PointerType>(CI->getType());

// Type could not be determined.
return NULL;
}

/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
/// Type is the result type of the call's only bitcast use. If there is no
/// unique bitcast use, then return NULL.
/// getMallocAllocatedType - Returns the Type allocated by malloc call.
/// The Type depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the malloc calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
const PointerType *PT = getMallocType(CI);
return PT ? PT->getElementType() : NULL;
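
The new constant-argument case in isArrayMallocHelper is plain arithmetic: if malloc's single argument is a constant that the element size divides evenly, the quotient is the array size — e.g. malloc(40) with a 4-byte element type gives 40 % 4 == 0, so the call is treated as an array malloc of 40 / 4 = 10 elements. A standalone restatement of that check (the real code works on ConstantInt and TargetData, not plain integers):

    #include <cstdint>

    // Returns 0 when the argument is not recognizable as an array malloc
    // of the given element size.
    uint64_t mallocArrayElems(uint64_t MallocArg, uint64_t ElementSize) {
      if (ElementSize != 0 && MallocArg % ElementSize == 0)
        return MallocArg / ElementSize; // e.g. 40 / 4 == 10
      return 0;
    }
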
@@ -480,17 +480,17 @@ bool LLParser::ParseMDNode(MetadataBase *&Node) {
if (ParseUInt32(MID)) return true;

// Check existing MDNode.
std::map<unsigned, MetadataBase *>::iterator I = MetadataCache.find(MID);
std::map<unsigned, WeakVH>::iterator I = MetadataCache.find(MID);
if (I != MetadataCache.end()) {
Node = I->second;
Node = cast<MetadataBase>(I->second);
return false;
}

// Check known forward references.
std::map<unsigned, std::pair<MetadataBase *, LocTy> >::iterator
std::map<unsigned, std::pair<WeakVH, LocTy> >::iterator
FI = ForwardRefMDNodes.find(MID);
if (FI != ForwardRefMDNodes.end()) {
Node = FI->second.first;
Node = cast<MetadataBase>(FI->second.first);
return false;
}

@@ -570,7 +570,7 @@ bool LLParser::ParseStandaloneMetadata() {

MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size());
MetadataCache[MetadataID] = Init;
std::map<unsigned, std::pair<MetadataBase *, LocTy> >::iterator
std::map<unsigned, std::pair<WeakVH, LocTy> >::iterator
FI = ForwardRefMDNodes.find(MetadataID);
if (FI != ForwardRefMDNodes.end()) {
MDNode *FwdNode = cast<MDNode>(FI->second.first);

@@ -3619,12 +3619,14 @@ bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
// Autoupgrade old malloc instruction to malloc call.
// FIXME: Remove in LLVM 3.0.
const Type *IntPtrTy = Type::getInt32Ty(Context);
Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
if (!MallocF)
// Prototype malloc as "void *(int32)".
// This function is renamed as "malloc" in ValidateEndOfModule().
MallocF = cast<Function>(
M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL));
Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, Size, MallocF);
Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF);
return false;
}
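
The MetadataBase* → WeakVH change in these caches matters because a weak value handle tracks its Value: if a cached node is replaced or deleted while parsing continues, the handle follows the RAUW or becomes null rather than dangling. A sketch of the behavior (simplified usage, not the parser's code; SomeNode is a placeholder, and WeakVH lived in llvm/Support/ValueHandle.h in this era):

    WeakVH Cached = SomeNode;      // stored in std::map<unsigned, WeakVH>
    // ... SomeNode may later be RAUW'd or deleted by the parser ...
    if (Value *V = Cached) {       // converts to Value*; null if deleted
      MetadataBase *MB = cast<MetadataBase>(V);
      (void)MB;
    }
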
@@ -79,8 +79,8 @@ namespace llvm {
std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
std::vector<PATypeHolder> NumberedTypes;
/// MetadataCache - This map keeps track of parsed metadata constants.
std::map<unsigned, MetadataBase *> MetadataCache;
std::map<unsigned, std::pair<MetadataBase *, LocTy> > ForwardRefMDNodes;
std::map<unsigned, WeakVH> MetadataCache;
std::map<unsigned, std::pair<WeakVH, LocTy> > ForwardRefMDNodes;
SmallVector<std::pair<unsigned, MDNode *>, 2> MDsOnInst;
struct UpRefRecord {
/// Loc - This is the location of the upref.

@@ -2101,8 +2101,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (!Ty || !Size) return Error("Invalid MALLOC record");
if (!CurBB) return Error("Invalid malloc instruction with no BB");
const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext());
Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType());
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty);
I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(),
Size, NULL);
AllocSize, Size, NULL);
InstructionList.push_back(I);
break;
}

@@ -491,8 +491,9 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
}

bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AntiDepGroupIndex,
std::map<unsigned, unsigned> &RenameMap) {
unsigned AntiDepGroupIndex,
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap) {
unsigned *KillIndices = State->GetKillIndices();
unsigned *DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&

@@ -547,22 +548,41 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (Regs.size() > 1)
return false;

// Check each possible rename register for SuperReg. If that register
// is available, and the corresponding registers are available for
// the other group subregisters, then we can use those registers to
// rename.
DEBUG(errs() << "\tFind Register:");
// Check each possible rename register for SuperReg in round-robin
// order. If that register is available, and the corresponding
// registers are available for the other group subregisters, then we
// can use those registers to rename.
BitVector SuperBV = RenameRegisterMap[SuperReg];
for (int r = SuperBV.find_first(); r != -1; r = SuperBV.find_next(r)) {
const unsigned Reg = (unsigned)r;
const TargetRegisterClass *SuperRC =
TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);

const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
if (RB == RE) {
DEBUG(errs() << "\tEmpty Regclass!!\n");
return false;
}

if (RenameOrder.count(SuperRC) == 0)
RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));

DEBUG(errs() << "\tFind Register:");

const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
TargetRegisterClass::iterator R = OrigR;
do {
if (R == RB) R = RE;
--R;
const unsigned Reg = *R;
// Don't replace a register with itself.
if (Reg == SuperReg) continue;

DEBUG(errs() << " " << TRI->getName(Reg));

// If Reg is dead and Reg's most recent def is not before
// SuperRegs's kill, it's safe to replace SuperReg with
// Reg. We must also check all subregisters of Reg.
// SuperRegs's kill, it's safe to replace SuperReg with Reg. We
// must also check all subregisters of Reg.
if (State->IsLive(Reg) || (KillIndices[SuperReg] > DefIndices[Reg])) {
DEBUG(errs() << "(live)");
continue;

@@ -580,13 +600,15 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (found)
continue;
}

if (Reg != 0) {
DEBUG(errs() << '\n');
RenameOrder.erase(SuperRC);
RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
RenameMap.insert(std::pair<unsigned, unsigned>(SuperReg, Reg));
return true;
}
}
} while (R != EndR);

DEBUG(errs() << '\n');

@@ -627,6 +649,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
State = new AggressiveAntiDepState(*SavedState);
}
}

// For each regclass the next register to use for renaming.
RenameOrderType RenameOrder;

// ...need a map from MI to SUnit.
std::map<MachineInstr *, SUnit *> MISUnitMap;

@@ -738,7 +763,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(

// Look for a suitable register to use to break the anti-dependence.
std::map<unsigned, unsigned> RenameMap;
if (FindSuitableFreeRegisters(GroupIndex, RenameMap)) {
if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
DEBUG(errs() << "\tBreaking anti-dependence edge on "
<< TRI->getName(AntiDepReg) << ":");

@@ -155,6 +155,9 @@ namespace llvm {
void FinishBlock();

private:
typedef std::map<const TargetRegisterClass *,
TargetRegisterClass::const_iterator> RenameOrderType;

/// IsImplicitDefUse - Return true if MO represents a register
/// that is both implicitly used and defined in MI
bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);

@@ -169,6 +172,7 @@ namespace llvm {
void ScanInstruction(MachineInstr *MI, unsigned Count);
BitVector GetRenameRegisters(unsigned Reg);
bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap);
};
}
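
The RenameOrderType map added here remembers, per register class, where the previous search stopped, so the next query resumes there instead of always rescanning from the front of the allocation order. The backwards do/while over a circular range distills to a small generic routine (illustrative only; it assumes a non-empty range, which the caller guarantees with the RB == RE check):

    // Visit every element of [RB, RE) exactly once, starting just before
    // OrigR and wrapping, so successive calls spread picks round-robin.
    template <class It, class Pred>
    It circularFindBackwards(It RB, It RE, It OrigR, Pred Match) {
      It EndR = (OrigR == RE) ? RB : OrigR;
      It R = OrigR;
      do {
        if (R == RB) R = RE; // wrap around
        --R;
        if (Match(*R)) return R;
      } while (R != EndR);
      return RE; // not found
    }
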
@@ -1590,6 +1590,17 @@ void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
<< TRI->getName(MI->getOperand(0).getReg());
}

void AsmPrinter::printKill(const MachineInstr *MI) const {
if (!VerboseAsm) return;
O.PadToColumn(MAI->getCommentColumn());
O << MAI->getCommentString() << " kill:";
for (unsigned n = 0, e = MI->getNumOperands(); n != e; ++n) {
const MachineOperand &op = MI->getOperand(n);
assert(op.isReg() && "KILL instruction must have only register operands");
O << ' ' << TRI->getName(op.getReg()) << (op.isDef() ? "<def>" : "<kill>");
}
}

/// printLabel - This method prints a local label used by debug and
/// exception handling tables.
void AsmPrinter::printLabel(const MachineInstr *MI) const {

@@ -1137,6 +1137,9 @@ DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){

AddSourceLine(MemberDie, &DT);

DIEBlock *MemLocationDie = new DIEBlock();
AddUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);

uint64_t Size = DT.getSizeInBits();
uint64_t FieldSize = DT.getOriginalTypeSize();

@@ -1155,12 +1158,16 @@ DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
// Maybe we need to work from the other end.
if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
}

DIEBlock *Block = new DIEBlock();
AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
AddUInt(Block, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, Block);
// Here WD_AT_data_member_location points to the anonymous
// field that includes this bit field.
AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);

} else
// This is not a bitfield.
AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);

AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);

if (DT.isProtected())
AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,

@@ -31,6 +31,22 @@ namespace llvm {
bool EnableFastISel;
}

static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
cl::desc("Disable branch folding"));
static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
cl::desc("Disable Machine Sinking"));
static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
cl::desc("Disable Loop Strength Reduction Pass"));
static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
cl::desc("Disable Codegen Prepare"));
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,

@@ -208,7 +224,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Standard LLVM-Level Passes.

// Run loop strength reduction before anything else.
if (OptLevel != CodeGenOpt::None) {
if (OptLevel != CodeGenOpt::None && !DisableLSR) {
PM.add(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));

@@ -236,7 +252,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Make sure that no unreachable blocks are instruction selected.
PM.add(createUnreachableBlockEliminationPass());

if (OptLevel != CodeGenOpt::None)
if (OptLevel != CodeGenOpt::None && !DisableCGP)
PM.add(createCodeGenPreparePass(getTargetLowering()));

PM.add(createStackProtectorPass(getTargetLowering()));

@@ -265,8 +281,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
/* allowDoubleDefs= */ true);

if (OptLevel != CodeGenOpt::None) {
PM.add(createMachineLICMPass());
PM.add(createMachineSinkingPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After MachineLICM and MachineSinking",
/* allowDoubleDefs= */ true);
}

@@ -281,7 +299,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After Register Allocation");

// Perform stack slot coloring.
if (OptLevel != CodeGenOpt::None) {
if (OptLevel != CodeGenOpt::None && !DisableSSC) {
// FIXME: Re-enable coloring with register when it's capable of adding
// kill markers.
PM.add(createStackSlotColoringPass(false));

@@ -304,13 +322,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After PreSched2 passes");

// Second pass scheduler.
if (OptLevel != CodeGenOpt::None) {
if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
PM.add(createPostRAScheduler(OptLevel));
printAndVerify(PM, "After PostRAScheduler");
}

// Branch folding must be run after regalloc and prolog/epilog insertion.
if (OptLevel != CodeGenOpt::None) {
if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
printAndVerify(PM, "After BranchFolding");
}

@@ -324,13 +342,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createDebugLabelFoldingPass());
printAndVerify(PM, "After DebugLabelFolding");

if (addPreEmitPass(PM, OptLevel))
printAndVerify(PM, "After PreEmit passes");

if (OptLevel != CodeGenOpt::None) {
if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
PM.add(createCodePlacementOptPass());
printAndVerify(PM, "After CodePlacementOpt");
}

if (addPreEmitPass(PM, OptLevel))
printAndVerify(PM, "After PreEmit passes");

return false;
}
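
Every flag introduced above follows the same idiom: a hidden cl::opt<bool> defaulting to false, consulted exactly where the corresponding pass would be scheduled. The shape, with a made-up flag and pass name for illustration:

    // "disable-foo" and createFooPass are hypothetical stand-ins.
    static cl::opt<bool> DisableFoo("disable-foo", cl::Hidden,
                                    cl::desc("Disable the Foo pass"));

    // ... inside addCommonCodeGenPasses ...
    if (OptLevel != CodeGenOpt::None && !DisableFoo)
      PM.add(createFooPass());
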
@@ -111,6 +111,13 @@ namespace {
/// be hoistable.
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);

/// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
/// the preheader that compute the same value. If it's found, do a RAU on
/// with the definition of the existing instruction rather than hoisting
/// the instruction to the preheader.
bool EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);

/// Hoist - When an instruction is found to only use loop invariant operands
/// that is safe to hoist, this instruction is called to do the dirty work.
///

@@ -349,37 +356,6 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
return true;
}

static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs,
MachineRegisterInfo *RegInfo) {
unsigned NumOps = MI->getNumOperands();
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
unsigned NumOps2 = PrevMI->getNumOperands();
if (NumOps != NumOps2)
continue;
bool IsSame = true;
for (unsigned j = 0; j != NumOps; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (MO.isReg() && MO.isDef()) {
if (RegInfo->getRegClass(MO.getReg()) !=
RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
IsSame = false;
break;
}
continue;
}
if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
IsSame = false;
break;
}
}
if (IsSame)
return PrevMI;
}
return 0;
}

MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable

@@ -456,6 +432,55 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
}
}

static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs,
MachineRegisterInfo *RegInfo) {
unsigned NumOps = MI->getNumOperands();
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
unsigned NumOps2 = PrevMI->getNumOperands();
if (NumOps != NumOps2)
continue;
bool IsSame = true;
for (unsigned j = 0; j != NumOps; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (MO.isReg() && MO.isDef()) {
if (RegInfo->getRegClass(MO.getReg()) !=
RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
IsSame = false;
break;
}
continue;
}
if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
IsSame = false;
break;
}
}
if (IsSame)
return PrevMI;
}
return 0;
}

bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
if (CI != CSEMap.end()) {
if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo)) {
DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef())
RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
MI->eraseFromParent();
++NumCSEed;
return true;
}
}
return false;
}

/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///

@@ -488,24 +513,8 @@ void MachineLICM::Hoist(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
CI = CSEMap.find(Opcode);
bool DoneCSE = false;
if (CI != CSEMap.end()) {
const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo);
if (Dup) {
DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef())
RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
MI->eraseFromParent();
DoneCSE = true;
++NumCSEed;
}
}

// Otherwise, splice the instruction to the preheader.
if (!DoneCSE) {
if (!EliminateCSE(MI, CI)) {
// Otherwise, splice the instruction to the preheader.
CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);

// Add to the CSE map.

@@ -770,7 +770,8 @@ void SchedulePostRATDList::ListScheduleTopDown(
// just advance the current cycle and try again.
DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
++NumStalls;
if (!IgnoreAntiDep)
++NumStalls;
} else {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for

@@ -778,7 +779,8 @@ void SchedulePostRATDList::ListScheduleTopDown(
DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
Sequence.push_back(0);   // NULL here means noop
++NumNoops;
if (!IgnoreAntiDep)
++NumNoops;
}

++CurCycle;

@@ -367,6 +367,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
I->second.clear();
I->second.push_back(SU);
}
// See if it is known to just have a single memory reference.
MachineInstr *ChainMI = Chain->getInstr();

@@ -413,7 +414,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (Chain)
Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
} else if (MayAlias) {
} else {
// Treat all other stores conservatively.
goto new_chain;
}

@@ -439,7 +440,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Treat volatile loads conservatively. Note that this includes
// cases where memoperand information is unavailable.
goto new_chain;
} else if (MayAlias) {
} else {
// A "MayAlias" load. Depend on the general chain, as well as on
// all stores. In the absense of MachineMemOperand information,
// we can't even assume that the load doesn't alias well-behaved

@@ -16,8 +16,8 @@

using namespace llvm;

std::auto_ptr<IndexListEntry> SlotIndex::emptyKeyPtr(0),
SlotIndex::tombstoneKeyPtr(0);
std::auto_ptr<IndexListEntry> IndexListEntry::emptyKeyEntry,
IndexListEntry::tombstoneKeyEntry;

char SlotIndexes::ID = 0;
static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");

@@ -229,7 +229,7 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
if (NumRead > 0) {
BytesLeft -= NumRead;
BufPtr += NumRead;
} else if (errno == EINTR) {
} else if (NumRead == -1 && errno == EINTR) {
// try again
} else {
// error reading.
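
The MemoryBuffer fix tightens the retry condition: errno is only meaningful after a call that actually failed, and read signals failure by returning -1. A return of 0 means end-of-file, where errno may still hold a stale EINTR from some earlier call and must not trigger a retry. The classic restart loop, for reference (POSIX read, simplified):

    #include <cerrno>
    #include <unistd.h>

    // Retry only on a genuine EINTR failure; 0 is EOF, not an error.
    ssize_t readRetry(int FD, char *Buf, size_t Len) {
      ssize_t NumRead;
      do {
        NumRead = read(FD, Buf, Len);
      } while (NumRead == -1 && errno == EINTR);
      return NumRead;
    }
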
@@ -608,7 +608,8 @@ Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
while (*next) {
next = strchr(next, '/');
*next = 0;
if (!CreateDirectory(pathname, NULL))
if (!CreateDirectory(pathname, NULL) &&
GetLastError() != ERROR_ALREADY_EXISTS)
return MakeErrMsg(ErrMsg,
std::string(pathname) + ": Can't create directory: ");
*next++ = '/';

@@ -616,7 +617,8 @@ Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
} else {
// Drop trailing slash.
pathname[len-1] = 0;
if (!CreateDirectory(pathname, NULL)) {
if (!CreateDirectory(pathname, NULL) &&
GetLastError() != ERROR_ALREADY_EXISTS) {
return MakeErrMsg(ErrMsg, std::string(pathname) + ": Can't create directory: ");
}
}
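
With the added GetLastError() != ERROR_ALREADY_EXISTS test, directory creation becomes idempotent: an already-present directory no longer aborts createDirectoryOnDisk. One caveat worth noting is that ERROR_ALREADY_EXISTS is also returned when a file occupies the name, so a stricter wrapper would check the path's attributes. A minimal sketch of the tolerant pattern (Win32 API; ensureDirectory is a hypothetical helper):

    #include <windows.h>

    // Returns true if Path was created by us or already existed.
    // Note: ERROR_ALREADY_EXISTS can also mean a *file* of that name exists.
    bool ensureDirectory(const char *Path) {
      if (CreateDirectoryA(Path, NULL))
        return true;
      return GetLastError() == ERROR_ALREADY_EXISTS;
    }
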
@@ -476,7 +476,11 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}

static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
unsigned MaxAlign = 0;
// FIXME: For now, force at least 128-bit alignment. This will push the
// nightly tester harder for making sure things work correctly. When
// we're ready to enable this for real, this goes back to starting at zero.
unsigned MaxAlign = 16;
// unsigned MaxAlign = 0;

for (int i = FFI->getObjectIndexBegin(),
e = FFI->getObjectIndexEnd(); i != e; ++i) {

@@ -509,12 +513,15 @@ needsStackRealignment(const MachineFunction &MF) const {
if (!ARMDynamicStackAlign)
return false;

// FIXME: To force more brutal testing, realign whether we need to or not.
// Change this to be more selective when we turn it on for real, of course.
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
// unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
return (RealignStack &&
!AFI->isThumb1OnlyFunction() &&
(MFI->getMaxAlignment() > StackAlign) &&
AFI->hasStackFrame() &&
// (MFI->getMaxAlignment() > StackAlign) &&
!MFI->hasVarSizedObjects());
}

@@ -1205,7 +1212,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return ScratchReg;
}

/// Move iterator pass the next bunch of callee save load / store ops for
/// Move iterator past the next bunch of callee save load / store ops for
/// the particular spill area (1: integer area 1, 2: integer area 2,
/// 3: fp area, 0: don't care).
static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,

@@ -1339,10 +1346,10 @@ emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
NumBytes = DPRCSOffset;
if (NumBytes) {
// Insert it after all the callee-save spills.
movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
}

@@ -1346,6 +1346,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printLabel(MI);
return;
case TargetInstrInfo::KILL:
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
O << '\t';

@@ -12,7 +12,11 @@
//===----------------------------------------------------------------------===//

#include "BlackfinIntrinsicInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>

@@ -30,18 +34,21 @@ namespace bfinIntrinsic {

}

const char *BlackfinIntrinsicInfo::getName(unsigned IntrID) const {
std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
unsigned numTys) const {
static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
};

assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
if (IntrID < Intrinsic::num_intrinsics)
return 0;
assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID");

return names[IntrID - Intrinsic::num_intrinsics];
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
return Result;
}

unsigned

@@ -51,3 +58,44 @@ BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
#undef GET_FUNCTION_RECOGNIZER
return 0;
}

bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
// Overload Table
const bool OTable[] = {
false,  // illegal intrinsic
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
};
if (IntrID == 0)
return false;
else
return OTable[IntrID - Intrinsic::num_intrinsics];
}

/// This defines the "getAttributes(ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES

static const FunctionType *getType(LLVMContext &Context, unsigned id) {
const Type *ResultTy = NULL;
std::vector<const Type*> ArgTys;
bool IsVarArg = false;

#define GET_INTRINSIC_GENERATOR
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_GENERATOR

return FunctionType::get(ResultTy, ArgTys, IsVarArg);
}

Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
const Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
return cast<Function>(M->getOrInsertFunction(getName(IntrID),
getType(M->getContext(), IntrID),
AList));
}

@@ -19,8 +19,12 @@ namespace llvm {

class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
public:
const char *getName(unsigned IntrID) const;
std::string getName(unsigned IntrID, const Type **Tys = 0,
unsigned numTys = 0) const;
unsigned lookupName(const char *Name, unsigned Len) const;
bool isOverloaded(unsigned IID) const;
Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
unsigned numTys = 0) const;
};

}

@@ -306,6 +306,7 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
printLabel(MI);
return;
case TargetInstrInfo::KILL:
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
O << '\t';

@@ -414,6 +414,9 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
case MachineOperand::MO_BlockAddress:
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
return;
case MachineOperand::MO_ExternalSymbol: {
// Computing the address of an external symbol, not calling it.
std::string Name(MAI->getGlobalPrefix());

@@ -196,10 +196,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// appropriate instructions to materialize the address.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);

@@ -1167,6 +1169,36 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
return SDValue(); // Not reached
}

SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
EVT PtrVT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();

BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue TgtBA = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero);

// If this is a non-darwin platform, we don't support non-static relo models
// yet.
const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static ||
!TM.getSubtarget<PPCSubtarget>().isDarwin()) {
// Generate non-pic code that has direct accesses to globals.
// The address of the global is just (hi(&g)+lo(&g)).
return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

if (TM.getRelocationModel() == Reloc::PIC_) {
// With PIC, the first instruction is actually "GR+hi(&G)".
Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(PPCISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(), PtrVT), Hi);
}

return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) {
EVT PtrVT = Op.getValueType();

@@ -4181,6 +4213,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
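
LowerBlockAddress handles the blockaddress constant consumed by the then-new indirectbr instruction; the Hi/Lo split mirrors how PPC already materializes global addresses. A hedged sketch of the IR-construction side that feeds this lowering (2009-era C++ API; emitIndirectJump is a hypothetical helper):

    // Take the address of a basic block and branch to it indirectly; this is
    // the kind of IR that reaches LowerBlockAddress during PPC selection.
    void emitIndirectJump(BasicBlock *Target, BasicBlock *InsertAtEnd) {
      Value *Addr = BlockAddress::get(Target);   // i8* blockaddress constant
      IndirectBrInst *IBr =
          IndirectBrInst::Create(Addr, /*NumDests=*/1, InsertAtEnd);
      IBr->addDestination(Target);               // declare possible successor
    }
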
@ -361,6 +361,7 @@ namespace llvm {
|
||||
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
|
||||
|
@ -731,9 +731,13 @@ def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
|
||||
def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
|
||||
def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
|
||||
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
|
||||
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
|
||||
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
|
||||
def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
|
||||
(ADDIS8 G8RC:$in, tglobaladdr:$g)>;
|
||||
def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
|
||||
(ADDIS8 G8RC:$in, tconstpool:$g)>;
|
||||
def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
|
||||
(ADDIS8 G8RC:$in, tjumptable:$g)>;
|
||||
def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
|
||||
(ADDIS8 G8RC:$in, tblockaddress:$g)>;
|
||||
|
@ -1436,12 +1436,16 @@ def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
|
||||
def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
|
||||
def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
|
||||
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
|
||||
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
|
||||
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
|
||||
def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
|
||||
(ADDIS GPRC:$in, tglobaladdr:$g)>;
|
||||
def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
|
||||
(ADDIS GPRC:$in, tconstpool:$g)>;
|
||||
def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
|
||||
(ADDIS GPRC:$in, tjumptable:$g)>;
|
||||
def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
|
||||
(ADDIS GPRC:$in, tblockaddress:$g)>;
|
||||
|
||||
// Fused negative multiply subtract, alternate pattern
|
||||
def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
|
||||
|
@ -412,6 +412,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
|
||||
printImplicitDef(MI);
|
||||
return;
|
||||
case TargetInstrInfo::KILL:
|
||||
printKill(MI);
|
||||
return;
|
||||
case X86::MOVPC32r: {
|
||||
MCInst TmpInst;
|
||||
|
@ -822,32 +822,42 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     CallInst *CI,
                                                     BitCastInst *BCI,
                                                     const Type *AllocTy,
                                                     Value* NElems,
                                                     LLVMContext &Context,
                                                     TargetData* TD) {
  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
               << " CALL = " << *CI << " BCI = " << *BCI << '\n');
  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');

  const Type *IntPtrTy = TD->getIntPtrType(Context);

  // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
  // returned NULL and we would not be here).
  BitCastInst *BCI = NULL;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
    if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
      break;

  ConstantInt *NElements = cast<ConstantInt>(NElems);
  if (NElements->getZExtValue() != 1) {
    // If we have an array allocation, transform it to a single element
    // allocation to make the code below simpler.
    Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
                                 NElements->getZExtValue());
    Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
    Instruction* NewMI = cast<Instruction>(NewM);
    Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
    unsigned TypeSize = TD->getTypeAllocSize(NewTy);
    if (const StructType *ST = dyn_cast<StructType>(NewTy))
      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
    Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
                                        ConstantInt::get(IntPtrTy, TypeSize));
    Value* Indices[2];
    Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
                                              NewMI->getName()+".el0", CI);
    BCI->replaceAllUsesWith(NewGEP);
    BCI->eraseFromParent();
    Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
                                              NewCI->getName()+".el0", CI);
    Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
    if (BCI) BCI->replaceAllUsesWith(NewGEP);
    CI->replaceAllUsesWith(Cast);
    if (BCI) BCI->eraseFromParent();
    CI->eraseFromParent();
    BCI = cast<BitCastInst>(NewMI);
    CI = extractMallocCallFromBitCast(NewMI);
    BCI = dyn_cast<BitCastInst>(NewCI);
    CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);
  }

  // Create the new global variable.  The contents of the malloc'd memory is
@ -861,8 +871,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                            GV,
                                            GV->isThreadLocal());

  // Anything that used the malloc now uses the global directly.
  BCI->replaceAllUsesWith(NewGV);
  // Anything that used the malloc or its bitcast now uses the global directly.
  if (BCI) BCI->replaceAllUsesWith(NewGV);
  CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
@ -930,9 +941,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
  GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  // Now the GV is dead, nuke it and the malloc (both CI and BCI).
  GV->eraseFromParent();
  BCI->eraseFromParent();
  if (BCI) BCI->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
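The rewrite above leans on an invariant worth spelling out: by the time this routine runs, getMallocType() has already rejected any malloc call with more than one bitcast among its users. A minimal C++ sketch of that scan, in the style of the patch (the wrapper function findUniqueBitCastUse is hypothetical, for illustration only):

#include "llvm/Instructions.h"
using namespace llvm;

// Hypothetical helper: return the single BitCastInst user of a malloc call,
// or NULL when the call is only used as a raw i8*.
static BitCastInst *findUniqueBitCastUse(CallInst *CI) {
  BitCastInst *BCI = NULL;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
    if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
      break;  // at most one bitcast use exists by construction
  return BCI;
}

Because BCI may legitimately be NULL, every later use of it in the function is guarded by a null check rather than assumed to exist.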
@ -1273,13 +1284,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,

/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
                                            CallInst *CI, BitCastInst* BCI,
                                            Value* NElems,
                                            LLVMContext &Context,
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                                            Value* NElems, LLVMContext &Context,
                                            TargetData *TD) {
  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI
               << " BITCAST = " << *BCI << '\n');
  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
  const Type* MAT = getMallocAllocatedType(CI);
  const StructType *STy = cast<StructType>(MAT);

@ -1287,8 +1295,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
  // it into GV).  If there are other uses, change them to be uses of
  // the global to simplify later code.  This also deletes the store
  // into GV.
  ReplaceUsesOfMallocWithGlobal(BCI, GV);
  ReplaceUsesOfMallocWithGlobal(CI, GV);

  // Okay, at this point, there are no users of the malloc.  Insert N
  // new mallocs at the same place as CI, and N globals.
  std::vector<Value*> FieldGlobals;
@ -1306,11 +1314,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
                                      GV->isThreadLocal());
    FieldGlobals.push_back(NGV);

    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
                                        FieldTy, NElems,
                                        BCI->getName() + ".f" + Twine(FieldNo));
    unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
    if (const StructType* ST = dyn_cast<StructType>(FieldTy))
      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
    const Type* IntPtrTy = TD->getIntPtrType(Context);
    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                        ConstantInt::get(IntPtrTy, TypeSize),
                                        NElems,
                                        CI->getName() + ".f" + Twine(FieldNo));
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, BCI);
    new StoreInst(NMI, NGV, CI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
@ -1327,18 +1340,18 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
  // }
  Value *RunningOr = 0;
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                             Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                             Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    if (!RunningOr)
      RunningOr = Cond;   // First seteq
    else
      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = BCI->getParent();
  BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
  BasicBlock *OrigBB = CI->getParent();
  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");

  // Create the block to check the first condition.  Put all these blocks at the
  // end of the function as they are unlikely to be executed.
@ -1374,9 +1387,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // CI and BCI are no longer needed, remove them.
  BCI->eraseFromParent();

  // CI is no longer needed, remove it.
  CI->eraseFromParent();

/// InsertedScalarizedLoads - As we process loads, if we can't immediately
@ -1463,14 +1475,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
/// cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                               CallInst *CI,
                                               BitCastInst *BCI,
                                               const Type *AllocTy,
                                               Module::global_iterator &GVI,
                                               TargetData *TD,
                                               LLVMContext &Context) {
  // If we can't figure out the type being malloced, then we can't optimize.
  const Type *AllocTy = getMallocAllocatedType(CI);
  assert(AllocTy);

  // If this is a malloc of an abstract type, don't touch it.
  if (!AllocTy->isSized())
    return false;
@ -1491,7 +1499,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
  // for.
  {
    SmallPtrSet<PHINode*, 8> PHIs;
    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
      return false;
  }

@ -1499,16 +1507,16 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  Value *NElems = getMallocArraySize(CI, Context, TD);
  // We cannot optimize the malloc if we cannot determine malloc array size.
  if (NElems) {
  if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
    if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
      // Restrict this transformation to only working on small allocations
      // (2048 bytes currently), as we don't want to introduce a 16M global or
      // something.
      if (TD &&
          NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD);
        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
                                            Context, TD);
        return true;
      }

@ -1526,26 +1534,29 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
    // If the structure has an unreasonable number of fields, leave it
    // alone.
    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {

      // If this is a fixed size array, transform the Malloc to be an alloc of
      // structs.  malloc [100 x struct],1 -> malloc struct, 100
      if (const ArrayType *AT =
                            dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
        Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
                                              AT->getNumElements());
        Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
                                              AllocSTy, NumElements,
                                              BCI->getName());
        Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
        BCI->replaceAllUsesWith(Cast);
        BCI->eraseFromParent();
        const Type *IntPtrTy = TD->getIntPtrType(Context);
        unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
        Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
        Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
        Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
                                                     AllocSize, NumElements,
                                                     CI->getName());
        Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
        CI->replaceAllUsesWith(Cast);
        CI->eraseFromParent();
        BCI = cast<BitCastInst>(NewMI);
        CI = extractMallocCallFromBitCast(NewMI);
        CI = dyn_cast<BitCastInst>(Malloc) ?
             extractMallocCallFromBitCast(Malloc) :
             cast<CallInst>(Malloc);
      }

      GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD);
      GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),
                                 Context, TD);
      return true;
    }
  }
@ -1577,15 +1588,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
        return true;
  } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
    if (getMallocAllocatedType(CI)) {
      BitCastInst* BCI = NULL;
      for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
           UI != E; )
        BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
      if (BCI &&
          TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
        return true;
    }
    const Type* MallocType = getMallocAllocatedType(CI);
    if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                         GVI, TD, Context))
      return true;
  }
}
@ -78,6 +78,21 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination");

FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }

/// isValueAtLeastAsBigAs - Return true if the stored size of V1 is greater
/// than or equal to the stored size of V2.  This returns false if we don't
/// know.
///
static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) {
  const Type *V1Ty = V1->getType(), *V2Ty = V2->getType();

  // Exactly the same type, must have exactly the same size.
  if (V1Ty == V2Ty) return true;

  // If we don't have target data, we don't know.
  if (TD == 0) return false;

  return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty);
}

bool DSE::runOnBasicBlock(BasicBlock &BB) {
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  TD = getAnalysisIfAvailable<TargetData>();
@ -118,9 +133,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
    // If this is a store-store dependence, then the previous store is dead so
    // long as this store is at least as big as it.
    if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
      if (TD &&
          TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
          TD->getTypeStoreSize(SI->getOperand(0)->getType())) {
      if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0), TD)) {
        // Delete the store and now-dead instructions that feed it.
        DeleteDeadInstruction(DepStore);
        NumFastStores++;
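To make the helper's decision concrete, here is a small self-contained model of its logic in plain C++; the hard-coded store sizes stand in for TargetData::getTypeStoreSize, and everything else in this sketch is illustrative rather than part of the patch:

#include <cassert>

// Stand-ins for two LLVM integer types and their store sizes in bytes
// (assumed layout).
enum Ty { I32, I64 };
static unsigned storeSize(Ty T) { return T == I32 ? 4 : 8; }

// Mirrors isValueAtLeastAsBigAs: conservative when no TargetData is present.
static bool atLeastAsBig(Ty V1, Ty V2, bool HaveTargetData) {
  if (V1 == V2) return true;          // same type, same size
  if (!HaveTargetData) return false;  // unknown: do not delete the store
  return storeSize(V1) >= storeSize(V2);
}

int main() {
  assert(atLeastAsBig(I64, I32, true));   // bigger overwrite kills the dep
  assert(!atLeastAsBig(I32, I64, true));  // smaller overwrite: keep it
  assert(!atLeastAsBig(I64, I32, false)); // no TargetData: keep it
  return 0;
}

The net effect on DSE is that same-typed store pairs are now handled even without TargetData, which the new no-targetdata.ll test further below exercises.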
@ -68,9 +68,6 @@ namespace {
    static char ID; // Pass identification
    JumpThreading() : FunctionPass(&ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    }

    bool runOnFunction(Function &F);
    void FindLoopHeaders(Function &F);
@ -370,13 +370,13 @@ private:
  /// by properly seeding constants etc.
  LatticeVal &getValueState(Value *V) {
    assert(!isa<StructType>(V->getType()) && "Should use getStructValueState");

    // TODO: Change to do insert+find in one operation.
    DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
    if (I != ValueState.end())
      return I->second;  // Common case, already in the map.

    LatticeVal &LV = ValueState[V];
    std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
      ValueState.insert(std::make_pair(V, LatticeVal()));
    LatticeVal &LV = I.first->second;

    if (!I.second)
      return LV;  // Common case, already in the map.

    if (Constant *C = dyn_cast<Constant>(V)) {
      // Undef values remain undefined.
@ -395,15 +395,15 @@ private:
    assert(isa<StructType>(V->getType()) && "Should use getValueState");
    assert(i < cast<StructType>(V->getType())->getNumElements() &&
           "Invalid element #");

    // TODO: Change to do insert+find in one operation.
    DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator
      I = StructValueState.find(std::make_pair(V, i));
    if (I != StructValueState.end())
      return I->second;  // Common case, already in the map.

    LatticeVal &LV = StructValueState[std::make_pair(V, i)];
    std::pair<DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator,
              bool> I = StructValueState.insert(
                          std::make_pair(std::make_pair(V, i), LatticeVal()));
    LatticeVal &LV = I.first->second;

    if (!I.second)
      return LV;  // Common case, already in the map.

    if (Constant *C = dyn_cast<Constant>(V)) {
      if (isa<UndefValue>(C))
        ;  // Undef values remain undefined.
@ -1280,9 +1280,10 @@ CallOverdefined:
  }

  if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
      mergeInValue(getStructValueState(AI, i), AI,
                   getStructValueState(*CAI, i));
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      LatticeVal CallArg = getStructValueState(*CAI, i);
      mergeInValue(getStructValueState(AI, i), AI, CallArg);
    }
  } else {
    mergeInValue(AI, getValueState(*CAI));
  }
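Both getters now resolve the old TODO by folding the lookup and the insertion into one map operation: insert() returns the iterator for the existing or freshly default-constructed entry, plus a flag saying which case occurred. A self-contained sketch of the idiom, using std::map as a stand-in for llvm::DenseMap (an assumption made purely for illustration):

#include <cassert>
#include <map>
#include <utility>

struct LatticeVal { bool Overdefined; LatticeVal() : Overdefined(false) {} };

static LatticeVal &getState(std::map<int, LatticeVal> &ValueState, int V) {
  // One call both finds and, if needed, default-constructs the entry.
  std::pair<std::map<int, LatticeVal>::iterator, bool> I =
      ValueState.insert(std::make_pair(V, LatticeVal()));
  LatticeVal &LV = I.first->second;
  if (!I.second)
    return LV;  // Common case: V was already in the map.
  // First sighting of V: seed LV here before returning it.
  return LV;
}

int main() {
  std::map<int, LatticeVal> State;
  LatticeVal &A = getState(State, 42);
  LatticeVal &B = getState(State, 42);
  assert(&A == &B && State.size() == 1);  // one entry, one traversal per call
  return 0;
}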
@ -1699,18 +1699,24 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {

LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
                             const char *Name) {
  const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
  return wrap(unwrap(B)->Insert(CallInst::CreateMalloc(
      unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), 0, 0, ""),
      Twine(Name)));
  const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
  Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
  Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
                                               ITy, unwrap(Ty), AllocSize,
                                               0, 0, "");
  return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}

LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
                                  LLVMValueRef Val, const char *Name) {
  const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
  return wrap(unwrap(B)->Insert(CallInst::CreateMalloc(
      unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), unwrap(Val), 0, ""),
      Twine(Name)));
  const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
  Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
  Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
                                               ITy, unwrap(Ty), AllocSize,
                                               unwrap(Val), 0, "");
  return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}

LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
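Clients of the C API keep their existing two- and three-argument calls; the wrappers now compute the allocation size internally. A hypothetical caller, using only long-standing llvm-c entry points (the module, function, and value names are made up for the example):

#include "llvm-c/Core.h"

int main() {
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");
  LLVMTypeRef FnTy = LLVMFunctionType(LLVMVoidType(), 0, 0, 0);
  LLVMValueRef F = LLVMAddFunction(M, "f", FnTy);
  LLVMBuilderRef B = LLVMCreateBuilder();
  LLVMPositionBuilderAtEnd(B, LLVMAppendBasicBlock(F, "entry"));

  // Single object: malloc(sizeof(i32)), bitcast to i32*.
  LLVMValueRef P = LLVMBuildMalloc(B, LLVMInt32Type(), "p");
  // Array form: malloc(sizeof(i32) * 8), bitcast to i32*.
  LLVMValueRef A = LLVMBuildArrayMalloc(B, LLVMInt32Type(),
                                        LLVMConstInt(LLVMInt32Type(), 8, 0),
                                        "arr");
  (void)P; (void)A;
  LLVMBuildRetVoid(B);
  LLVMDumpModule(M);
  LLVMDisposeBuilder(B);
  LLVMDisposeModule(M);
  return 0;
}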
@ -24,6 +24,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetData.h"

using namespace llvm;

@ -448,22 +449,11 @@ static bool IsConstantOne(Value *val) {
  return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
}

static Value *checkArraySize(Value *Amt, const Type *IntPtrTy) {
  if (!Amt)
    Amt = ConstantInt::get(IntPtrTy, 1);
  else {
    assert(!isa<BasicBlock>(Amt) &&
           "Passed basic block into malloc size parameter! Use other ctor");
    assert(Amt->getType() == IntPtrTy &&
           "Malloc array size is not an intptr!");
  }
  return Amt;
}

static Instruction *createMalloc(Instruction *InsertBefore,
                                 BasicBlock *InsertAtEnd, const Type *IntPtrTy,
                                 const Type *AllocTy, Value *ArraySize,
                                 Function *MallocF, const Twine &NameStr) {
                                 const Type *AllocTy, Value *AllocSize,
                                 Value *ArraySize, Function *MallocF,
                                 const Twine &Name) {
  assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
         "createMalloc needs either InsertBefore or InsertAtEnd");

@ -471,10 +461,14 @@ static Instruction *createMalloc(Instruction *InsertBefore,
  //      bitcast (i8* malloc(typeSize)) to type*
  // malloc(type, arraySize) becomes:
  //      bitcast (i8 *malloc(typeSize*arraySize)) to type*
  Value *AllocSize = ConstantExpr::getSizeOf(AllocTy);
  AllocSize = ConstantExpr::getTruncOrBitCast(cast<Constant>(AllocSize),
                                              IntPtrTy);
  ArraySize = checkArraySize(ArraySize, IntPtrTy);
  if (!ArraySize)
    ArraySize = ConstantInt::get(IntPtrTy, 1);
  else if (ArraySize->getType() != IntPtrTy) {
    if (InsertBefore)
      ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
                                              "", InsertBefore);
    else
      ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
                                              "", InsertAtEnd);
  }

  if (!IsConstantOne(ArraySize)) {
    if (IsConstantOne(AllocSize)) {
@ -513,14 +507,14 @@ static Instruction *createMalloc(Instruction *InsertBefore,
    Result = MCall;
    if (Result->getType() != AllocPtrType)
      // Create a cast instruction to convert to the right type...
      Result = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore);
      Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
  } else {
    MCall = CallInst::Create(MallocF, AllocSize, "malloccall");
    Result = MCall;
    if (Result->getType() != AllocPtrType) {
      InsertAtEnd->getInstList().push_back(MCall);
      // Create a cast instruction to convert to the right type...
      Result = new BitCastInst(MCall, AllocPtrType, NameStr);
      Result = new BitCastInst(MCall, AllocPtrType, Name);
    }
  }
  MCall->setTailCall();
@ -538,8 +532,9 @@ static Instruction *createMalloc(Instruction *InsertBefore,
///    3. Bitcast the result of the malloc call to the specified type.
Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
                                    const Type *IntPtrTy, const Type *AllocTy,
                                    Value *ArraySize, const Twine &Name) {
  return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy,
                                    Value *AllocSize, Value *ArraySize,
                                    const Twine &Name) {
  return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
                      ArraySize, NULL, Name);
}

@ -553,9 +548,9 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
/// responsibility of the caller.
Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
                                    const Type *IntPtrTy, const Type *AllocTy,
                                    Value *ArraySize, Function* MallocF,
                                    const Twine &Name) {
  return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy,
                                    Value *AllocSize, Value *ArraySize,
                                    Function *MallocF, const Twine &Name) {
  return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
                      ArraySize, MallocF, Name);
}
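For C++ callers the visible change is the extra AllocSize operand: CreateMalloc no longer computes sizeof(AllocTy) itself. A sketch of the adaptation, following the same getSizeOf/getTruncOrBitCast folding used in the wrappers above (the emitMalloc function is hypothetical):

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
using namespace llvm;

static Instruction *emitMalloc(const Type *AllocTy, const Type *IntPtrTy,
                               Instruction *InsertBefore) {
  // Constant-fold sizeof(AllocTy), then narrow it to the target's intptr.
  Constant *AllocSize = ConstantExpr::getSizeOf(AllocTy);
  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
  // A NULL array size means a single element; the result is bitcast to
  // AllocTy* as before.
  return CallInst::CreateMalloc(InsertBefore, IntPtrTy, AllocTy,
                                AllocSize, 0, "obj");
}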
@ -31,6 +31,7 @@ entry:
}

declare i32 @bar(i8*)
declare i32 @bar2(i64*)

define i32 @foo1(i32 %n) nounwind {
entry:
@ -60,11 +61,16 @@ entry:
  ret i32 %add16
}

define i32 @foo2(i32 %n) nounwind {
define i32 @foo2(i64 %n) nounwind {
entry:
  %call = malloc i8, i32 %n                       ; <i8*> [#uses=1]
  %call = tail call i8* @malloc(i64 %n)           ; <i8*> [#uses=1]
; CHECK: %call =
; CHECK: ==> %n elements, %n bytes allocated
  %mallocsize = mul i64 %n, 8                     ; <i64> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
  %call3 = bitcast i8* %malloccall to i64*        ; <i64*> [#uses=1]
; CHECK: %malloccall =
; CHECK: ==> (8 * %n) elements, (8 * %n) bytes allocated
  %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; <i8*> [#uses=1]
; CHECK: %call2 =
; CHECK: ==> 8 elements, 8 bytes allocated
@ -72,13 +78,17 @@ entry:
; CHECK: %call4 =
; CHECK: ==> 16 elements, 16 bytes allocated
  %call6 = tail call i32 @bar(i8* %call) nounwind ; <i32> [#uses=1]
  %call7 = tail call i32 @bar2(i64* %call3) nounwind ; <i32> [#uses=1]
  %call8 = tail call i32 @bar(i8* %call2) nounwind ; <i32> [#uses=1]
  %call10 = tail call i32 @bar(i8* %call4) nounwind ; <i32> [#uses=1]
  %add = add i32 %call8, %call6                   ; <i32> [#uses=1]
  %add11 = add i32 %add, %call10                  ; <i32> [#uses=1]
  %add = add i32 %call8, %call6                   ; <i32> [#uses=1]
  %add10 = add i32 %add, %call7                   ; <i32> [#uses=1]
  %add11 = add i32 %add10, %call10                ; <i32> [#uses=1]
  ret i32 %add11
}

declare noalias i8* @malloc(i64) nounwind

declare noalias i8* @calloc(i64, i64) nounwind

declare noalias i8* @realloc(i8* nocapture, i64) nounwind
test/CodeGen/ARM/indirectbr.ll (new file, 60 lines)
@ -0,0 +1,60 @@
; RUN: llc < %s -relocation-model=pic -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=ARM
; RUN: llc < %s -relocation-model=pic -mtriple=thumb-apple-darwin | FileCheck %s -check-prefix=THUMB
; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2

@nextaddr = global i8* null                       ; <i8**> [#uses=2]
@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]

define internal arm_apcscc i32 @foo(i32 %i) nounwind {
; ARM: foo:
; THUMB: foo:
; THUMB2: foo:
entry:
  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
  %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
  br i1 %1, label %bb3, label %bb2

bb2:                                              ; preds = %entry, %bb3
  %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
; ARM: bx
; THUMB: mov pc, r1
; THUMB2: mov pc, r1
  indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]

bb3:                                              ; preds = %entry
  %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
  br label %bb2

L5:                                               ; preds = %bb2
  br label %L4

L4:                                               ; preds = %L5, %bb2
  %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ]     ; <i32> [#uses=1]
  br label %L3

L3:                                               ; preds = %L4, %bb2
  %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ]   ; <i32> [#uses=1]
  br label %L2

L2:                                               ; preds = %L3, %bb2
  %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ]   ; <i32> [#uses=1]
  %phitmp = mul i32 %res.2, 6                     ; <i32> [#uses=1]
  br label %L1

L1:                                               ; preds = %L2, %bb2
  %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
; ARM: ldr r1, LCPI
; ARM: add r1, pc, r1
; ARM: str r1
; THUMB: ldr.n r2, LCPI
; THUMB: add r2, pc
; THUMB: str r2
; THUMB2: ldr.n r2, LCPI
; THUMB2-NEXT: str r2
  store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
  ret i32 %res.3
}
; ARM: .long L_foo_L5-(LPC{{.*}}+8)
; THUMB: .long L_foo_L5-(LPC{{.*}}+4)
; THUMB2: .long L_foo_L5
test/CodeGen/PowerPC/indirectbr.ll (new file, 55 lines)
@ -0,0 +1,55 @@
; RUN: llc < %s -relocation-model=pic -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=PIC
; RUN: llc < %s -relocation-model=static -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=STATIC

@nextaddr = global i8* null                       ; <i8**> [#uses=2]
@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]

define internal i32 @foo(i32 %i) nounwind {
; PIC: foo:
; STATIC: foo:
entry:
  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
  %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
  br i1 %1, label %bb3, label %bb2

bb2:                                              ; preds = %entry, %bb3
  %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
; PIC: mtctr
; PIC-NEXT: bctr
; STATIC: mtctr
; STATIC-NEXT: bctr
  indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]

bb3:                                              ; preds = %entry
  %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
  br label %bb2

L5:                                               ; preds = %bb2
  br label %L4

L4:                                               ; preds = %L5, %bb2
  %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ]     ; <i32> [#uses=1]
  br label %L3

L3:                                               ; preds = %L4, %bb2
  %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ]   ; <i32> [#uses=1]
  br label %L2

L2:                                               ; preds = %L3, %bb2
  %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ]   ; <i32> [#uses=1]
  %phitmp = mul i32 %res.2, 6                     ; <i32> [#uses=1]
  br label %L1

L1:                                               ; preds = %L2, %bb2
  %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
; PIC: addis r4, r2, ha16(L_foo_L5-"L1$pb")
; PIC: li r5, lo16(L_foo_L5-"L1$pb")
; PIC: add r4, r4, r5
; PIC: stw r4
; STATIC: li r2, lo16(L_foo_L5)
; STATIC: addis r2, r2, ha16(L_foo_L5)
; STATIC: stw r2
  store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
  ret i32 %res.3
}
test/Transforms/DeadStoreElimination/no-targetdata.ll (new file, 15 lines)
@ -0,0 +1,15 @@
; RUN: opt %s -dse -S | FileCheck %s

declare void @test1f()

define void @test1(i32* noalias %p) {
  store i32 1, i32* %p
  call void @test1f()
  store i32 2, i32* %p
  ret void
; CHECK: define void @test1
; CHECK-NOT: store
; CHECK-NEXT: call void
; CHECK-NEXT: store i32 2
; CHECK-NEXT: ret void
}
@ -1,4 +1,5 @@
; RUN: opt < %s -globalopt
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

%struct.s_annealing_sched = type { i32, float, float, float, float }
%struct.s_bb = type { i32, i32, i32, i32 }
@ -96,7 +97,9 @@ bb.i34:   ; preds = %bb
  unreachable

bb1.i38:   ; preds = %bb
  %0 = malloc %struct.s_net, i32 undef            ; <%struct.s_net*> [#uses=1]
  %mallocsize = mul i64 28, undef                 ; <i64> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
  %0 = bitcast i8* %malloccall to %struct.s_net*  ; <%struct.s_net*> [#uses=1]
  br i1 undef, label %bb.i1.i39, label %my_malloc.exit2.i

bb.i1.i39:   ; preds = %bb1.i38
@ -115,3 +118,5 @@ my_malloc.exit8.i:   ; preds = %my_malloc.exit2.i
bb7:   ; preds = %bb6.preheader
  unreachable
}

declare noalias i8* @malloc(i64)
@ -1,18 +1,22 @@
; RUN: opt < %s -globalopt -S | grep {@X.f0}
; RUN: opt < %s -globalopt -S | grep {@X.f1}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X.f0
; CHECK: @X.f1

define void @bar(i32 %Size) nounwind noinline {
define void @bar(i64 %Size) nounwind noinline {
entry:
  %.sub = malloc %struct.foo, i32 %Size
  %mallocsize = mul i64 %Size, 8                  ; <i64> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
  %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
  store %struct.foo* %.sub, %struct.foo** @X, align 4
  ret void
}

declare noalias i8* @malloc(i64)

define i32 @baz() nounwind readonly noinline {
bb1.thread:
  %0 = load %struct.foo** @X, align 4
@ -1,20 +1,22 @@
; RUN: opt < %s -globalopt -S | grep {@X.f0}
; RUN: opt < %s -globalopt -S | grep {@X.f1}
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null          ; <%struct.foo**> [#uses=2]
; CHECK: @X.f0
; CHECK: @X.f1

define void @bar(i32 %Size) nounwind noinline {
entry:
  %0 = malloc [1000000 x %struct.foo]
  ;%.sub = bitcast [1000000 x %struct.foo]* %0 to %struct.foo*
  %malloccall = tail call i8* @malloc(i64 8000000) ; <i8*> [#uses=1]
  %0 = bitcast i8* %malloccall to [1000000 x %struct.foo]* ; <[1000000 x %struct.foo]*> [#uses=1]
  %.sub = getelementptr [1000000 x %struct.foo]* %0, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
  store %struct.foo* %.sub, %struct.foo** @X, align 4
  ret void
}

declare noalias i8* @malloc(i64)

define i32 @baz() nounwind readonly noinline {
bb1.thread:
  %0 = load %struct.foo** @X, align 4             ; <%struct.foo*> [#uses=1]
@ -1,24 +1,22 @@
; RUN: opt < %s -globalopt -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10"
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X.f0
; CHECK: @X.f1

define void @bar(i32 %Size) nounwind noinline {
define void @bar(i64 %Size) nounwind noinline {
entry:
  %mallocsize = mul i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), %Size, ; <i32> [#uses=1]
; CHECK: mul i32 %Size
  %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1]
  %mallocsize = mul i64 8, %Size,                 ; <i64> [#uses=1]
; CHECK: mul i64 %Size, 4
  %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
  %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
  store %struct.foo* %.sub, %struct.foo** @X, align 4
  ret void
}

declare noalias i8* @malloc(i32)
declare noalias i8* @malloc(i64)

define i32 @baz() nounwind readonly noinline {
bb1.thread:
@ -1,24 +1,22 @@
; RUN: opt < %s -globalopt -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X.f0
; CHECK: @X.f1

define void @bar(i32 %Size) nounwind noinline {
define void @bar(i64 %Size) nounwind noinline {
entry:
  %mallocsize = shl i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), 9, ; <i32> [#uses=1]
  %malloccall = tail call i8* @malloc(i32 %mallocsize) ; <i8*> [#uses=1]
; CHECK: @malloc(i32 mul (i32 512
  %mallocsize = shl i64 %Size, 3                  ; <i64> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
; CHECK: mul i64 %Size, 4
  %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
  store %struct.foo* %.sub, %struct.foo** @X, align 4
  ret void
}

declare noalias i8* @malloc(i32)
declare noalias i8* @malloc(i64)

define i32 @baz() nounwind readonly noinline {
bb1.thread:
@ -1,19 +1,21 @@
; RUN: opt < %s -globalopt -S | grep {tmp.f1 = phi i32. }
; RUN: opt < %s -globalopt -S | grep {tmp.f0 = phi i32. }
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null          ; <%struct.foo**> [#uses=2]

define void @bar(i32 %Size) nounwind noinline {
entry:
  %tmp = malloc [1000000 x %struct.foo]           ; <[1000000 x %struct.foo]*> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 8000000) ; <i8*> [#uses=1]
  %tmp = bitcast i8* %malloccall to [1000000 x %struct.foo]* ; <[1000000 x %struct.foo]*> [#uses=1]
  %.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
  store %struct.foo* %.sub, %struct.foo** @X, align 4
  ret void
}

declare noalias i8* @malloc(i64)

define i32 @baz() nounwind readonly noinline {
bb1.thread:
  %tmpLD1 = load %struct.foo** @X, align 4        ; <%struct.foo*> [#uses=1]
@ -1,19 +1,24 @@
; RUN: opt < %s -globalopt -S | not grep global
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

@G = internal global i32* null          ; <i32**> [#uses=3]
; CHECK-NOT: global

define void @init() {
  %P = malloc i32                                 ; <i32*> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 4)      ; <i8*> [#uses=1]
  %P = bitcast i8* %malloccall to i32*            ; <i32*> [#uses=1]
  store i32* %P, i32** @G
  %GV = load i32** @G                             ; <i32*> [#uses=1]
  store i32 0, i32* %GV
  ret void
}

declare noalias i8* @malloc(i64)

define i32 @get() {
  %GV = load i32** @G                             ; <i32*> [#uses=1]
  %V = load i32* %GV                              ; <i32> [#uses=1]
  ret i32 %V
; CHECK: ret i32 0
}
@ -1,11 +1,11 @@
; RUN: opt < %s -globalopt -globaldce -S | not grep malloc
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

@G = internal global i32* null          ; <i32**> [#uses=3]

define void @init() {
  %P = malloc i32, i32 100                        ; <i32*> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1]
  %P = bitcast i8* %malloccall to i32*            ; <i32*> [#uses=1]
  store i32* %P, i32** @G
  %GV = load i32** @G                             ; <i32*> [#uses=1]
  %GVe = getelementptr i32* %GV, i32 40           ; <i32*> [#uses=1]
@ -13,6 +13,8 @@ define void @init() {
  ret void
}

declare noalias i8* @malloc(i64)

define i32 @get() {
  %GV = load i32** @G                             ; <i32*> [#uses=1]
  %GVe = getelementptr i32* %GV, i32 40           ; <i32*> [#uses=1]
@ -1,11 +1,11 @@
; RUN: opt < %s -globalopt -globaldce -S | not grep malloc
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

@G = internal global i32* null          ; <i32**> [#uses=4]

define void @init() {
  %P = malloc i32, i32 100                        ; <i32*> [#uses=1]
  %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) ; <i8*> [#uses=1]
  %P = bitcast i8* %malloccall to i32*            ; <i32*> [#uses=1]
  store i32* %P, i32** @G
  %GV = load i32** @G                             ; <i32*> [#uses=1]
  %GVe = getelementptr i32* %GV, i32 40           ; <i32*> [#uses=1]
@ -13,6 +13,8 @@ define void @init() {
  ret void
}

declare noalias i8* @malloc(i64)

define i32 @get() {
  %GV = load i32** @G                             ; <i32*> [#uses=1]
  %GVe = getelementptr i32* %GV, i32 40           ; <i32*> [#uses=1]
@ -88,4 +88,85 @@ D:
  ret i32 %c
E:
  ret i32 412
}


define i32 @test2() nounwind {
entry:
  br i1 true, label %decDivideOp.exit, label %bb7.i

bb7.i:                                            ; preds = %bb7.i, %entry
  br label %bb7.i

decDivideOp.exit:                                 ; preds = %entry
  ret i32 undef
}


; PR3298

define i32 @test3(i32 %p_79, i32 %p_80) nounwind {
entry:
  br label %bb7

bb1:                                              ; preds = %bb2
  br label %bb2

bb2:                                              ; preds = %bb7, %bb1
  %l_82.0 = phi i8 [ 0, %bb1 ], [ %l_82.1, %bb7 ] ; <i8> [#uses=3]
  br i1 true, label %bb3, label %bb1

bb3:                                              ; preds = %bb2
  %0 = icmp eq i32 %p_80_addr.1, 0                ; <i1> [#uses=1]
  br i1 %0, label %bb7, label %bb6

bb5:                                              ; preds = %bb6
  %1 = icmp eq i8 %l_82.0, 0                      ; <i1> [#uses=1]
  br i1 %1, label %bb1.i, label %bb.i

bb.i:                                             ; preds = %bb5
  br label %safe_div_func_char_s_s.exit

bb1.i:                                            ; preds = %bb5
  br label %safe_div_func_char_s_s.exit

safe_div_func_char_s_s.exit:                      ; preds = %bb1.i, %bb.i
  br label %bb6

bb6:                                              ; preds = %safe_div_func_char_s_s.exit, %bb3
  %p_80_addr.0 = phi i32 [ %p_80_addr.1, %bb3 ], [ 1, %safe_div_func_char_s_s.exit ] ; <i32> [#uses=2]
  %2 = icmp eq i32 %p_80_addr.0, 0                ; <i1> [#uses=1]
  br i1 %2, label %bb7, label %bb5

bb7:                                              ; preds = %bb6, %bb3, %entry
  %l_82.1 = phi i8 [ 1, %entry ], [ %l_82.0, %bb3 ], [ %l_82.0, %bb6 ] ; <i8> [#uses=2]
  %p_80_addr.1 = phi i32 [ 0, %entry ], [ %p_80_addr.1, %bb3 ], [ %p_80_addr.0, %bb6 ] ; <i32> [#uses=4]
  %3 = icmp eq i32 %p_80_addr.1, 0                ; <i1> [#uses=1]
  br i1 %3, label %bb8, label %bb2

bb8:                                              ; preds = %bb7
  %4 = sext i8 %l_82.1 to i32                     ; <i32> [#uses=0]
  ret i32 0
}


; PR3353

define i32 @test4(i8 %X) {
entry:
  %Y = add i8 %X, 1
  %Z = add i8 %Y, 1
  br label %bb33.i

bb33.i:                                           ; preds = %bb33.i, %bb32.i
  switch i8 %Y, label %bb32.i [
    i8 39, label %bb35.split.i
    i8 13, label %bb33.i
  ]

bb35.split.i:
  ret i32 5
bb32.i:
  ret i32 1
}
@ -17,7 +17,8 @@ class LitConfig:
    def __init__(self, progname, path, quiet,
                 useValgrind, valgrindArgs,
                 useTclAsSh,
                 noExecute, debug, isWindows):
                 noExecute, debug, isWindows,
                 params):
        # The name of the test runner.
        self.progname = progname
        # The items to add to the PATH environment variable.
@ -29,6 +30,7 @@ class LitConfig:
        self.noExecute = noExecute
        self.debug = debug
        self.isWindows = bool(isWindows)
        self.params = dict(params)
        self.bashPath = None

        self.numErrors = 0
@ -321,6 +321,10 @@ def main():
    parser.add_option("", "--config-prefix", dest="configPrefix",
                      metavar="NAME", help="Prefix for 'lit' config files",
                      action="store", default=None)
    parser.add_option("", "--param", dest="userParameters",
                      metavar="NAME=VAL",
                      help="Add 'NAME' = 'VAL' to the user defined parameters",
                      type=str, action="append", default=[])

    group = OptionGroup(parser, "Output Format")
    # FIXME: I find these names very confusing, although I like the
@ -396,6 +400,15 @@ def main():

    inputs = args

    # Create the user defined parameters.
    userParams = {}
    for entry in opts.userParameters:
        if '=' not in entry:
            name,val = entry,''
        else:
            name,val = entry.split('=', 1)
        userParams[name] = val

    # Create the global config object.
    litConfig = LitConfig.LitConfig(progname = os.path.basename(sys.argv[0]),
                                    path = opts.path,
@ -405,7 +418,8 @@ def main():
                                    useTclAsSh = opts.useTclAsSh,
                                    noExecute = opts.noExecute,
                                    debug = opts.debug,
                                    isWindows = (platform.system()=='Windows'))
                                    isWindows = (platform.system()=='Windows'),
                                    params = userParams)

    # Load the tests from the inputs.
    tests = []