//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
// Statistics gathered by this pass (reported with -stats).
STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
SROA(int T, bool hasDT, char &ID)
: FunctionPass(ID), HasDomTree(hasDT) {
if (T == -1)
else
SRThreshold = T;
}
Chris Lattner
committed
bool runOnFunction(Function &F);
bool performScalarRepl(Function &F);
bool performPromotion(Function &F);
Chris Lattner
committed
private:
TargetData *TD;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
SmallVector<Value*, 32> DeadInsts;
/// AllocaInfo - When analyzing uses of an alloca instruction, this captures
/// information about the uses. All these fields are initialized to false
/// and set to true when something is learned.
struct AllocaInfo {
/// The alloca to promote.
AllocaInst *AI;
/// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
/// looping and avoid redundant work.
SmallPtrSet<PHINode*, 8> CheckedPHIs;
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
/// isMemCpySrc - This is true if this aggregate is memcpy'd from.
bool isMemCpySrc : 1;
/// isMemCpyDst - This is true if this aggregate is memcpy'd into.
bool isMemCpyDst : 1;
Chris Lattner
committed
/// hasSubelementAccess - This is true if a subelement of the alloca is
/// ever accessed, or false if the alloca is only accessed with mem
/// intrinsics or load/store that only access the entire alloca at once.
bool hasSubelementAccess : 1;
/// hasALoadOrStore - This is true if there are any loads or stores to it.
/// The alloca may just be accessed with memcpy, for example, which would
/// not set this.
bool hasALoadOrStore : 1;
explicit AllocaInfo(AllocaInst *ai)
: AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
Chris Lattner
committed
hasSubelementAccess(false), hasALoadOrStore(false) {}
};
unsigned SRThreshold;
void MarkUnsafe(AllocaInfo &I, Instruction *User) {
I.isUnsafe = true;
DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
}
Victor Hernandez
committed
bool isSafeAllocaToScalarRepl(AllocaInst *AI);
void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
AllocaInfo &Info);
void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
Type *&IdxTy);
Victor Hernandez
committed
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Victor Hernandez
committed
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
Victor Hernandez
committed
void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
Victor Hernandez
committed
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
Nick Lewycky
committed
static MemTransferInst *isOnlyCopiedFromConstantGlobal(
AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
Chris Lattner
committed
};
// SROA_DT - SROA that uses DominatorTree.
struct SROA_DT : public SROA {
static char ID;
public:
SROA_DT(int T = -1) : SROA(T, true, ID) {
initializeSROA_DTPass(*PassRegistry::getPassRegistry());
}
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.setPreservesCFG();
}
};
// SROA_SSAUp - SROA that uses SSAUpdater.
struct SROA_SSAUp : public SROA {
static char ID;
public:
SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
}
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
};
Chris Lattner
committed
}
char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
                      "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
                    "Scalar Replacement of Aggregates (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
                    "Scalar Replacement of Aggregates (SSAUp)", false, false)

// Public interface to the ScalarReplAggregates pass.  UseDomTree selects the
// DominatorTree-based variant; otherwise the SSAUpdater-based variant is used.
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
                                                   bool UseDomTree) {
  if (UseDomTree)
    return new SROA_DT(Threshold);
  return new SROA_SSAUp(Threshold);
}
//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//
/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
/// IsNotTrivial - This is set to true if there is some access to the object
bool IsNotTrivial;
Cameron Zwarich
committed
/// ScalarKind - Tracks the kind of alloca being considered for promotion,
/// computed based on the uses of the alloca rather than the LLVM type system.
enum {
Unknown,
Cameron Zwarich
committed
// Accesses via GEPs that are consistent with element access of a vector
Cameron Zwarich
committed
// type. This will not be converted into a vector unless there is a later
// access using an actual vector type.
ImplicitVector,
// Accesses via vector operations and GEPs that are consistent with the
// layout of a vector type.
Cameron Zwarich
committed
Vector,
Cameron Zwarich
committed
// An integer bag-of-bits with bitwise operations for insertion and
// extraction. Any combination of types can be converted into this kind
// of scalar.
Cameron Zwarich
committed
Integer
} ScalarKind;
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
VectorType *VectorTy;
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
/// large integers unless there is some potential for optimization.
Cameron Zwarich
committed
bool HadNonMemTransferAccess;
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
Cameron Zwarich
committed
: AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
Cameron Zwarich
committed
VectorTy(0), HadNonMemTransferAccess(false) { }
bool CanConvertToScalar(Value *V, uint64_t Offset);
void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
};
} // end anonymous namespace.
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
/// alloca if possible or null if not.
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
    return 0;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  // FIXME: It should be possible to promote the vector type up to the alloca's
  // size.
  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
          << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;
    // Do not convert to a wide integer if only whole-alloca mem transfers
    // touch it and the integer would be wider than a legal integer; the wide
    // loads/stores would pessimize the code.
    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
      return 0;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0);
  return NewAI;
}
/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
Cameron Zwarich
committed
/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
Cameron Zwarich
committed
/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
/// <2 x float> and <4 x float>. Here we turn element accesses into insert
/// and extract element operations, and <2 x float> accesses into a cast to
/// <2 x double>, an extract, and a cast back to <2 x float>.
/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
Cameron Zwarich
committed
if (ScalarKind == Integer)
// If this could be contributing to a vector, analyze it.
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
Cameron Zwarich
committed
if (MergeInVectorType(VInTy, Offset))
return;
} else if (In->isFloatTy() || In->isDoubleTy() ||
(In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
Cameron Zwarich
committed
// Full width accesses can be ignored, because they can always be turned
// into bitcasts.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
Cameron Zwarich
committed
return;
Cameron Zwarich
committed
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
Cameron Zwarich
committed
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
(!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
Cameron Zwarich
committed
if (!VectorTy) {
Cameron Zwarich
committed
ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
Cameron Zwarich
committed
return;
}
Cameron Zwarich
committed
unsigned CurrentEltSize = VectorTy->getElementType()
Cameron Zwarich
committed
->getPrimitiveSizeInBits()/8;
if (EltSize == CurrentEltSize)
return;
if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
return;
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
Cameron Zwarich
committed
ScalarKind = Integer;
/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
Cameron Zwarich
committed
uint64_t Offset) {
Cameron Zwarich
committed
// TODO: Support nonzero offsets?
if (Offset != 0)
return false;
// Only allow vectors that are a power-of-2 away from the size of the alloca.
if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
return false;
// If this the first vector we see, remember the type so that we know the
// element size.
if (!VectorTy) {
Cameron Zwarich
committed
ScalarKind = Vector;
Cameron Zwarich
committed
VectorTy = VInTy;
Cameron Zwarich
committed
return true;
}
Cameron Zwarich
committed
unsigned BitWidth = VectorTy->getBitWidth();
Cameron Zwarich
committed
unsigned InBitWidth = VInTy->getBitWidth();
// Vectors of the same size can be converted using a simple bitcast.
Cameron Zwarich
committed
if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
ScalarKind = Vector;
Cameron Zwarich
committed
return true;
Cameron Zwarich
committed
}
Cameron Zwarich
committed
Type *ElementTy = VectorTy->getElementType();
Type *InElementTy = VInTy->getElementType();
Cameron Zwarich
committed
// If they're the same alloc size, we'll be attempting to convert between
// them with a vector shuffle, which requires the element types to match.
if (TD.getTypeAllocSize(VectorTy) == TD.getTypeAllocSize(VInTy) &&
ElementTy != InElementTy)
return false;
Cameron Zwarich
committed
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
// Do not allow mixed integer and floating-point accesses from vectors of
// different sizes.
if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
return false;
if (ElementTy->isFloatingPointTy()) {
// Only allow floating-point vectors of different sizes if they have the
// same element type.
// TODO: This could be loosened a bit, but would anything benefit?
if (ElementTy != InElementTy)
return false;
// There are no arbitrary-precision floating-point types, which limits the
// number of legal vector types with larger element types that we can form
// to bitcast and extract a subvector.
// TODO: We could support some more cases with mixed fp128 and double here.
if (!(BitWidth == 64 || BitWidth == 128) ||
!(InBitWidth == 64 || InBitWidth == 128))
return false;
} else {
assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
"or floating-point.");
unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
// Do not allow integer types smaller than a byte or types whose widths are
// not a multiple of a byte.
if (BitWidth < 8 || InBitWidth < 8 ||
BitWidth % 8 != 0 || InBitWidth % 8 != 0)
return false;
}
// Pick the largest of the two vector types.
Cameron Zwarich
committed
ScalarKind = Vector;
Cameron Zwarich
committed
if (InBitWidth > BitWidth)
VectorTy = VInTy;
return true;
Cameron Zwarich
committed
}
/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (LI->isVolatile())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with a variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;

      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      // GEPOffset is in bytes; Offset is tracked in bits.
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));

      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        // There's no need to preserve these, as the resulting alloca will be
        // converted to a register anyways.
        II->eraseFromParent();
        continue;
      }
    }

    llvm_unreachable("Unsupported operation!");
  }
}
/// getScaledElementType - Gets a scaled element type for a partial vector
/// access of an alloca. The input types must be integer or floating-point
/// scalar or vector types, and the resulting type is an integer, float or
/// double.
static Type *getScaledElementType(Type *Ty1, Type *Ty2,
                                  unsigned NewBitWidth) {
  bool IsFP1 = Ty1->isFloatingPointTy() ||
               (Ty1->isVectorTy() &&
                cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
  bool IsFP2 = Ty2->isFloatingPointTy() ||
               (Ty2->isVectorTy() &&
                cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());

  LLVMContext &Context = Ty1->getContext();

  // Prefer floating-point types over integer types, as integer types may have
  // been created by earlier scalar replacement.
  if (IsFP1 || IsFP2) {
    if (NewBitWidth == 32)
      return Type::getFloatTy(Context);
    if (NewBitWidth == 64)
      return Type::getDoubleTy(Context);
  }

  return Type::getIntNTy(Context, NewBitWidth);
}
/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
/// to another vector of the same element type which has the same allocation
/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
static Value *CreateShuffleVectorCast(Value *FromVal, Type *ToType,
                                      IRBuilder<> &Builder) {
  VectorType *SrcVTy = cast<VectorType>(FromVal->getType());
  VectorType *DstVTy = cast<VectorType>(ToType);
  assert((DstVTy->getElementType() == SrcVTy->getElementType()) &&
         "Vectors must have the same element type");

  unsigned SrcElts = SrcVTy->getNumElements();
  unsigned DstElts = DstVTy->getNumElements();
  unsigned CommonElts = std::min(SrcElts, DstElts);

  // Build a mask that identity-maps the leading elements of the source and
  // pads the remainder with undef when widening.
  Type *Int32Ty = Builder.getInt32Ty();
  SmallVector<Constant*, 3> MaskElts;
  for (unsigned Idx = 0; Idx != DstElts; ++Idx) {
    if (Idx < CommonElts)
      MaskElts.push_back(ConstantInt::get(Int32Ty, Idx));
    else
      MaskElts.push_back(UndefValue::get(Int32Ty));
  }

  Value *UndefVec = UndefValue::get(SrcVTy);
  Constant *Mask = ConstantVector::get(MaskElts);
  return Builder.CreateShuffleVector(FromVal, UndefVec, Mask, "tmpV");
}
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
                           uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  Type *FromType = FromVal->getType();
  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
    if (FromTypeSize == ToTypeSize) {
      // If the two types have the same primitive size, use a bit cast.
      // Otherwise, it is two vectors with the same element type that has
      // the same allocation size but different number of elements so use
      // a shuffle vector.
      if (FromType->getPrimitiveSizeInBits() ==
          ToType->getPrimitiveSizeInBits())
        return Builder.CreateBitCast(FromVal, ToType, "tmp");
      else
        return CreateShuffleVectorCast(FromVal, ToType, Builder);
    }

    if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
      assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
             "of a smaller vector type at a nonzero offset.");

      // Reinterpret the alloca as a vector of ToType-sized elements, then
      // extract the single element that covers Offset.
      Type *CastElementTy = getScaledElementType(FromType, ToType,
                                                 ToTypeSize * 8);
      unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;

      LLVMContext &Context = FromVal->getContext();
      Type *CastTy = VectorType::get(CastElementTy,
                                     NumCastVectorElements);
      Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");

      unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
      unsigned Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
      Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
                                        Type::getInt32Ty(Context), Elt), "tmp");
      return Builder.CreateBitCast(Extract, ToType, "tmp");
    }

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType, "tmp");
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
            TD.getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    // On little-endian machines the desired bits start at bit 'Offset'
    // from the low end of the integer.
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shl) which are not defined.
  // We do this to support (f.e.) loads off the end of a structure where
  // only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                  ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                 -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
      Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                   LIBitWidth), "tmp");

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}
/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t Offset, IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
Type *AllocaType = Old->getType();
LLVMContext &Context = Old->getContext();
Daniel Dunbar
committed
if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
// Changing the whole vector with memset or with an access of a different
// vector type?
if (ValSize == VecSize) {
// If the two types have the same primitive size, use a bit cast.
// Otherwise, it is two vectors with the same element type that has
// the same allocation size but different number of elements so use
// a shuffle vector.
if (VTy->getPrimitiveSizeInBits() ==
SV->getType()->getPrimitiveSizeInBits())
return Builder.CreateBitCast(SV, AllocaType, "tmp");
else
return CreateShuffleVectorCast(SV, VTy, Builder);
}
Daniel Dunbar
committed
if (isPowerOf2_64(VecSize / ValSize)) {
assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
"value of a smaller vector type at a nonzero offset.");
Cameron Zwarich
committed
Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
ValSize);
Cameron Zwarich
committed
unsigned NumCastVectorElements = VecSize / ValSize;
Cameron Zwarich
committed
LLVMContext &Context = SV->getContext();
Type *OldCastTy = VectorType::get(CastElementTy,
Cameron Zwarich
committed
NumCastVectorElements);
Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
unsigned Elt = Offset/EltSize;
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
Cameron Zwarich
committed
Value *Insert =
Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
Type::getInt32Ty(Context), Elt), "tmp");
Cameron Zwarich
committed
return Builder.CreateBitCast(Insert, AllocaType, "tmp");