}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (TD.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
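// For example, reusing an i64 store to satisfy an i32 load on a big-endian
// target requires an lshr by 32 first, since the bytes the load wants live in
// the high-order bits of the stored integer.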
// Truncate the integer to the right size now.
const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
if (LoadedTy == NewIntTy)
return StoredVal;
// If the result is a pointer, inttoptr.
if (isa<PointerType>(LoadedTy))
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
static void
GetAvailableBlockValues(DenseMap<BasicBlock*, Value*> &BlockReplValues,
SmallVector<std::pair<BasicBlock*,
Value*>, 16> &ValuesPerBlock,
const Type *LoadTy,
const TargetData *TD) {
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
BasicBlock *BB = ValuesPerBlock[i].first;
Value *AvailableVal = ValuesPerBlock[i].second;
Value *&BlockEntry = BlockReplValues[BB];
if (BlockEntry) continue;
if (AvailableVal->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case");
AvailableVal = CoerceAvailableValueToLoadType(AvailableVal, LoadTy,
BB->getTerminator(), *TD);
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
<< *ValuesPerBlock[i].second << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
BlockEntry = AvailableVal;
}
}
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
Deps);
//DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
// << Deps.size() << *LI << '\n');
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
if (Deps.size() > 100)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
if (Deps.size() == 1 && Deps[0].second.isClobber()) {
DEBUG(
errs() << "GVN: non-local load ";
WriteAsOperand(errs(), LI);
errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
);
return false;
}
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks;
const TargetData *TD = 0;
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].first;
MemDepResult DepInfo = Deps[i].second;
if (DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
}
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
ValuesPerBlock.push_back(std::make_pair(DepBB,
UndefValue::get(LI->getType())));
continue;
}
if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
if (S->getOperand(0)->getType() != LI->getType()) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
// If the stored value is at least as large as the loaded value, we can
// reuse it.
if (TD == 0 ||
TD->getTypeSizeInBits(S->getOperand(0)->getType()) <
TD->getTypeSizeInBits(LI->getType())) {
UnavailableBlocks.push_back(DepBB);
continue;
}
}
ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
continue;
} else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
// If the previously loaded value is at least as large as the value we are
// loading, we can reuse it.
if (TD == 0 ||
TD->getTypeSizeInBits(LD->getType()) <
TD->getTypeSizeInBits(LI->getType())) {
UnavailableBlocks.push_back(DepBB);
continue;
}
}
ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
continue;
} else {
// FIXME: Handle memset/memcpy.
UnavailableBlocks.push_back(DepBB);
continue;
}
}
// If we have no predecessors that produce a known value for this load, exit
// early.
if (ValuesPerBlock.empty()) return false;
// If all of the instructions we depend on produce a known value for this
// load, then it is fully redundant and we can use PHI insertion to compute
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
// Use cached PHI construction information from previous runs
SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
// FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
I != E; ++I) {
if ((*I)->getParent() == LI->getParent()) {
DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD #1: " << *LI << '\n');
LI->replaceAllUsesWith(*I);
if (isa<PointerType>((*I)->getType()))
MD->invalidateCachedPointerInfo(*I);
toErase.push_back(LI);
NumGVNLoad++;
return true;
}
ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
}
DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Convert the block information to a map, and insert coercions as needed.
DenseMap<BasicBlock*, Value*> BlockReplValues;
GetAvailableBlockValues(BlockReplValues, ValuesPerBlock, LI->getType(), TD);
// Perform PHI construction.
Value *V = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
if (isa<PointerType>(V->getType()))
MD->invalidateCachedPointerInfo(V);
toErase.push_back(LI);
NumGVNLoad++;
return true;
}
if (!EnablePRE || !EnableLoadPRE)
return false;
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Let's think about
// doing PRE of this load. This will involve inserting a new load into the
// predecessor when it's not available. We could do this in general, but
// prefer to not increase code size. As such, we only do this when we know
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
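// For example, if the value is available in the block's %if.then predecessor
// but not in %if.else, we insert a single reload at the end of %if.else and
// merge it with the existing value via a PHI here (block names illustrative).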
SmallPtrSet<BasicBlock *, 4> Blockers;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
// Let's find the first basic block with more than one predecessor. Walk
// backwards through predecessors if needed.
// through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
if (!TmpBB) // If we haven't found one, bail now.
return false;
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
return false;
if (Blockers.count(TmpBB))
return false;
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
allSingleSucc = false;
}
assert(TmpBB);
LoadBB = TmpBB;
// If we have a repl set with LI itself in it, this means we have a loop where
// at least one of the values is LI. Since this means that we won't be able
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
if (ValuesPerBlock[i].second == LI)
return false;
if (isSinglePred) {
bool isHot = false;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second))
// "Hot" Instruction is in some loop (because it dominates its dep.
// instruction).
if (DT->dominates(LI, I)) {
isHot = true;
break;
}
// We are interested only in "hot" instructions. We don't want to do any
// mis-optimizations here.
if (!isHot)
return false;
}
// Okay, we have some hope :). Check to see if the loaded value is fully
// available in all but one predecessor.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available LI and insert a new load into
// that one block.
BasicBlock *UnavailablePred = 0;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
continue;
// If this load is not available in multiple predecessors, reject it.
if (UnavailablePred && UnavailablePred != *PI)
return false;
UnavailablePred = *PI;
}
assert(UnavailablePred != 0 &&
"Fully available value should be eliminated above!");
// If the loaded pointer is a PHI node defined in this block, do PHI translation
// to get its value in the predecessor.
Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
// Make sure the value is live in the predecessor. If it was defined by a
// non-PHI instruction in this block, we don't know how to recompute it above.
if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
<< *LPInst << '\n' << *LI << "\n");
return false;
}
// We don't currently handle critical edges :(
if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
<< UnavailablePred->getName() << "': " << *LI << '\n');
return false;
}
// Make sure it is valid to move this load here. We have to watch out for:
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
// load @1
// It is valid to have the getelementptr before the test, even if p can be 0,
// as getelementptr only does address arithmetic.
// If we are not pushing the value through any multiple-successor blocks
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
if (!allSingleSucc &&
!isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
return false;
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
I != E; ++I)
ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
DenseMap<BasicBlock*, Value*> BlockReplValues;
GetAvailableBlockValues(BlockReplValues, ValuesPerBlock, LI->getType(), TD);
BlockReplValues[UnavailablePred] = NewLoad;
// Perform PHI construction.
Value *V = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
if (isa<PointerType>(V->getType()))
MD->invalidateCachedPointerInfo(V);
toErase.push_back(LI);
NumPRELoad++;
return true;
}
/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
/// be expressed as a base pointer plus a constant offset. Return the base and
/// offset to the caller.
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const TargetData *TD) {
Operator *PtrOp = dyn_cast<Operator>(Ptr);
if (PtrOp == 0) return Ptr;
// Just look through bitcasts.
if (PtrOp->getOpcode() == Instruction::BitCast)
return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
// If this is a GEP with constant indices, we can look through it.
GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
++I, ++GTI) {
ConstantInt *OpC = cast<ConstantInt>(*I);
if (OpC->isZero()) continue;
// Handle struct and array indices, which add their offset to the pointer.
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD->getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
} else {
uint64_t Size = TD->getTypeAllocSize(GTI.getIndexedType());
Offset += OpC->getSExtValue()*Size;
}
}
// Re-sign extend from the pointer size if needed to get overflow edge cases
// right.
unsigned PtrSize = TD->getPointerSizeInBits();
if (PtrSize < 64)
Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
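// For example, with 32-bit pointers an accumulated offset of 0xFFFFFFFF is
// sign-extended to -1 rather than being treated as 4294967295.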
return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
}
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up. This returns -1 if we have to
/// give up, or a byte number in the stored value of the piece that feeds the
/// load.
static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
const TargetData *TD) {
int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase =
GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
Value *LoadBase =
GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
// If the load and store are to the exact same address, they should have been
// a must alias. AA must have gotten confused.
// FIXME: Study to see if/when this happens.
if (LoadOffset == StoreOffset) {
#if 0
errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
<< "Base = " << *StoreBase << "\n"
<< "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
<< "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
<< "Load Ptr = " << *L->getPointerOperand() << "\n"
<< "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
errs() << "'" << L->getParent()->getParent()->getName() << "'"
<< *L->getParent();
#endif
return -1;
}
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
// FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
// remove this check, as it is duplicated with what we have below.
uint64_t StoreSize = TD->getTypeSizeInBits(DepSI->getOperand(0)->getType());
uint64_t LoadSize = TD->getTypeSizeInBits(L->getType());
if ((StoreSize & 7) | (LoadSize & 7))
return -1;
StoreSize >>= 3; // Convert to bytes.
LoadSize >>= 3;
bool isAAFailure = false;
if (StoreOffset < LoadOffset) {
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
} else {
isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
}
if (isAAFailure) {
#if 0
errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
<< "Base = " << *StoreBase << "\n"
<< "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
<< "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
<< "Load Ptr = " << *L->getPointerOperand() << "\n"
<< "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
errs() << "'" << L->getParent()->getParent()->getName() << "'"
<< *L->getParent();
#endif
return -1;
}
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
// valuable.
if (StoreOffset > LoadOffset ||
StoreOffset+StoreSize < LoadOffset+LoadSize)
return -1;
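// For example, an i64 store at offset 0 that covers an i32 load at offset 4
// yields 4 below: the load reads bytes 4-7 of the stored value.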
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset-StoreOffset;
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, int Offset,const Type *LoadTy,
Instruction *InsertPt, const TargetData *TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
uint64_t StoreSize = TD->getTypeSizeInBits(SrcVal->getType())/8;
uint64_t LoadSize = TD->getTypeSizeInBits(LoadTy)/8;
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (isa<PointerType>(SrcVal->getType()))
SrcVal = new PtrToIntInst(SrcVal, TD->getIntPtrType(Ctx), "tmp", InsertPt);
if (!isa<IntegerType>(SrcVal->getType()))
SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
"tmp", InsertPt);
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (TD->isLittleEndian()) {
ShiftAmt = Offset*8;
} else {
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
}
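// For example, an i16 load at byte offset 2 of an i64 store shifts right by
// 16 bits on a little-endian target and by (8-2-2)*8 = 32 bits on a
// big-endian one.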
SrcVal = BinaryOperator::CreateLShr(SrcVal,
ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
"tmp", InsertPt);
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, *TD);
}
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (L->isVolatile())
return false;
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
// If the value isn't available, don't do anything!
if (Dep.isClobber()) {
// FIXME: We should handle memset/memcpy/memmove as dependent instructions
// to forward the value if available.
//if (isa<MemIntrinsic>(Dep.getInst()))
//errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
// %B = gep i8* %A, i32 1
// %C = load i8* %B
//
// We could do that by recognizing if the clobber instructions are obviously
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, TD);
if (Offset != -1) {
Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
L->getType(), L, TD);
DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n");
// Replace the load!
L->replaceAllUsesWith(AvailVal);
if (isa<PointerType>(AvailVal->getType()))
MD->invalidateCachedPointerInfo(AvailVal);
toErase.push_back(L);
NumGVNLoad++;
return true;
}
}
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
errs() << "GVN: load ";
WriteAsOperand(errs(), L);
Instruction *I = Dep.getInst();
errs() << " is clobbered by " << *I << '\n';
);
return false;
}
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
return processNonLocalLoad(L, toErase);
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getOperand(0);
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
const TargetData *TD = 0;
if (StoredVal->getType() != L->getType() &&
(TD = getAnalysisIfAvailable<TargetData>())) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *TD);
if (StoredVal == 0)
return false;
DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
<< '\n' << *L << "\n\n\n");
}
L->replaceAllUsesWith(StoredVal);
if (isa<PointerType>(StoredVal->getType()))
MD->invalidateCachedPointerInfo(StoredVal);
toErase.push_back(L);
NumGVNLoad++;
return true;
}
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
Value *AvailableVal = DepLI;
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
const TargetData *TD = 0;
if (DepLI->getType() != L->getType() &&
(TD = getAnalysisIfAvailable<TargetData>())) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, *TD);
if (AvailableVal == 0)
return false;
DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
<< "\n" << *L << "\n\n\n");
}
L->replaceAllUsesWith(AvailableVal);
if (isa<PointerType>(DepLI->getType()))
MD->invalidateCachedPointerInfo(DepLI);
toErase.push_back(L);
NumGVNLoad++;
return true;
}
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading from a fresh allocation with no
// intervening stores, for example.
if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
toErase.push_back(L);
NumGVNLoad++;
return true;
}
return false;
}
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
if (I == localAvail.end())
return 0;
ValueNumberScope *Locals = I->second;
while (Locals) {
DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
if (I != Locals->table.end())
return I->second;
Locals = Locals->parent;
}
return 0;
}
/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
/// by inheritance from the dominator fails, see if we can perform phi
/// construction to eliminate the redundancy.
Value *GVN::AttemptRedundancyElimination(Instruction *orig, unsigned valno) {
BasicBlock *BaseBlock = orig->getParent();
SmallPtrSet<BasicBlock*, 4> Visited;
SmallVector<BasicBlock*, 8> Stack;
Stack.push_back(BaseBlock);
DenseMap<BasicBlock*, Value*> Results;
// Walk backwards through our predecessors, looking for instances of the
// value number we're looking for. Instances are recorded in the Results
// map, which is then used to perform phi construction.
while (!Stack.empty()) {
BasicBlock *Current = Stack.back();
Stack.pop_back();
// If we've walked all the way to a proper dominator, then give up. Cases
// where the instance is in the dominator will have been caught by the fast
// path, and any cases that require phi construction further than this are
// probably not worth it anyways. Note that this is a SIGNIFICANT compile
// time improvement.
if (DT->properlyDominates(Current, orig->getParent())) return 0;
DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA =
localAvail.find(Current);
if (LA == localAvail.end()) return 0;
DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno);
if (V != LA->second->table.end()) {
// Found an instance, record it.
Results.insert(std::make_pair(Current, V->second));
continue;
}
// If we reach the beginning of the function, then give up.
if (pred_begin(Current) == pred_end(Current))
return 0;
for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current);
PI != PE; ++PI)
if (Visited.insert(*PI))
Stack.push_back(*PI);
}
// If we didn't find instances, give up. Otherwise, perform phi construction.
if (Results.size() == 0)
return 0;
else
return GetValueForBlock(BaseBlock, orig, Results, true);
}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
SmallVectorImpl<Instruction*> &toErase) {
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = processLoad(LI, toErase);
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
}
return Changed;
}
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
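// For example, after "br i1 %cmp, label %T, label %F", %cmp is known to be
// true inside %T and false inside %F, provided each successor has this block
// as its only predecessor.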
if (TrueSucc->getSinglePredecessor())
localAvail[TrueSucc]->table[CondVN] =
ConstantInt::getTrue(TrueSucc->getContext());
if (FalseSucc->getSinglePredecessor())
localAvail[FalseSucc]->table[CondVN] =
ConstantInt::getFalse(TrueSucc->getContext());
return false;
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
} else if (isa<AllocationInst>(I) || isMalloc(I) || isa<TerminatorInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
return false;
}
if (PHINode* p = dyn_cast<PHINode>(I)) {
Value *constVal = CollapsePhi(p);
if (constVal) {
for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
PI != PE; ++PI)
PI->second.erase(p);
p->replaceAllUsesWith(constVal);
if (isa<PointerType>(constVal->getType()))
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
toErase.push_back(p);
} else {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
}
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
} else if (Num == NextNum) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
// Perform fast-path value-number based elimination of values inherited from
// dominators.
} else if (Value *repl = lookupNumber(I->getParent(), Num)) {
// Remove it!
I->replaceAllUsesWith(repl);
if (isa<PointerType>(repl->getType()))
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
#if 0
// Perform slow-path value-number based elimination with phi construction.
} else if (Value *repl = AttemptRedundancyElimination(I, Num)) {
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
if (isa<PointerType>(repl->getType()))
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
#endif
} else {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
}
return false;
}
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
bool Changed = false;
bool ShouldContinue = true;
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = FI;
++FI;
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) NumGVNBlocks++;
Changed |= removedBlock;
}
DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
Changed |= ShouldContinue;
++Iteration;
}
if (EnablePRE) {
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
Changed |= PREChanged;
}
}
// FIXME: Should perform GVN again after PRE does something. PRE can move
// computations into blocks where they become fully redundant. Note that
// we can't do this until PRE's critical edge splitting updates memdep.
// Actually, when this happens, we should just fully integrate PRE into GVN.
cleanupGlobalSets();
return Changed;
}
bool GVN::processBlock(BasicBlock *BB) {
// FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
// incrementing BI before processing an instruction).
SmallVector<Instruction*, 8> toErase;
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
ChangedFunction |= processInstruction(BI, toErase);
if (toErase.empty()) {
++BI;
continue;
}
// If we need some instructions deleted, do it now.
NumGVNInstr += toErase.size();
// Avoid iterator invalidation.
bool AtStart = BI == BB->begin();
if (!AtStart)
--BI;
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
DEBUG(errs() << "GVN removed: " << **I << '\n');
MD->removeInstruction(*I);
(*I)->eraseFromParent();
}
toErase.clear();
if (AtStart)
BI = BB->begin();
else
++BI;
}
return ChangedFunction;
}
/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function& F) {
bool Changed = false;
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
DenseMap<BasicBlock*, Value*> predMap;
for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
BasicBlock *CurrentBlock = *DI;
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end(); BI != BE; ) {
Instruction *CurInst = BI++;
if (isa<AllocationInst>(CurInst) || isMalloc(CurInst) ||
isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
(CurInst->getType() == Type::getVoidTy(F.getContext())) ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
uint32_t ValNo = VN.lookup(CurInst);
// Look for the predecessors for PRE opportunities. We're
// only trying to solve the basic diamond case, where
// a value is computed in the successor and one predecessor,
// but not the other. We also explicitly disallow cases
// where the successor is its own predecessor, because they're
// more complicated to get right.
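// For example, in a diamond where "%v = add i32 %a, %b" is computed in the
// left predecessor and again here at the join, we clone the add into the
// right predecessor and replace this one with a PHI of the two.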
unsigned NumWith = 0;
unsigned NumWithout = 0;
BasicBlock *PREPred = 0;
predMap.clear();
for (pred_iterator PI = pred_begin(CurrentBlock),
PE = pred_end(CurrentBlock); PI != PE; ++PI) {
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
// that are not reachable.
if (*PI == CurrentBlock) {
NumWithout = 2;
break;
} else if (!localAvail.count(*PI)) {
NumWithout = 2;
break;
}
DenseMap<uint32_t, Value*>::iterator predV =
localAvail[*PI]->table.find(ValNo);
if (predV == localAvail[*PI]->table.end()) {
PREPred = *PI;
NumWithout++;
} else if (predV->second == CurInst) {
NumWithout = 2;
} else {
predMap[*PI] = predV->second;
NumWith++;
}
}
// Don't do PRE when it might increase code size, i.e. when
// we would need to insert instructions in more than one pred.
if (NumWithout != 1 || NumWith == 0)
continue;
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
unsigned SuccNum = 0;
for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
i != e; ++i)
if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
SuccNum = i;
break;
}
if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
continue;
}
// Instantiate the expression in the predecessor that lacked it.
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
// in this loop.
Instruction *PREInstr = CurInst->clone(CurInst->getContext());
bool success = true;
for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
Value *Op = PREInstr->getOperand(i);
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
PREInstr->setOperand(i, V);
} else {
success = false;
break;
}
}
// Fail out if we encounter an operand that is not available in
// the PRE predecessor. This is typically because of loads which
// are not value numbered precisely.
if (!success) {