Newer
Older
// FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
// remove this check, as it is duplicated with what we have below.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
LoadSize >>= 3;
bool isAAFailure = false;
if (StoreOffset < LoadOffset)
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
else
isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
<< "Load Ptr = " << *LoadPtr << "\n";
#endif
return -1;
}
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
// valuable.
if (StoreOffset > LoadOffset ||
StoreOffset+StoreSize < LoadOffset+LoadSize)
return -1;
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset-StoreOffset;
}
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, TD);
}
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (SizeCst == 0) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
// of the memset..
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, TD);
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
MI->getDest(), MemSizeInBits, TD);
if (Offset == -1)
return Offset;
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store, and the
/// store has already been found to supply the bits read by the load.
///
/// SrcVal is the stored value, Offset is the byte offset of the load within
/// it, and LoadTy is the type being loaded.  Instructions that extract the
/// loaded bits are created in front of InsertPt, and the result of coercing
/// them to LoadTy via CoerceAvailableValueToLoadType is returned.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  // Sizes in bytes, rounded up.
  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.  Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.  On a
  // little-endian target the byte at Offset starts at bit Offset*8; on a
  // big-endian target the loaded bytes are counted from the other end.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  // Drop the high bits that the load does not read.
  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
/// GetMemInstValueForLoad - Produce the value of a load of type LoadTy whose
/// memdep query ended up at the clobbering mem intrinsic SrcInst.  Offset is
/// the byte offset of the load inside the region written by the intrinsic.
/// Callers only use this when the mem transfer fully provides the bits for
/// the load.  Any new instructions go through a builder placed at InsertPt.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst);
  if (!MSI) {
    // Not a memset, so this is a memcpy/memmove from a constant global.
    // Form "(LoadTy*)((i8*)Src + Offset)" and constant fold a load from it.
    MemTransferInst *Transfer = cast<MemTransferInst>(SrcInst);
    Constant *Addr = cast<Constant>(Transfer->getSource());
    Addr = ConstantExpr::getBitCast(Addr,
                                 llvm::Type::getInt8PtrTy(Addr->getContext()));
    Constant *OffsetCst =
      ConstantInt::get(Type::getInt64Ty(Addr->getContext()), (unsigned)Offset);
    Addr = ConstantExpr::getGetElementPtr(Addr, &OffsetCst, 1);
    Addr = ConstantExpr::getBitCast(Addr, PointerType::getUnqual(LoadTy));
    return ConstantFoldLoadFromConstPtr(Addr, &TD);
  }

  // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
  // independently of what the offset is.
  Value *Byte = MSI->getValue();
  if (LoadSize != 1)
    Byte = Builder.CreateZExt(Byte, IntegerType::get(Ctx, LoadSize*8));

  // Replicate the byte until LoadSize bytes have been filled in.
  Value *Splat = Byte;
  unsigned BytesSet = 1;
  while (BytesSet != LoadSize) {
    if (BytesSet*2 <= LoadSize) {
      // There is room to double the number of bytes set in one step.
      Value *Shifted = Builder.CreateShl(Splat, BytesSet*8);
      Splat = Builder.CreateOr(Splat, Shifted);
      BytesSet <<= 1;
    } else {
      // Otherwise add a single byte at the bottom.
      Value *Shifted = Builder.CreateShl(Splat, 8);
      Splat = Builder.CreateOr(Byte, Shifted);
      ++BytesSet;
    }
  }

  return CoerceAvailableValueToLoadType(Splat, LoadTy, InsertPt, TD);
}
struct AvailableValueInBlock {
/// BB - The basic block in question.
BasicBlock *BB;
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
MemIntrin // A memory intrinsic which is loaded from.
};
/// V - The value that is live out of the block.
PointerIntPair<Value *, 1, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
Res.Val.setPointer(V);
Res.Val.setInt(SimpleVal);
Res.Offset = Offset;
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
Res.Val.setPointer(MI);
Res.Val.setInt(MemIntrin);
Res.Offset = Offset;
return Res;
}
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
MemIntrinsic *getMemIntrinValue() const {
assert(!isSimpleValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(const Type *LoadTy,
const TargetData *TD) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
} else {
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
return Res;
}
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
///
/// TD is forwarded to MaterializeAdjustedValue for type coercion, DT is used
/// to detect the single dominating value case, and AA is told about PHI nodes
/// created for pointer-typed results.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    // Only the first value recorded for a block is used.
    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
Owen Anderson
committed
return II->getIntrinsicID() == Intrinsic::lifetime_start;
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
Chris Lattner
committed
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
Chris Lattner
committed
SmallVector<NonLocalDepResult, 64> Deps;
AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
//DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
// << Deps.size() << *LI << '\n');
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
if (Deps.size() > 100)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
return false;
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks;
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
if (DepInfo.isClobber()) {
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
// to consider the right address.
Value *Address = Deps[i].getAddress();
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
DepSI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getValueOperand(),
Offset));
continue;
}
}
}
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
continue;
}
}
}
UnavailableBlocks.push_back(DepBB);
continue;
}
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
Owen Anderson
committed
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
UndefValue::get(LI->getType())));
continue;
}
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
LI->getType(), *TD)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
S->getValueOperand()));
continue;
}
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
UnavailableBlocks.push_back(DepBB);
continue;
}
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
continue;
UnavailableBlocks.push_back(DepBB);
continue;
// If we have no predecessors that produce a known value for this load, exit
// early.
if (ValuesPerBlock.empty()) return false;
// If all of the instructions we depend on produce a known value for this
// load, then it is fully redundant and we can use PHI insertion to compute
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Perform PHI construction.
Chris Lattner
committed
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
VN.getAliasAnalysis());
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
Duncan Sands
committed
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
++NumGVNLoad;
return true;
if (!EnablePRE || !EnableLoadPRE)
return false;
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Let's think about
// doing PRE of this load. This will involve inserting a new load into the
// predecessor when it's not available. We could do this in general, but
// prefer to not increase code size. As such, we only do this when we know
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
Owen Anderson
committed
SmallPtrSet<BasicBlock *, 4> Blockers;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
// Let's find the first basic block with more than one predecessor. Walk backwards
// through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
Owen Anderson
committed
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
bool allSingleSucc = true;
Owen Anderson
committed
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
return false;
if (Blockers.count(TmpBB))
return false;
Owen Anderson
committed
// If any of these blocks has more than one successor (i.e. if the edge we
// just traversed was critical), then there are other paths through this
// block along which the load may not be anticipated. Hoisting the load
// above this block would be adding the load to execution paths along
// which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
Owen Anderson
committed
return false;
Owen Anderson
committed
}
Owen Anderson
committed
assert(TmpBB);
LoadBB = TmpBB;
// FIXME: It is extremely unclear what this loop is doing, other than
// artificially restricting loadpre.
Owen Anderson
committed
if (isSinglePred) {
bool isHot = false;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
if (AV.isSimpleValue())
// "Hot" Instruction is in some loop (because it dominates its dep.
// instruction).
if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
if (DT->dominates(LI, I)) {
isHot = true;
break;
}
}
Owen Anderson
committed
// We are interested only in "hot" instructions. We don't want to do any
// mis-optimizations here.
if (!isHot)
return false;
}
Bob Wilson
committed
// Check to see how many predecessors have the loaded value fully
// available.
DenseMap<BasicBlock*, Value*> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
Bob Wilson
committed
BasicBlock *Pred = *PI;
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
continue;
Bob Wilson
committed
}
PredLoads[Pred] = 0;
Bob Wilson
committed
if (Pred->getTerminator()->getNumSuccessors() != 1) {
if (isa<IndirectBrInst>(Pred->getTerminator())) {
DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
Bob Wilson
committed
}
if (!NeedToSplit.empty()) {
toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
return false;
}
Bob Wilson
committed
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available LI and insert a new load into
// that one block.
if (NumUnavailablePreds != 1)
Bob Wilson
committed
return false;
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
Bob Wilson
committed
for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
E = PredLoads.end(); I != E; ++I) {
BasicBlock *UnavailablePred = I->first;
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Bob Wilson
committed
Value *LoadPtr = 0;
if (allSingleSucc) {
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
} else {
Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
Bob Wilson
committed
LoadPtr = Address.getAddr();
}
Bob Wilson
committed
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
if (LoadPtr == 0) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getPointerOperand() << "\n");
Bob Wilson
committed
CanDoPRE = false;
break;
}
Bob Wilson
committed
// Make sure it is valid to move this load here. We have to watch out for:
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
// load @1
// It is valid to have the getelementptr before the test, even if p can be 0,
// as getelementptr only does address arithmetic.
// If we are not pushing the value through any multiple-successor blocks
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
if (!allSingleSucc &&
// FIXME: REEVALUATE THIS.
!isSafeToLoadUnconditionally(LoadPtr,
UnavailablePred->getTerminator(),
LI->getAlignment(), TD)) {
CanDoPRE = false;
break;
}
I->second = LoadPtr;
Bob Wilson
committed
if (!CanDoPRE) {
while (!NewInsts.empty())
NewInsts.pop_back_val()->eraseFromParent();
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
DEBUG(if (!NewInsts.empty())
dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
<< *NewInsts.back() << '\n');
Bob Wilson
committed
// Assign value numbers to the new instructions.
for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
VN.lookup_or_add(NewInsts[i]);
}
for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
E = PredLoads.end(); I != E; ++I) {
BasicBlock *UnavailablePred = I->first;
Value *LoadPtr = I->second;
Bob Wilson
committed
Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
NewLoad));
MD->invalidateCachedPointerInfo(LoadPtr);
DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
Bob Wilson
committed
}
// Perform PHI construction.
Chris Lattner
committed
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
VN.getAliasAnalysis());
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
Duncan Sands
committed
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
++NumPRELoad;
return true;
}
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (!MD)
return false;
if (L->isVolatile())
Owen Anderson
committed
return false;
Owen Anderson
committed
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
// If the value isn't available, don't do anything!
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
// %B = gep i8* %A, i32 1
// %C = load i8* %B
//
// We could do that by recognizing if the clobber instructions are obviously
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
Value *AvailVal = 0;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
if (TD) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
DepSI, *TD);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
L->getType(), L, *TD);
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
if (TD) {
int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
L->getPointerOperand(),
DepMI, *TD);
if (Offset != -1)
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
}
}
if (AvailVal) {
DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n");
// Replace the load!
L->replaceAllUsesWith(AvailVal);
Duncan Sands
committed
if (AvailVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
return true;
}
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
dbgs() << "GVN: load ";
WriteAsOperand(dbgs(), L);
Instruction *I = Dep.getInst();
);
}
// If it is defined in another block, try harder.
return processNonLocalLoad(L, toErase);
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
if (TD) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *TD);
if (StoredVal == 0)
return false;
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
<< '\n' << *L << "\n\n\n");
}
else
return false;
}
L->replaceAllUsesWith(StoredVal);
Duncan Sands
committed
if (StoredVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
return true;
}
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
Value *AvailableVal = DepLI;
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
if (TD) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
if (AvailableVal == 0)
return false;
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
<< "\n" << *L << "\n\n\n");
}
else
return false;
L->replaceAllUsesWith(AvailableVal);
Duncan Sands
committed
if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
Owen Anderson
committed
}
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
Victor Hernandez
committed
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
}
Owen Anderson
committed
// If this load occurs right after a lifetime begin,
// then the loaded value is undefined.
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
Owen Anderson
committed
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
return true;
}
}
Owen Anderson
committed
}
/// lookupNumber - Find a value with value number num that is available in
/// BB.  The search starts at the scope registered for BB in localAvail and
/// then follows the chain of parent scopes.  Returns 0 if BB has no scope or
/// no scope on the chain has an entry for num.
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  // Step to the parent scope on every miss; without that step the loop would
  // keep probing the same table forever.
  for (ValueNumberScope *Locals = I->second; Locals; Locals = Locals->parent) {
    DenseMap<uint32_t, Value*>::iterator VI = Locals->table.find(num);
    if (VI != Locals->table.end())
      return VI->second;
  }

  return 0;
}
Owen Anderson
committed
Owen Anderson
committed
/// processInstruction - When calculating availability, handle an instruction
Owen Anderson
committed
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I,
Chris Lattner
committed
SmallVectorImpl<Instruction*> &toErase) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
// If the instruction can be easily simplified then do so now in preference
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
if (Value *V = SimplifyInstruction(I, TD, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
VN.erase(I);
toErase.push_back(I);
return true;
}
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = processLoad(LI, toErase);
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
Owen Anderson
committed
}
Owen Anderson
committed
}
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
if (TrueSucc->getSinglePredecessor())
localAvail[TrueSucc]->table[CondVN] =
ConstantInt::getTrue(TrueSucc->getContext());
if (FalseSucc->getSinglePredecessor())
localAvail[FalseSucc]->table[CondVN] =
ConstantInt::getFalse(TrueSucc->getContext());
return false;
Owen Anderson
committed
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
Victor Hernandez
committed
} else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
return false;
Owen Anderson
committed
}
if (isa<PHINode>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
} else if (Num == NextNum) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
// Perform fast-path value-number based elimination of values inherited from
// dominators.
} else if (Value *repl = lookupNumber(I->getParent(), Num)) {
Owen Anderson
committed
// Remove it!
Owen Anderson
committed
I->replaceAllUsesWith(repl);
Duncan Sands
committed
if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
Owen Anderson
committed
toErase.push_back(I);
return true;
Owen Anderson
committed
Owen Anderson
committed
} else {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
}
Owen Anderson
committed
return false;
}
/// runOnFunction - This is the main transformation entry point for a function.
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
bool Changed = false;
bool ShouldContinue = true;
Owen Anderson
committed
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
Owen Anderson
committed
++FI;
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) ++NumGVNBlocks;
Owen Anderson
committed
}
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
if (splitCriticalEdges())
ShouldContinue = true;
Changed |= ShouldContinue;
Owen Anderson
committed
if (EnablePRE) {
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
Owen Anderson
committed
}
// FIXME: Should perform GVN again after PRE does something. PRE can move
// computations into blocks where they become fully redundant. Note that
// we can't do this until PRE's critical edge splitting updates memdep.
// Actually, when this happens, we should just fully integrate PRE into GVN.