Newer
Older
Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
// If the load and store are to the exact same address, they should have been
// a must alias. AA must have gotten confused.
// FIXME: Study to see if/when this happens. One case is forwarding a memset
// to a load from the base of the memset.
if (LoadOffset == StoreOffset) {
dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
<< "Load Ptr = " << *LoadPtr << "\n";
#endif
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
// FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
// remove this check, as it is duplicated with what we have below.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
LoadSize >>= 3;
bool isAAFailure = false;
if (StoreOffset < LoadOffset)
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
else
isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
<< "Load Ptr = " << *LoadPtr << "\n";
#endif
return -1;
}
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
// valuable.
if (StoreOffset > LoadOffset ||
StoreOffset+StoreSize < LoadOffset+LoadSize)
return -1;
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset-StoreOffset;
}
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
Duncan Sands
committed
if (DepSI->getOperand(0)->getType()->isStructTy() ||
DepSI->getOperand(0)->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, TD);
}
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (SizeCst == 0) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
// of the memset..
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, TD);
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
MI->getDest(), MemSizeInBits, TD);
if (Offset == -1)
return Offset;
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
const Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
Duncan Sands
committed
if (SrcVal->getType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
Duncan Sands
committed
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
"tmp");
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (TD.isLittleEndian())
else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
if (ShiftAmt)
SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
"tmp");
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
const Type *LoadTy, Instruction *InsertPt,
const TargetData &TD){
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
// memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
// independently of what the offset is.
Value *Val = MSI->getValue();
if (LoadSize != 1)
Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
Value *OneElt = Val;
// Splat the value out to the right number of bits.
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
// If we can double the number of bytes set, do it.
if (NumBytesSet*2 <= LoadSize) {
Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
Val = Builder.CreateOr(Val, ShVal);
NumBytesSet <<= 1;
continue;
}
// Otherwise insert one byte at a time.
Value *ShVal = Builder.CreateShl(Val, 1*8);
Val = Builder.CreateOr(OneElt, ShVal);
++NumBytesSet;
}
return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
}
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
struct AvailableValueInBlock {
/// BB - The basic block in question.
BasicBlock *BB;
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
MemIntrin // A memory intrinsic which is loaded from.
};
/// V - The value that is live out of the block.
PointerIntPair<Value *, 1, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
Res.Val.setPointer(V);
Res.Val.setInt(SimpleVal);
Res.Offset = Offset;
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
Res.Val.setPointer(MI);
Res.Val.setInt(MemIntrin);
Res.Offset = Offset;
return Res;
}
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
MemIntrinsic *getMemIntrinValue() const {
assert(!isSimpleValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(const Type *LoadTy,
const TargetData *TD) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
} else {
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
return Res;
}
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
const TargetData *TD,
Chris Lattner
committed
const DominatorTree &DT,
AliasAnalysis *AA) {
Chris Lattner
committed
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI);
const Type *LoadTy = LI->getType();
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
if (SSAUpdate.HasValueForBlock(BB))
continue;
SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
// Perform PHI construction.
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
Duncan Sands
committed
if (V->getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
return V;
}
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
Owen Anderson
committed
return II->getIntrinsicID() == Intrinsic::lifetime_start;
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
Chris Lattner
committed
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
Chris Lattner
committed
SmallVector<NonLocalDepResult, 64> Deps;
MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
Deps);
//DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
// << Deps.size() << *LI << '\n');
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
if (Deps.size() > 100)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
return false;
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks;
const TargetData *TD = 0;
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
if (DepInfo.isClobber()) {
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
// to consider the right address.
Value *Address = Deps[i].getAddress();
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
DepSI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getOperand(0),
Offset));
continue;
}
}
}
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
continue;
}
}
}
UnavailableBlocks.push_back(DepBB);
continue;
}
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
Owen Anderson
committed
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
UndefValue::get(LI->getType())));
continue;
}
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
if (S->getOperand(0)->getType() != LI->getType()) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
LI->getType(), *TD)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
S->getOperand(0)));
continue;
}
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
UnavailableBlocks.push_back(DepBB);
continue;
}
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
continue;
UnavailableBlocks.push_back(DepBB);
continue;
// If we have no predecessors that produce a known value for this load, exit
// early.
if (ValuesPerBlock.empty()) return false;
// If all of the instructions we depend on produce a known value for this
// load, then it is fully redundant and we can use PHI insertion to compute
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Perform PHI construction.
Chris Lattner
committed
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
VN.getAliasAnalysis());
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
Duncan Sands
committed
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
++NumGVNLoad;
return true;
if (!EnablePRE || !EnableLoadPRE)
return false;
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
// predecessor when it's not available. We could do this in general, but
// prefer to not increase code size. As such, we only do this when we know
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
Owen Anderson
committed
SmallPtrSet<BasicBlock *, 4> Blockers;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
// Lets find first basic block with more than one predecessor. Walk backwards
// through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
Owen Anderson
committed
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
bool allSingleSucc = true;
Owen Anderson
committed
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
return false;
if (Blockers.count(TmpBB))
return false;
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
allSingleSucc = false;
Owen Anderson
committed
}
Owen Anderson
committed
assert(TmpBB);
LoadBB = TmpBB;
// If we have a repl set with LI itself in it, this means we have a loop where
// at least one of the values is LI. Since this means that we won't be able
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
if (ValuesPerBlock[i].isSimpleValue() &&
ValuesPerBlock[i].getSimpleValue() == LI) {
// Skip cases where LI is the only definition, even for EnableFullLoadPRE.
if (!EnableFullLoadPRE || e == 1)
Bob Wilson
committed
return false;
}
Bob Wilson
committed
}
// FIXME: It is extremely unclear what this loop is doing, other than
// artificially restricting loadpre.
Owen Anderson
committed
if (isSinglePred) {
bool isHot = false;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
if (AV.isSimpleValue())
// "Hot" Instruction is in some loop (because it dominates its dep.
// instruction).
if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
if (DT->dominates(LI, I)) {
isHot = true;
break;
}
}
Owen Anderson
committed
// We are interested only in "hot" instructions. We don't want to do any
// mis-optimizations here.
if (!isHot)
return false;
}
Bob Wilson
committed
// Check to see how many predecessors have the loaded value fully
// available.
DenseMap<BasicBlock*, Value*> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
Bob Wilson
committed
BasicBlock *Pred = *PI;
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
continue;
Bob Wilson
committed
}
PredLoads[Pred] = 0;
Bob Wilson
committed
if (Pred->getTerminator()->getNumSuccessors() != 1) {
if (isa<IndirectBrInst>(Pred->getTerminator())) {
DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
Bob Wilson
committed
}
if (!NeedToSplit.empty()) {
toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
return false;
}
Bob Wilson
committed
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
Bob Wilson
committed
if (!EnableFullLoadPRE) {
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available LI and insert a new load into
// that one block.
if (NumUnavailablePreds != 1)
return false;
}
Bob Wilson
committed
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
Bob Wilson
committed
for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
E = PredLoads.end(); I != E; ++I) {
BasicBlock *UnavailablePred = I->first;
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
PHITransAddr Address(LI->getOperand(0), TD);
Value *LoadPtr = 0;
if (allSingleSucc) {
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
} else {
Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
Bob Wilson
committed
LoadPtr = Address.getAddr();
}
Bob Wilson
committed
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
if (LoadPtr == 0) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getOperand(0) << "\n");
CanDoPRE = false;
break;
}
Bob Wilson
committed
// Make sure it is valid to move this load here. We have to watch out for:
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
// load @1
// It is valid to have the getelementptr before the test, even if p can be 0,
// as getelementptr only does address arithmetic.
// If we are not pushing the value through any multiple-successor blocks
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
if (!allSingleSucc &&
// FIXME: REEVALUTE THIS.
!isSafeToLoadUnconditionally(LoadPtr,
UnavailablePred->getTerminator(),
LI->getAlignment(), TD)) {
CanDoPRE = false;
break;
}
I->second = LoadPtr;
Bob Wilson
committed
if (!CanDoPRE) {
while (!NewInsts.empty())
NewInsts.pop_back_val()->eraseFromParent();
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
DEBUG(if (!NewInsts.empty())
dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
<< *NewInsts.back() << '\n');
Bob Wilson
committed
// Assign value numbers to the new instructions.
for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
VN.lookup_or_add(NewInsts[i]);
}
for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
E = PredLoads.end(); I != E; ++I) {
BasicBlock *UnavailablePred = I->first;
Value *LoadPtr = I->second;
Bob Wilson
committed
Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
NewLoad));
MD->invalidateCachedPointerInfo(LoadPtr);
DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
Bob Wilson
committed
}
// Perform PHI construction.
Chris Lattner
committed
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
VN.getAliasAnalysis());
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
Duncan Sands
committed
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
++NumPRELoad;
return true;
}
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (!MD)
return false;
if (L->isVolatile())
Owen Anderson
committed
return false;
Owen Anderson
committed
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
// If the value isn't available, don't do anything!
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
// %B = gep i8* %A, i32 1
// %C = load i8* %B
//
// We could do that by recognizing if the clobber instructions are obviously
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
Value *AvailVal = 0;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
DepSI, *TD);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
L->getType(), L, *TD);
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
L->getPointerOperand(),
DepMI, *TD);
if (Offset != -1)
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
}
}
if (AvailVal) {
DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n");
// Replace the load!
L->replaceAllUsesWith(AvailVal);
Duncan Sands
committed
if (AvailVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
return true;
}
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
dbgs() << "GVN: load ";
WriteAsOperand(dbgs(), L);
Instruction *I = Dep.getInst();
);
}
// If it is defined in another block, try harder.
return processNonLocalLoad(L, toErase);
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getOperand(0);
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
const TargetData *TD = 0;
if (StoredVal->getType() != L->getType()) {
if ((TD = getAnalysisIfAvailable<TargetData>())) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *TD);
if (StoredVal == 0)
return false;
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
<< '\n' << *L << "\n\n\n");
}
else
return false;
}
L->replaceAllUsesWith(StoredVal);
Duncan Sands
committed
if (StoredVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
return true;
}
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
Value *AvailableVal = DepLI;
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
const TargetData *TD = 0;
if (DepLI->getType() != L->getType()) {
if ((TD = getAnalysisIfAvailable<TargetData>())) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
if (AvailableVal == 0)
return false;
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
<< "\n" << *L << "\n\n\n");
}
else
return false;
L->replaceAllUsesWith(AvailableVal);
Duncan Sands
committed
if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
Owen Anderson
committed
}
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
Victor Hernandez
committed
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
}
Owen Anderson
committed
// If this load occurs either right after a lifetime begin,
// then the loaded value is undefined.
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
Owen Anderson
committed
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
++NumGVNLoad;
return true;
}
}
Owen Anderson
committed
}
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
Owen Anderson
committed
DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
if (I == localAvail.end())
return 0;
ValueNumberScope *Locals = I->second;
while (Locals) {
DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
if (I != Locals->table.end())
Owen Anderson
committed
return I->second;
Owen Anderson
committed
}
Owen Anderson
committed
return 0;
}
Owen Anderson
committed
Owen Anderson
committed
/// processInstruction - When calculating availability, handle an instruction
Owen Anderson
committed
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I,
Chris Lattner
committed
SmallVectorImpl<Instruction*> &toErase) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = processLoad(LI, toErase);
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
Owen Anderson
committed
}
Owen Anderson
committed
}
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
if (TrueSucc->getSinglePredecessor())
localAvail[TrueSucc]->table[CondVN] =
ConstantInt::getTrue(TrueSucc->getContext());
if (FalseSucc->getSinglePredecessor())
localAvail[FalseSucc]->table[CondVN] =
ConstantInt::getFalse(TrueSucc->getContext());
return false;
Owen Anderson
committed
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
Victor Hernandez
committed
} else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
return false;
Owen Anderson
committed
}
Owen Anderson
committed
if (PHINode* p = dyn_cast<PHINode>(I)) {
Value *constVal = CollapsePhi(p);
Owen Anderson
committed
if (constVal) {
p->replaceAllUsesWith(constVal);
Duncan Sands
committed
if (MD && constVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(constVal);
Owen Anderson
committed
VN.erase(p);
Owen Anderson
committed
} else {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
}
Owen Anderson
committed
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
} else if (Num == NextNum) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
// Perform fast-path value-number based elimination of values inherited from
// dominators.
} else if (Value *repl = lookupNumber(I->getParent(), Num)) {
Owen Anderson
committed
// Remove it!
Owen Anderson
committed
I->replaceAllUsesWith(repl);
Duncan Sands
committed
if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
Owen Anderson
committed
toErase.push_back(I);
return true;
Owen Anderson
committed
Owen Anderson
committed
} else {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
Owen Anderson
committed
}
Owen Anderson
committed
return false;
}