Newer
Older
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
Owen Anderson
committed
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
Owen Anderson
committed
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
Matthijs Kooijman
committed
// ValueNumbering analysis passes.
//
Owen Anderson
committed
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
Owen Anderson
committed
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
Owen Anderson
committed
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/PatternMatch.h"
Owen Anderson
committed
using namespace llvm;
using namespace PatternMatch;
Owen Anderson
committed
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
Owen Anderson
committed
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
Owen Anderson
committed
//===----------------------------------------------------------------------===//
// ValueTable Class
//===----------------------------------------------------------------------===//
/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
struct Expression {
Owen Anderson
committed
uint32_t opcode;
Type *type;
Owen Anderson
committed
SmallVector<uint32_t, 4> varargs;
Owen Anderson
committed
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
Owen Anderson
committed
return true;
Owen Anderson
committed
return false;
Benjamin Kramer
committed
return false;
return true;
Owen Anderson
committed
}
};
class ValueTable {
DenseMap<Value*, uint32_t> valueNumbering;
DenseMap<Expression, uint32_t> expressionNumbering;
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
uint32_t lookup_or_add(Value *V);
uint32_t lookup(Value *V) const;
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
AliasAnalysis *getAliasAnalysis() const { return AA; }
void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
void setDomTree(DominatorTree* D) { DT = D; }
uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
void verifyRemoved(const Value *) const;
Owen Anderson
committed
};
}
namespace llvm {
template <> struct DenseMapInfo<Expression> {
Owen Anderson
committed
return ~0U;
Owen Anderson
committed
return ~1U;
Owen Anderson
committed
static unsigned getHashValue(const Expression e) {
unsigned hash = e.opcode;
hash = ((unsigned)((uintptr_t)e.type >> 4) ^
Owen Anderson
committed
(unsigned)((uintptr_t)e.type >> 9));
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
Owen Anderson
committed
hash = *I + hash * 37;
Owen Anderson
committed
Owen Anderson
committed
return hash;
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
}
Owen Anderson
committed
};
Owen Anderson
committed
}
//===----------------------------------------------------------------------===//
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Owen Anderson
committed
Expression ValueTable::create_expression(Instruction *I) {
Owen Anderson
committed
Expression e;
e.type = I->getType();
Owen Anderson
committed
e.opcode = I->getOpcode();
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
Owen Anderson
committed
e.opcode = (C->getOpcode() << 8) | C->getPredicate();
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
e.varargs.push_back(*II);
}
Owen Anderson
committed
return e;
}
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
assert(EI != 0 && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
e.opcode = 0;
IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
// EI might be an extract from one of our recognised intrinsics. If it
// is we'll synthesize a semantically equivalent expression instead on
// an extract value expression.
switch (I->getIntrinsicID()) {
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
e.opcode = Instruction::Add;
break;
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
e.opcode = Instruction::Sub;
break;
case Intrinsic::smul_with_overflow:
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
case Intrinsic::umul_with_overflow:
e.opcode = Instruction::Mul;
break;
default:
break;
}
if (e.opcode != 0) {
// Intrinsic recognized. Grab its args to finish building the expression.
assert(I->getNumArgOperands() == 2 &&
"Expect two args for recognised intrinsics.");
e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
return e;
}
}
// Not a recognised intrinsic. Fall back to producing an extract value
// expression.
e.opcode = EI->getOpcode();
for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
II != IE; ++II)
e.varargs.push_back(*II);
return e;
}
Owen Anderson
committed
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
Owen Anderson
committed
/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
Owen Anderson
committed
valueNumbering.insert(std::make_pair(V, num));
}
Owen Anderson
committed
uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = create_expression(C);
uint32_t& e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = create_expression(C);
uint32_t& e = expressionNumbering[exp];
if (!e) {
e = nextValueNumber++;
valueNumbering[C] = e;
Owen Anderson
committed
}
if (!MD) {
e = nextValueNumber++;
valueNumbering[C] = e;
return e;
}
Owen Anderson
committed
MemDepResult local_dep = MD->getDependency(C);
Owen Anderson
committed
if (!local_dep.isDef() && !local_dep.isNonLocal()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
Owen Anderson
committed
if (local_dep.isDef()) {
CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
Owen Anderson
committed
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
Owen Anderson
committed
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
Owen Anderson
committed
uint32_t v = lookup_or_add(local_cdep);
valueNumbering[C] = v;
return v;
}
Owen Anderson
committed
// Non-local case.
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: Move the checking logic to MemDep!
Owen Anderson
committed
CallInst* cdep = 0;
// Check to see if we have a single dominating call instruction that is
// identical to C.
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
const NonLocalDepEntry *I = &deps[i];
if (I->getResult().isNonLocal())
Owen Anderson
committed
continue;
// We don't handle non-definitions. If we already have a call, reject
Owen Anderson
committed
// instruction dependencies.
if (!I->getResult().isDef() || cdep != 0) {
cdep = 0;
break;
CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
Owen Anderson
committed
// FIXME: All duplicated with non-local case.
if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
Owen Anderson
committed
cdep = NonLocalDepCall;
continue;
}
Owen Anderson
committed
cdep = 0;
break;
}
Owen Anderson
committed
if (!cdep) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
Owen Anderson
committed
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
Owen Anderson
committed
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
}
uint32_t v = lookup_or_add(cdep);
valueNumbering[C] = v;
return v;
Owen Anderson
committed
} else {
Owen Anderson
committed
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
if (VI != valueNumbering.end())
return VI->second;
if (!isa<Instruction>(V)) {
Owen Anderson
committed
return nextValueNumber++;
}
Owen Anderson
committed
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
Instruction* I = cast<Instruction>(V);
Expression exp;
switch (I->getOpcode()) {
case Instruction::Call:
return lookup_or_add_call(cast<CallInst>(I));
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or :
case Instruction::Xor:
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::UIToFP:
case Instruction::SIToFP:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::Select:
case Instruction::ExtractElement:
case Instruction::InsertElement:
case Instruction::ShuffleVector:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
Owen Anderson
committed
exp = create_expression(I);
Owen Anderson
committed
break;
case Instruction::ExtractValue:
exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
break;
Owen Anderson
committed
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
uint32_t& e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
valueNumbering[V] = e;
return e;
Owen Anderson
committed
}
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
Owen Anderson
committed
}
Owen Anderson
committed
void ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
nextValueNumber = 1;
}
/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
Bill Wendling
committed
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
for (DenseMap<Value*, uint32_t>::const_iterator
Bill Wendling
committed
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
}
Owen Anderson
committed
//===----------------------------------------------------------------------===//
Owen Anderson
committed
//===----------------------------------------------------------------------===//
Owen Anderson
committed
namespace {
class GVN : public FunctionPass {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
Chris Lattner
committed
const TargetData *TD;
const TargetLibraryInfo *TLI;
Owen Anderson
committed
ValueTable VN;
Owen Anderson
committed
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
Owen Anderson
committed
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Owen Anderson
committed
Value *Val;
BasicBlock *BB;
Owen Anderson
committed
LeaderTableEntry *Next;
Owen Anderson
committed
};
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
Owen Anderson
committed
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
Chris Lattner
committed
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
: FunctionPass(ID), NoLoads(noloads), MD(0) {
initializeGVNPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
/// markInstructionForDeletion - This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
VN.erase(I);
InstrsToErase.push_back(I);
}
Chris Lattner
committed
const TargetData *getTargetData() const { return TD; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
Chris Lattner
committed
private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
Owen Anderson
committed
void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
Chris Lattner
committed
LeaderTableEntry &Curr = LeaderTable[N];
Owen Anderson
committed
if (!Curr.Val) {
Curr.Val = V;
Curr.BB = BB;
Owen Anderson
committed
return;
}
Chris Lattner
committed
LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
Owen Anderson
committed
Node->Val = V;
Node->BB = BB;
Node->Next = Curr.Next;
Curr.Next = Node;
Owen Anderson
committed
}
/// removeFromLeaderTable - Scan the list of values corresponding to a given
/// value number, and remove the given value if encountered.
Owen Anderson
committed
void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
LeaderTableEntry* Prev = 0;
LeaderTableEntry* Curr = &LeaderTable[N];
Owen Anderson
committed
Owen Anderson
committed
while (Curr->Val != V || Curr->BB != BB) {
Owen Anderson
committed
Prev = Curr;
Owen Anderson
committed
Curr = Curr->Next;
Owen Anderson
committed
}
if (Prev) {
Owen Anderson
committed
Prev->Next = Curr->Next;
Owen Anderson
committed
} else {
Owen Anderson
committed
if (!Curr->Next) {
Curr->Val = 0;
Curr->BB = 0;
Owen Anderson
committed
} else {
Owen Anderson
committed
LeaderTableEntry* Next = Curr->Next;
Owen Anderson
committed
Curr->Val = Next->Val;
Curr->BB = Next->BB;
Owen Anderson
committed
Curr->Next = Next->Next;
Owen Anderson
committed
}
}
}
// List of critical edges to be split between iterations.
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
Owen Anderson
committed
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
Owen Anderson
committed
AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
Owen Anderson
committed
}
Chris Lattner
committed
Owen Anderson
committed
// Helper fuctions
// FIXME: eliminate or document these better
bool processLoad(LoadInst *L);
bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool performPRE(Function &F);
Owen Anderson
committed
Value *findLeader(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
Bill Wendling
committed
void verifyRemoved(const Instruction *I) const;
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
BasicBlock *Root);
bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
Owen Anderson
committed
};
Owen Anderson
committed
char GVN::ID = 0;
}
// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
Owen Anderson
committed
Owen Anderson
committed
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
Owen Anderson
committed
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
Owen Anderson
committed
Owen Anderson
committed
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
Owen Anderson
committed
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
I->second->dump();
}
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
/// 0) we know the block *is not* fully available.
/// 1) we know the block *is* fully available.
/// 2) we do not know whether the block is fully available or not, but we are
/// currently speculating that it will be.
/// 3) we are speculating for this block and have used that to speculate for
/// other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
// Optimistically assume that the block is fully available and check to see
// if we already know about this block in one lookup.
std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
FullyAvailableBlocks.insert(std::make_pair(BB, 2));
// If the entry already existed for this block, return the precomputed value.
if (!IV.second) {
// If this is a speculative "available" value, mark it as being used for
// speculation of other blocks.
if (IV.first->second == 2)
IV.first->second = 3;
return IV.first->second != 0;
}
// Otherwise, see if it is fully available in all predecessors.
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
// If this block has no predecessors, it isn't live-in here.
if (PI == PE)
for (; PI != PE; ++PI)
// If the value isn't fully available in one of our predecessors, then it
// isn't fully available in this block either. Undo our previous
// optimistic assumption and bail out.
if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
return true;
// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
char &BBVal = FullyAvailableBlocks[BB];
// If we didn't speculate on this, just return with it set to false.
if (BBVal == 2) {
BBVal = 0;
return false;
}
// If we did speculate on this value, we could have blocks set to 1 that are
// incorrect. Walk the (transitive) successors of this block and mark them as
// 0 if set to one.
SmallVector<BasicBlock*, 32> BBWorklist;
BBWorklist.push_back(BB);
BasicBlock *Entry = BBWorklist.pop_back_val();
// Note that this sets blocks to 0 (unavailable) if they happen to not
// already be in FullyAvailableBlocks. This is safe.
char &EntryVal = FullyAvailableBlocks[Entry];
if (EntryVal == 0) continue; // Already unavailable.
// Mark as unavailable.
EntryVal = 0;
for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
BBWorklist.push_back(*I);
} while (!BBWorklist.empty());
/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
Type *LoadTy,
const TargetData &TD) {
// If the loaded or stored value is an first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
Duncan Sands
committed
if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
StoredVal->getType()->isStructTy() ||
StoredVal->getType()->isArrayTy())
return false;
// The store has to be at least as big as the load.
if (TD.getTypeSizeInBits(StoredVal->getType()) <
TD.getTypeSizeInBits(LoadTy))
return false;
return true;
}
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
Jakub Staszak
committed
uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
// Pointer to Pointer -> use bitcast.
if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
// Convert source pointers to integers, which can be bitcast.
Duncan Sands
committed
if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
Type *TypeToCastTo = LoadedTy;
Duncan Sands
committed
if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
Duncan Sands
committed
if (LoadedTy->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
}
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
Duncan Sands
committed
if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
// Convert vectors and fp to integer, which can be manipulated.
Duncan Sands
committed
if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (TD.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
// Truncate the integer to the right size now.
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
if (LoadedTy == NewIntTy)
return StoredVal;
// If the result is a pointer, inttoptr.
Duncan Sands
committed
if (LoadedTy->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const TargetData &TD) {
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
Duncan Sands
committed
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
// If the load and store are to the exact same address, they should have been
// a must alias. AA must have gotten confused.
// FIXME: Study to see if/when this happens. One case is forwarding a memset
// to a load from the base of the memset.
if (LoadOffset == StoreOffset) {
dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
<< "Load Ptr = " << *LoadPtr << "\n";
#endif
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
LoadSize >>= 3;
bool isAAFailure = false;
if (StoreOffset < LoadOffset)
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
else
isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
<< "Load Ptr = " << *LoadPtr << "\n";
#endif
return -1;
}
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
// valuable.
if (StoreOffset > LoadOffset ||
StoreOffset+StoreSize < LoadOffset+LoadSize)
return -1;
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset-StoreOffset;
}
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, TD);
}
/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
LoadInst *DepLI, const TargetData &TD){
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
if (R != -1) return R;
// If we have a load/load clobber an DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
unsigned Size = MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
if (Size == 0) return -1;
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
}
static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (SizeCst == 0) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
// of the memset..
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, TD);
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
MI->getDest(), MemSizeInBits, TD);
if (Offset == -1)
return Offset;
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store provides bits used by the load but we the pointers don't
/// mustalias. Check this case to see if there is anything more we can do
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
Duncan Sands
committed
if (SrcVal->getType()->isPointerTy())
Benjamin Kramer
committed
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
Duncan Sands
committed
if (!SrcVal->getType()->isIntegerTy())
Benjamin Kramer
committed
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (TD.isLittleEndian())
else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
if (ShiftAmt)
Benjamin Kramer
committed
SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
if (LoadSize != StoreSize)
Benjamin Kramer
committed
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
/// memdep query of a load that ends up being a clobbering load. This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
Chris Lattner
committed
GVN &gvn) {
const TargetData &TD = *gvn.getTargetData();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
if (Offset+LoadSize > SrcValSize) {
Eli Friedman
committed
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber an DepLI can be widened to cover this
// load, then we should widen it to the next power of 2 size big enough!
unsigned NewLoadSize = Offset+LoadSize;
if (!isPowerOf2_32(NewLoadSize))
NewLoadSize = NextPowerOf2(NewLoadSize);