"clang/git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "f8984013059449ae534340da09881762942b01d4"
Newer
Older
!OpInfo.isIndirect))
return false;
}
return true;
}
/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use. If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
SmallPtrSet<Instruction*, 16> &ConsideredInsts,
const TargetLowering &TLI) {
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I))
return false;
// If this is an obviously unfoldable instruction, bail out.
if (!MightBeFoldableInst(I))
return true;
// Loop over all the uses, recursively processing them.
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI) {
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
continue;
}
if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (IA == 0) return true;
// If this is a memory operand, we're cool, otherwise bail out.
if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
return true;
continue;
if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
TLI))
return true;
}
return false;
}
/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
/// the use site that we're folding it into. If so, there is no cost to
/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
Value *KnownLive2) {
// If Val is either of the known-live values, we know it is live!
if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
return true;
// All values other than instructions and arguments (e.g. constants) are live.
if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
// If Val is a constant sized alloca in the entry block, it is live, this is
// true because it is just a reference to the stack/frame pointer, which is
// live for the whole function.
if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
if (AI->isStaticAlloca())
return true;
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
BasicBlock *MemBB = MemoryInst->getParent();
for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
UI != E; ++UI)
// We know that uses of arguments and instructions have to be instructions.
if (cast<Instruction>(*UI)->getParent() == MemBB)
return true;
return false;
}
/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it. However, the specified instruction has multiple
/// uses. Given this, it may actually increase register pressure to fold it
/// into the load. For example, consider this code:
///
/// X = ...
/// Y = X+1
/// use(Y) -> nonload/store
/// Z = Y+1
/// load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
// AMBefore is the addressing mode before this instruction was folded into it,
// and AMAfter is the addressing mode after the instruction was folded. Get
// the set of registers referenced by AMAfter and subtract out those
// referenced by AMBefore: this is the set of values which folding in this
// address extends the lifetime of.
//
// Note that there are only two potential values being referenced here,
// BaseReg and ScaleReg (global addresses are always available, as are any
// folded immediates).
Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
BaseReg = 0;
if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
ScaledReg = 0;
// If folding this instruction (and it's subexprs) didn't extend any live
// ranges, we're ok with it.
if (BaseReg == 0 && ScaledReg == 0)
return true;
// If all uses of this instruction are ultimately load/store/inlineasm's,
// check to see if their addressing modes will include this instruction. If
// so, we can fold it into all uses, so it doesn't matter if it has multiple
// uses.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
// into a memory use, loop over each of these uses and see if they could
// *actually* fold the instruction.
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
unsigned OpNo = MemoryUses[i].second;
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
if (!isa<PointerType>(Address->getType()))
return false;
const Type *AddressAccessTy =
cast<PointerType>(Address->getType())->getElementType();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
// *actually* cover the shared instruction.
ExtAddrMode Result;
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
MemoryInst, Result);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.MatchAddr(Address, 0);
Success = Success; assert(Success && "Couldn't select *anything*?");
// If the match didn't cover I, then it won't be shared by it.
if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
I) == MatchedAddrModeInsts.end())
return false;
MatchedAddrModeInsts.clear();
}
return true;
}
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
if (Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() != BB;
return false;
}
/// OptimizeMemoryInst - Load and Store Instructions have often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
const Type *AccessTy,
DenseMap<Value*,Value*> &SunkAddrs) {
// Figure out what addressing mode will be built up for this operation.
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
AddrModeInsts, *TLI);
// Check to see if any of the instructions supersumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
AnyNonLocal = true;
break;
}
}
// If all the instructions matched are already in this BB, don't do anything.
if (!AnyNonLocal) {
DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuse of the expr are
// guaranteed to happen later.
BasicBlock::iterator InsertPt = MemoryInst;
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
// done this for some other load/store instr in this block. If so, reuse the
// computation.
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
} else {
DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();
Value *Result = 0;
// Start with the scale value.
if (AddrMode.Scale) {
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
} else if (isa<PointerType>(V->getType())) {
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else {
V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
}
if (AddrMode.Scale != 1)
Gabor Greif
committed
V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
"sunkaddr", InsertPt);
Result = V;
}
// Add in the base register.
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType() != IntPtrTy)
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
if (Result)
Gabor Greif
committed
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
InsertPt);
if (Result)
Gabor Greif
committed
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
// Add in the Base Offset if present.
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
Gabor Greif
committed
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
Chris Lattner
committed
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
if (Addr->use_empty())
RecursivelyDeleteTriviallyDeadInstructions(Addr);
return true;
}
Chris Lattner
committed
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs) {
bool MadeChange = false;
InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
std::vector<InlineAsm::ConstraintInfo>
ConstraintInfos = IA->ParseConstraints();
/// ConstraintOperands - Information about all of the constraints.
std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
ConstraintOperands.
push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.isIndirect)
OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
break;
case InlineAsm::isInput:
OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue(),
OpInfo.ConstraintType == TargetLowering::C_Memory);
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = OpInfo.CallOperandVal;
MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
}
}
return MadeChange;
}
Evan Cheng
committed
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
// If both result of the {s|z}xt and its source are live out, rewrite all
// other uses of the source with result of extension.
Value *Src = I->getOperand(0);
if (Src->hasOneUse())
return false;
if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
return false;
// Only safe to perform the optimization if the source is also defined in
// this block.
if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
return false;
Evan Cheng
committed
bool DefIsLiveOut = false;
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
Evan Cheng
committed
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
DefIsLiveOut = true;
break;
}
if (!DefIsLiveOut)
return false;
// Make sure non of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
// Be conservative. We don't want this xform to end up introducing
// reloads just before load / store instructions.
if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
Evan Cheng
committed
// InsertedTruncs - Only insert one trunc in each block once.
DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
bool MadeChange = false;
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
Evan Cheng
committed
UI != E; ++UI) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
// Both src and def are live in this block. Rewrite the use.
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
Evan Cheng
committed
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
// Replace a use of the {s|z}ext source with a use of the result.
TheUse = InsertedTrunc;
MadeChange = true;
}
return MadeChange;
}
Chris Lattner
committed
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
Evan Cheng
committed
// Split all critical edges where the dest block has a PHI.
Chris Lattner
committed
TerminatorInst *BBTI = BB.getTerminator();
if (BBTI->getNumSuccessors() > 1) {
Evan Cheng
committed
for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
BasicBlock *SuccBB = BBTI->getSuccessor(i);
if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
SplitEdgeNicely(BBTI, i, BackEdges, this);
}
Chris Lattner
committed
}
// Keep track of non-local addresses that have been sunk into this block.
// This allows us to avoid inserting duplicate code for blocks with multiple
// load/stores of the same address.
DenseMap<Value*, Value*> SunkAddrs;
Chris Lattner
committed
for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
Instruction *I = BBI++;
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Chris Lattner
committed
// If the source of the cast is a constant, then this should have
// already been constant folded. The only reason NOT to constant fold
// it is if something (e.g. LSR) was careful to place the constant
// evaluation in a block other than then one that uses it (e.g. to hoist
// the address of globals out of a loop). If this is the case, we don't
// want to forward-subst the cast.
if (isa<Constant>(CI->getOperand(0)))
continue;
Evan Cheng
committed
bool Change = false;
if (TLI) {
Change = OptimizeNoopCopyExpression(CI, *TLI);
MadeChange |= Change;
}
if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
Evan Cheng
committed
MadeChange |= OptimizeExtUses(I);
} else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
MadeChange |= OptimizeCmpExpression(CI);
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)
MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
SunkAddrs);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (TLI)
MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
SI->getOperand(0)->getType(),
SunkAddrs);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
/// The GEP operand must be a pointer, so must its result -> BitCast
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
MadeChange = true;
BBI = NC;
}
} else if (CallInst *CI = dyn_cast<CallInst>(I)) {
// If we found an inline asm expession, and if the target knows how to
// lower it to normal LLVM code, do so now.
if (TLI && isa<InlineAsm>(CI->getCalledValue()))
if (const TargetAsmInfo *TAI =
TLI->getTargetMachine().getTargetAsmInfo()) {
if (TAI->ExpandInlineAsm(CI))
BBI = BB.begin();
else
// Sink address computing for memory operands into the block.
MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
}
Chris Lattner
committed
}
}
Chris Lattner
committed
return MadeChange;
}