Newer
Older
/// that ultimately uses it. However, the specified instruction has multiple
/// uses. Given this, it may actually increase register pressure to fold it
/// into the load. For example, consider this code:
///
/// X = ...
/// Y = X+1
/// use(Y) -> nonload/store
/// Z = Y+1
/// load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
// AMBefore is the addressing mode before this instruction was folded into it,
// and AMAfter is the addressing mode after the instruction was folded. Get
// the set of registers referenced by AMAfter and subtract out those
// referenced by AMBefore: this is the set of values which folding in this
// address extends the lifetime of.
//
// Note that there are only two potential values being referenced here,
// BaseReg and ScaleReg (global addresses are always available, as are any
// folded immediates).
Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
BaseReg = 0;
if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
ScaledReg = 0;
// If folding this instruction (and it's subexprs) didn't extend any live
// ranges, we're ok with it.
if (BaseReg == 0 && ScaledReg == 0)
return true;
// If all uses of this instruction are ultimately load/store/inlineasm's,
// check to see if their addressing modes will include this instruction. If
// so, we can fold it into all uses, so it doesn't matter if it has multiple
// uses.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
// into a memory use, loop over each of these uses and see if they could
// *actually* fold the instruction.
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
unsigned OpNo = MemoryUses[i].second;
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
if (!isa<PointerType>(Address->getType()))
return false;
const Type *AddressAccessTy =
cast<PointerType>(Address->getType())->getElementType();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
// *actually* cover the shared instruction.
ExtAddrMode Result;
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
MemoryInst, Result);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.MatchAddr(Address, 0);
Success = Success; assert(Success && "Couldn't select *anything*?");
// If the match didn't cover I, then it won't be shared by it.
if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
I) == MatchedAddrModeInsts.end())
return false;
MatchedAddrModeInsts.clear();
}
return true;
}
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
if (Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() != BB;
return false;
}
/// OptimizeMemoryInst - Load and Store Instructions have often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
const Type *AccessTy,
DenseMap<Value*,Value*> &SunkAddrs) {
// Figure out what addressing mode will be built up for this operation.
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
AddrModeInsts, *TLI);
// Check to see if any of the instructions supersumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
AnyNonLocal = true;
break;
}
}
// If all the instructions matched are already in this BB, don't do anything.
if (!AnyNonLocal) {
DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuse of the expr are
// guaranteed to happen later.
BasicBlock::iterator InsertPt = MemoryInst;
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
// done this for some other load/store instr in this block. If so, reuse the
// computation.
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
} else {
DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();
Value *Result = 0;
// Start with the scale value.
if (AddrMode.Scale) {
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
} else if (isa<PointerType>(V->getType())) {
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else {
V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
}
if (AddrMode.Scale != 1)
Gabor Greif
committed
V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
"sunkaddr", InsertPt);
Result = V;
}
// Add in the base register.
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType() != IntPtrTy)
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
if (Result)
Gabor Greif
committed
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
InsertPt);
if (Result)
Gabor Greif
committed
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
// Add in the Base Offset if present.
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
Gabor Greif
committed
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
Chris Lattner
committed
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
if (Addr->use_empty())
RecursivelyDeleteTriviallyDeadInstructions(Addr);
return true;
}
Chris Lattner
committed
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs) {
bool MadeChange = false;
InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
std::vector<InlineAsm::ConstraintInfo>
ConstraintInfos = IA->ParseConstraints();
/// ConstraintOperands - Information about all of the constraints.
std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
ConstraintOperands.
push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.isIndirect)
OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
break;
case InlineAsm::isInput:
OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue(),
OpInfo.ConstraintType == TargetLowering::C_Memory);
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = OpInfo.CallOperandVal;
MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
}
}
return MadeChange;
}
Evan Cheng
committed
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
// If both result of the {s|z}xt and its source are live out, rewrite all
// other uses of the source with result of extension.
Value *Src = I->getOperand(0);
if (Src->hasOneUse())
return false;
if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
return false;
// Only safe to perform the optimization if the source is also defined in
// this block.
if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
return false;
Evan Cheng
committed
bool DefIsLiveOut = false;
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
Evan Cheng
committed
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
DefIsLiveOut = true;
break;
}
if (!DefIsLiveOut)
return false;
// Make sure non of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
// Be conservative. We don't want this xform to end up introducing
// reloads just before load / store instructions.
if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
Evan Cheng
committed
// InsertedTruncs - Only insert one trunc in each block once.
DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
bool MadeChange = false;
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
Evan Cheng
committed
UI != E; ++UI) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
// Both src and def are live in this block. Rewrite the use.
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
Evan Cheng
committed
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
// Replace a use of the {s|z}ext source with a use of the result.
TheUse = InsertedTrunc;
MadeChange = true;
}
return MadeChange;
}
Chris Lattner
committed
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
Chris Lattner
committed
// Split all critical edges where the dest block has a PHI and where the phi
// has shared immediate operands.
TerminatorInst *BBTI = BB.getTerminator();
if (BBTI->getNumSuccessors() > 1) {
for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
isCriticalEdge(BBTI, i, true))
SplitEdgeNicely(BBTI, i, this);
}
// Keep track of non-local addresses that have been sunk into this block.
// This allows us to avoid inserting duplicate code for blocks with multiple
// load/stores of the same address.
DenseMap<Value*, Value*> SunkAddrs;
Chris Lattner
committed
for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
Instruction *I = BBI++;
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Chris Lattner
committed
// If the source of the cast is a constant, then this should have
// already been constant folded. The only reason NOT to constant fold
// it is if something (e.g. LSR) was careful to place the constant
// evaluation in a block other than then one that uses it (e.g. to hoist
// the address of globals out of a loop). If this is the case, we don't
// want to forward-subst the cast.
if (isa<Constant>(CI->getOperand(0)))
continue;
Evan Cheng
committed
bool Change = false;
if (TLI) {
Change = OptimizeNoopCopyExpression(CI, *TLI);
MadeChange |= Change;
}
if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
Evan Cheng
committed
MadeChange |= OptimizeExtUses(I);
} else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
MadeChange |= OptimizeCmpExpression(CI);
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)
MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
SunkAddrs);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (TLI)
MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
SI->getOperand(0)->getType(),
SunkAddrs);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
/// The GEP operand must be a pointer, so must its result -> BitCast
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
MadeChange = true;
BBI = NC;
}
} else if (CallInst *CI = dyn_cast<CallInst>(I)) {
// If we found an inline asm expession, and if the target knows how to
// lower it to normal LLVM code, do so now.
if (TLI && isa<InlineAsm>(CI->getCalledValue()))
if (const TargetAsmInfo *TAI =
TLI->getTargetMachine().getTargetAsmInfo()) {
if (TAI->ExpandInlineAsm(CI))
BBI = BB.begin();
else
// Sink address computing for memory operands into the block.
MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
}
Chris Lattner
committed
}
}
Chris Lattner
committed
return MadeChange;
}