Newer
Older
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
Chris Lattner
committed
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
STATISTIC(NumReduced , "Number of IV uses strength reduced");
STATISTIC(NumInserted, "Number of PHIs inserted");
STATISTIC(NumVariable, "Number of PHIs with variable strides");
STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
cl::Hidden);
Chris Lattner
committed
namespace {
/// IVStrideUse - Keep track of one use of a strided induction variable, where
/// the stride is stored externally. The Offset member keeps track of the
/// offset from the IV, User is the actual user of the operand, and
/// 'OperandValToReplace' is the operand of the User that is the use.
struct VISIBILITY_HIDDEN IVStrideUse {
SCEVHandle Offset;
Instruction *User;
Value *OperandValToReplace;
// isUseOfPostIncrementedValue - True if this should use the
// post-incremented version of this IV, not the preincremented version.
// This can only be set in special cases, such as the terminating setcc
bool isUseOfPostIncrementedValue;
IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
: Offset(Offs), User(U), OperandValToReplace(O),
isUseOfPostIncrementedValue(false) {}
};
/// IVUsersOfOneStride - This structure keeps track of all instructions that
/// have an operand that is based on the trip count multiplied by some stride.
/// The stride for all of these users is common and kept external to this
/// structure.
struct VISIBILITY_HIDDEN IVUsersOfOneStride {
/// Users - Keep track of all of the users of this stride as well as the
/// initial value and the operand that uses the IV.
std::vector<IVStrideUse> Users;
void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
Users.push_back(IVStrideUse(Offset, User, Operand));
}
};
/// IVInfo - This structure keeps track of one IV expression inserted during
/// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
/// well as the PHI node and increment value created for rewrite.
struct VISIBILITY_HIDDEN IVExpr {
SCEVHandle Base;
PHINode *PHI;
IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi)
: Stride(stride), Base(base), PHI(phi) {}
};
/// IVsOfOneStride - This structure keeps track of all IV expression inserted
/// during StrengthReduceStridedIVUsers for a particular stride of the IV.
struct VISIBILITY_HIDDEN IVsOfOneStride {
std::vector<IVExpr> IVs;
void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI) {
IVs.push_back(IVExpr(Stride, Base, PHI));
}
};
class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
LoopInfo *LI;
ScalarEvolution *SE;
bool Changed;
/// IVUsesByStride - Keep track of all uses of induction variables that we
/// are interested in. The key of the map is the stride of the access.
Chris Lattner
committed
std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride;
/// IVsByStride - Keep track of all IVs that have been inserted for a
/// particular stride.
std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
/// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
/// We use this to iterate over the IVUsesByStride collection without being
/// dependent on random ordering of pointers in the process.
SmallVector<SCEVHandle, 16> StrideOrder;
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
SmallVector<Instruction*, 16> DeadInsts;
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
public:
explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<DominatorTree>();
Jeff Cohen
committed
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
Devang Patel
committed
AU.addPreserved<ScalarEvolution>();
}
bool AddUsersIfInteresting(Instruction *I, Loop *L,
ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride);
void OptimizeIndvars(Loop *L);
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void OptimizeShadowIV(Loop *L);
/// OptimizeSMax - Rewrite the loop's terminating condition
/// if it uses an smax computation.
ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
Devang Patel
committed
const SCEVHandle *&CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
const std::vector<BasedUser>& UsersToProcess);
bool ValidStride(bool, int64_t,
const std::vector<BasedUser>& UsersToProcess);
SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
bool ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
bool AllUsesAreAddresses,
SCEVHandle Stride);
void PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
SCEVHandle Stride,
SCEVHandle CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter);
void PrepareToStrengthReduceFromSmallerStride(
std::vector<BasedUser> &UsersToProcess,
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt);
void PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
SCEVHandle Stride,
SCEVHandle CommonExprs,
Value *CommonBaseV,
const Loop *L,
SCEVExpander &PreheaderRewriter);
Chris Lattner
committed
void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
void DeleteTriviallyDeadInstructions();
};
}
char LoopStrengthReduce::ID = 0;
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
// Sort the deadinsts list so that we can trivially eliminate duplicates as we
// go. The code below never adds a non-dead instruction to the worklist, but
// callers may not be so careful.
array_pod_sort(DeadInsts.begin(), DeadInsts.end());
// Drop duplicate instructions and those with uses.
for (unsigned i = 0, e = DeadInsts.size()-1; i < e; ++i) {
Instruction *I = DeadInsts[i];
if (!I->use_empty()) DeadInsts[i] = 0;
while (i != e && DeadInsts[i+1] == I)
DeadInsts[++i] = 0;
}
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.back();
DeadInsts.pop_back();
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
SE->deleteValueFromRecords(I);
Bill Wendling
committed
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = 0;
if (U->use_empty())
DeadInsts.push_back(U);
}
I->eraseFromParent();
Changed = true;
}
}
/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
// This is very common, put it first.
if (isa<SCEVConstant>(S))
return false;
if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
for (unsigned int i=0; i< AE->getNumOperands(); i++)
if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
return true;
return false;
}
if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
if (const Loop *newLoop = AE->getLoop()) {
if (newLoop == L)
return false;
// if newLoop is an outer loop of L, this is OK.
if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
return false;
}
return true;
}
if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#if 0
// SCEVSDivExpr has been backed out temporarily, but will be back; we'll
// need this when it is.
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#endif
if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
return containsAddRecFromDifferentLoop(CE->getOperand(), L);
return false;
}
/// getSCEVStartAndStride - Compute the start and stride of this expression,
/// returning false if the expression is not a start/stride pair, or true if it
/// is. The stride must be a loop invariant expression, but the start may be
/// a mix of loop invariant and loop variant expressions. The start cannot,
/// however, contain an AddRec from a different loop, unless that loop is an
/// outer loop of the current loop.
static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
SCEVHandle &Start, SCEVHandle &Stride,
ScalarEvolution *SE, DominatorTree *DT) {
SCEVHandle TheAddRec = Start; // Initialize to zero.
// If the outer level is an AddExpr, the operands are all start values except
// for a nested AddRecExpr.
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
if (SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
if (AddRec->getLoop() == L)
TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
else
return false; // Nested IV of some sort?
} else {
Start = SE->getAddExpr(Start, AE->getOperand(i));
}
TheAddRec = SH;
} else {
return false; // not analyzable.
}
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
if (!AddRec || AddRec->getLoop() != L) return false;
// FIXME: Generalize to non-affine IV's.
if (!AddRec->isAffine()) return false;
// If Start contains an SCEVAddRecExpr from a different loop, other than an
// outer loop of the current loop, reject it. SCEV has no concept of
// operating on more than one loop at a time so don't confuse it with such
// expressions.
if (containsAddRecFromDifferentLoop(AddRec->getOperand(0), L))
return false;
Start = SE->getAddExpr(Start, AddRec->getOperand(0));
if (!isa<SCEVConstant>(AddRec->getOperand(1))) {
// If stride is an instruction, make sure it dominates the loop preheader.
// Otherwise we could end up with a use before def situation.
BasicBlock *Preheader = L->getLoopPreheader();
if (!AddRec->getOperand(1)->dominates(Preheader, DT))
return false;
DOUT << "[" << L->getHeader()->getName()
<< "] Variable stride: " << *AddRec << "\n";
}
Chris Lattner
committed
Stride = AddRec->getOperand(1);
return true;
}
/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value. If this user should use the post-inc version of the IV, return true.
///
/// Choosing wrong here can break dominance properties (if we choose to use the
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
Evan Cheng
committed
Loop *L, DominatorTree *DT, Pass *P,
SmallVectorImpl<Instruction*> &DeadInsts){
// If the user is in the loop, use the preinc value.
if (L->contains(User->getParent())) return false;
BasicBlock *LatchBlock = L->getLoopLatch();
// Ok, the user is outside of the loop. If it is dominated by the latch
// block, use the post-inc value.
if (DT->dominates(LatchBlock, User->getParent()))
return true;
// There is one case we have to be careful of: PHI nodes. These little guys
// can live in blocks that do not dominate the latch block, but (since their
// uses occur in the predecessor block, not the block the PHI lives in) should
// still use the post-inc value. Check for this case now.
PHINode *PN = dyn_cast<PHINode>(User);
if (!PN) return false; // not a phi, not dominated by latch block.
// Look at all of the uses of IV by the PHI node. If any use corresponds to
// a block that is not dominated by the latch block, give up and use the
// preincremented value.
unsigned NumUses = 0;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == IV) {
++NumUses;
if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
return false;
}
// Okay, all uses of IV by PN are in predecessor blocks that really are
// dominated by the latch block. Use the post-incremented value.
return true;
/// isAddressUse - Returns true if the specified instruction is using the
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(1) == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
case Intrinsic::x86_sse2_loadu_dq:
case Intrinsic::x86_sse2_loadu_pd:
case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
if (II->getOperand(1) == OperandVal)
isAddress = true;
break;
}
}
return isAddress;
}
/// getAccessType - Return the type of the memory being accessed.
static const Type *getAccessType(const Instruction *Inst) {
const Type *UseTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
UseTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
UseTy = II->getOperand(1)->getType();
break;
}
}
return UseTy;
}
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
if (!SE->isSCEVable(I->getType()))
return false; // Void and FP expressions cannot be reduced.
Chris Lattner
committed
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
if (SE->getTypeSizeInBits(I->getType()) > 64)
Chris Lattner
committed
return false;
return true; // Instruction already handled.
// Get the symbolic expression for this instruction.
SCEVHandle ISE = SE->getSCEV(I);
if (isa<SCEVCouldNotCompute>(ISE)) return false;
// Get the start and stride for this expression.
SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
Chris Lattner
committed
SCEVHandle Stride = Start;
if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
std::vector<Instruction *> IUsers;
// Collect all I uses now because IVUseShouldUsePostIncValue may
// invalidate use_iterator.
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
IUsers.push_back(cast<Instruction>(*UI));
for (unsigned iused_index = 0, iused_size = IUsers.size();
iused_index != iused_size; ++iused_index) {
Instruction *User = IUsers[iused_index];
// Do not infinitely recurse on PHI nodes.
Chris Lattner
committed
if (isa<PHINode>(User) && Processed.count(User))
continue;
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
// consider references ouside the loop in all cases.
// If User is already in Processed, we don't want to recurse into it again,
// but do want to record a second reference in the same instruction.
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
!AddUsersIfInteresting(User, L, Processed)) {
DOUT << "FOUND USER in other loop: " << *User
<< " OF SCEV: " << *ISE << "\n";
AddUserToIVUsers = true;
}
} else if (Processed.count(User) ||
!AddUsersIfInteresting(User, L, Processed)) {
DOUT << "FOUND USER: " << *User
<< " OF SCEV: " << *ISE << "\n";
AddUserToIVUsers = true;
}
if (AddUserToIVUsers) {
IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride];
if (StrideUses.Users.empty()) // First occurrence of this stride?
StrideOrder.push_back(Stride);
// Okay, we found a user that we cannot reduce. Analyze the instruction
// and decide what to do with it. If we are a use inside of the loop, use
// the value before incrementation, otherwise use it after incrementation.
Evan Cheng
committed
if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
StrideUses.addUser(NewStart, User, I);
StrideUses.Users.back().isUseOfPostIncrementedValue = true;
DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
StrideUses.addUser(Start, User, I);
}
}
return true;
}
namespace {
/// BasedUser - For a particular base value, keep information about how we've
/// partitioned the expression so far.
struct BasedUser {
/// SE - The current ScalarEvolution object.
ScalarEvolution *SE;
/// Base - The Base value for the PHI node that needs to be inserted for
/// this use. As the use is processed, information gets moved from this
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
SCEVHandle Base;
/// Inst - The instruction using the induction variable.
Instruction *Inst;
/// OperandValToReplace - The operand value of Inst to replace with the
/// EmittedBase.
Value *OperandValToReplace;
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
/// must be added inside the loop.
SCEVHandle Imm;
/// Phi - The induction variable that performs the striding that
/// should be used for this user.
PHINode *Phi;
// isUseOfPostIncrementedValue - True if this should use the
// post-incremented version of this IV, not the preincremented version.
// This can only be set in special cases, such as the terminating setcc
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.Offset), Inst(IVSU.User),
OperandValToReplace(IVSU.OperandValToReplace),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
Instruction *InsertPt,
Evan Cheng
committed
SCEVExpander &Rewriter, Loop *L, Pass *P,
SmallVectorImpl<Instruction*> &DeadInsts);
Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L);
void dump() const;
};
}
void BasedUser::dump() const {
Bill Wendling
committed
cerr << " Base=" << *Base;
cerr << " Imm=" << *Imm;
cerr << " Inst: " << *Inst;
}
Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L) {
// Figure out where we *really* want to insert this code. In particular, if
// the user is inside of a loop that is nested inside of L, we really don't
// want to insert this expression before the user, we'd rather pull it out as
// many loops as possible.
LoopInfo &LI = Rewriter.getLoopInfo();
Instruction *BaseInsertPt = IP;
// Figure out the most-nested loop that IP is in.
Loop *InsertLoop = LI.getLoopFor(IP->getParent());
// If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
// the preheader of the outer-most loop where NewBase is not loop invariant.
if (L->contains(IP->getParent()))
while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
InsertLoop = InsertLoop->getParentLoop();
}
Value *Base = Rewriter.expandCodeFor(NewBase, Ty, BaseInsertPt);
// If there is no immediate value, skip the next part.
if (Imm->isZero())
return Base;
// If we are inserting the base and imm values in the same block, make sure to
// adjust the IP position if insertion reused a result.
if (IP == BaseInsertPt)
IP = Rewriter.getInsertionPoint();
// Always emit the immediate (if non-zero) into the same block as the user.
SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm);
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it. NewBasePt is the last instruction which contributes to the
// value of NewBase in the case that it's a diffferent instruction from
// the PHI that NewBase is computed from, or null otherwise.
//
void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
Instruction *NewBasePt,
Evan Cheng
committed
SCEVExpander &Rewriter, Loop *L, Pass *P,
SmallVectorImpl<Instruction*> &DeadInsts){
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
// computation. This is usually safe, because we obviously used to use the
// computation when it was computed in its current block. However, in some
// cases (e.g. use of a post-incremented induction variable) the NewBase
// value will be pinned to live somewhere after the original computation.
// In this case, we have to back off.
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
// artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
++InsertPt;
} else if (Instruction *OpInst
= dyn_cast<Instruction>(OperandValToReplace)) {
InsertPt = OpInst;
while (isa<PHINode>(InsertPt)) ++InsertPt;
}
}
Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
OperandValToReplace->getType(),
Rewriter, InsertPt, L);
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
DOUT << " Replacing with ";
DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false));
DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
return;
}
// PHI nodes are more complex. We have to insert one copy of the NewBase+Imm
Chris Lattner
committed
// expression into each operand block that uses it. Note that PHI nodes can
// have multiple entries for the same predecessor. We use a map to make sure
// that a PHI node only has a single Value* for each predecessor (which also
// prevents us from inserting duplicate code in some blocks).
DenseMap<BasicBlock*, Value*> InsertedCode;
PHINode *PN = cast<PHINode>(Inst);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
// which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
// that case(?).
Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
if (L->contains(OldLoc->getParent())) {
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
BasicBlock *PHIPred = PN->getIncomingBlock(i);
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
// Next step: move the basic block. In particular, if the PHI node
// is outside of the loop, and PredTI is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after PredTI.
if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
BasicBlock *NewBB = PN->getIncomingBlock(i);
NewBB->moveBefore(PN->getParent());
}
// Splitting the edge can reduce the number of PHI entries we have.
e = PN->getNumIncomingValues();
Chris Lattner
committed
}
}
Chris Lattner
committed
Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
if (!Code) {
// Insert the code into the end of the predecessor block.
Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
PN->getIncomingBlock(i)->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
Rewriter, InsertPt, L);
DOUT << " Changing PHI use to ";
DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false));
DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
Chris Lattner
committed
}
// Replace the use of the operand Value with the new Phi we just created.
Chris Lattner
committed
PN->setIncomingValue(i, Code);
Rewriter.clear();
}
}
Evan Cheng
committed
// PHI node might have become a constant value after SplitCriticalEdge.
DeadInsts.push_back(Inst);
}
/// fitsInAddressMode - Return true if V can be subsumed within an addressing
/// mode, and does not need to be put in a register first.
static bool fitsInAddressMode(const SCEVHandle &V, const Type *UseTy,
const TargetLowering *TLI, bool HasBaseReg) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
int64_t VC = SC->getValue()->getSExtValue();
if (TLI) {
TargetLowering::AddrMode AM;
AM.BaseOffs = VC;
AM.HasBaseReg = HasBaseReg;
return TLI->isLegalAddressingMode(AM, UseTy);
} else {
// Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
return (VC > -(1 << 16) && VC < (1 << 16)-1);
}
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) {
if (TLI) {
TargetLowering::AddrMode AM;
AM.BaseGV = GV;
AM.HasBaseReg = HasBaseReg;
return TLI->isLegalAddressingMode(AM, UseTy);
} else {
// Default: assume global addresses are not legal.
}
return false;
}
/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
/// loop varying to the Imm operand.
static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
std::vector<SCEVHandle> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
Imm = SE->getAddExpr(Imm, SAE->getOperand(i));
} else {
NewOps.push_back(SAE->getOperand(i));
}
if (NewOps.empty())
Val = SE->getIntegerSCEV(0, Val->getType());
else
Val = SE->getAddExpr(NewOps);
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
SCEVHandle Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
// Otherwise, all of Val is variant, move the whole thing over.
Imm = SE->getAddExpr(Imm, Val);
Val = SE->getIntegerSCEV(0, Val->getType());
}
}
Chris Lattner
committed
/// MoveImmediateValues - Look at Val, and pull out any additions of constants
/// that can fit into the immediate field of instructions in the target.
Chris Lattner
committed
/// Accumulate these immediate values into the Imm value.
static void MoveImmediateValues(const TargetLowering *TLI,
const Type *UseTy,
SCEVHandle &Val, SCEVHandle &Imm,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
Chris Lattner
committed
std::vector<SCEVHandle> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
SCEVHandle NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, UseTy, NewOp, Imm, isAddress, L, SE);
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
Imm = SE->getAddExpr(Imm, NewOp);
Chris Lattner
committed
} else {
}
Chris Lattner
committed
if (NewOps.empty())
Val = SE->getIntegerSCEV(0, Val->getType());
Chris Lattner
committed
else
Val = SE->getAddExpr(NewOps);
Chris Lattner
committed
return;
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
Chris Lattner
committed
SCEVHandle Start = SARE->getStart();
MoveImmediateValues(TLI, UseTy, Start, Imm, isAddress, L, SE);
Chris Lattner
committed
if (Start != SARE->getStart()) {
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
Chris Lattner
committed
}
return;
} else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
// Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
if (isAddress && fitsInAddressMode(SME->getOperand(0), UseTy, TLI, false) &&
SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType());
MoveImmediateValues(TLI, UseTy, NewOp, SubImm, isAddress, L, SE);
// If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
// good.
SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
if (fitsInAddressMode(SubImm, UseTy, TLI, false)) {
Imm = SE->getAddExpr(Imm, SubImm);
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
}
Chris Lattner
committed
// Loop-variant expressions must stay in the immediate field of the
// expression.
if ((isAddress && fitsInAddressMode(Val, UseTy, TLI, false)) ||
Chris Lattner
committed
!Val->isLoopInvariant(L)) {
Imm = SE->getAddExpr(Imm, Val);
Val = SE->getIntegerSCEV(0, Val->getType());
Chris Lattner
committed
return;
Chris Lattner
committed
// Otherwise, no immediates to move.
}
static void MoveImmediateValues(const TargetLowering *TLI,
Instruction *User,
SCEVHandle &Val, SCEVHandle &Imm,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
const Type *UseTy = getAccessType(User);
MoveImmediateValues(TLI, UseTy, Val, Imm, isAddress, L, SE);
}
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SCEVHandle Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType());
if (SARE->getOperand(0) == Zero) {
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
} else if (!Expr->isZero()) {
// Do not add zero.
SubExprs.push_back(Expr);
}
}
// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
/// the Uses, removing any common subexpressions, except that if all such
/// subexpressions can be folded into an addressing mode for all uses inside
/// the loop (this case is referred to as "free" in comments herein) we do
/// not remove anything. This looks for things like (a+b+c) and
/// (a+c+d) and computes the common (a+c) subexpression. The common expression
/// is *removed* from the Bases and returned.
RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
ScalarEvolution *SE, Loop *L,
const TargetLowering *TLI) {
unsigned NumUses = Uses.size();
// Only one use? This is a very common case, so we handle it specially and
// cheaply.
SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
SCEVHandle FreeResult = Zero;
// If the use is inside the loop, use its base, regardless of what it is:
// it is clearly shared across all the IV's. If the use is outside the loop
// (which means after it) we don't want to factor anything *into* the loop,
// so just use 0 as the base.
if (L->contains(Uses[0].Inst->getParent()))
std::swap(Result, Uses[0].Base);
return Result;
}
// To find common subexpressions, count how many of Uses use each expression.
// If any subexpressions are used Uses.size() times, they are common.
// Also track whether all uses of each expression can be moved into an
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<SCEVHandle, SubExprUseData> SubExpressionUseData;
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
std::vector<SCEVHandle> UniqueSubExprs;
std::vector<SCEVHandle> SubExprs;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
// Since the user is outside the loop, it must be *after* the loop (if it