Newer
Older
//===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable. This is
// accomplished by creating a new Value to hold the initial value of the array
// access for the first iteration, and then creating a new GEP instruction in
// the loop to increment the value by the appropriate amount.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
// Counters declared with the STATISTIC macro; each one carries the
// human-readable description given as its second argument.
STATISTIC(NumReduced , "Number of GEPs strength reduced");
STATISTIC(NumInserted, "Number of PHIs inserted");
STATISTIC(NumVariable, "Number of PHIs with variable strides");
STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
// Hidden boolean command-line option "enable-full-lsr", initialized to false.
// NOTE(review): presumably consulted when deciding whether to use the full
// strength-reduction mode; the use is not visible in this part of the file.
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
cl::Hidden);
Chris Lattner
committed
namespace {
/// IVStrideUse - Describes a single use of a strided induction variable.  The
/// stride itself is not recorded here; it is stored externally.
struct VISIBILITY_HIDDEN IVStrideUse {
  /// Offset - The offset of this use from the IV.
  SCEVHandle Offset;

  /// User - The instruction that actually uses the operand.
  Instruction *User;

  /// OperandValToReplace - The operand of User that is the use.
  Value *OperandValToReplace;

  /// isUseOfPostIncrementedValue - True if this use should refer to the
  /// post-incremented version of the IV rather than the pre-incremented one.
  /// This can only be set in special cases, such as the terminating setcc
  /// instruction for a loop and uses outside the loop that are dominated by
  /// the loop.
  bool isUseOfPostIncrementedValue;

  /// Build a use record; the use starts out as a pre-increment use.
  IVStrideUse(const SCEVHandle &Off, Instruction *Inst, Value *Operand)
    : Offset(Off),
      User(Inst),
      OperandValToReplace(Operand),
      isUseOfPostIncrementedValue(false) {
  }
};
/// IVUsersOfOneStride - This structure keeps track of all instructions that
/// have an operand that is based on the trip count multiplied by some stride.
/// The stride for all of these users is common and kept external to this
/// structure.
struct VISIBILITY_HIDDEN IVUsersOfOneStride {
/// Users - Keep track of all of the users of this stride as well as the
/// initial value and the operand that uses the IV.
std::vector<IVStrideUse> Users;
void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
Users.push_back(IVStrideUse(Offset, User, Operand));
}
};
/// IVInfo - This structure keeps track of one IV expression inserted during
/// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
/// well as the PHI node and increment value created for rewrite.
struct VISIBILITY_HIDDEN IVExpr {
SCEVHandle Base;
PHINode *PHI;
Value *IncV;
IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi,
Value *incv)
: Stride(stride), Base(base), PHI(phi), IncV(incv) {}
};
/// IVsOfOneStride - This structure keeps track of all IV expression inserted
/// during StrengthReduceStridedIVUsers for a particular stride of the IV.
struct VISIBILITY_HIDDEN IVsOfOneStride {
std::vector<IVExpr> IVs;
void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI,
Value *IncV) {
IVs.push_back(IVExpr(Stride, Base, PHI, IncV));
}
};
/// BasedUser - Forward declaration; the definition appears further down in
/// this file.  It is needed here because several helpers below are declared
/// with std::vector<BasedUser> parameters.
struct BasedUser;

/// LoopStrengthReduce - The loop pass that performs strength reduction of
/// induction-variable based expressions.  The members below hold the analyses
/// the pass consults and the per-loop bookkeeping of IV uses, grouped by
/// stride.
class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
  LoopInfo *LI;
  /// DT - Dominator tree.  AddUsersIfInteresting hands it to
  /// getSCEVStartAndStride and IVUseShouldUsePostIncValue.
  DominatorTree *DT;
  ScalarEvolution *SE;
  const TargetData *TD;
  const Type *UIntPtrTy;
  bool Changed;

  /// IVUsesByStride - Keep track of all uses of induction variables that we
  /// are interested in.  The key of the map is the stride of the access.
  std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride;

  /// IVsByStride - Keep track of all IVs that have been inserted for a
  /// particular stride.
  std::map<SCEVHandle, IVsOfOneStride> IVsByStride;

  /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
  /// We use this to iterate over the IVUsesByStride collection without being
  /// dependent on random ordering of pointers in the process.
  SmallVector<SCEVHandle, 16> StrideOrder;

  /// GEPlist - A list of the GEP's that have been remembered in the SCEV
  /// data structures.  SCEV does not know to update these when the operands
  /// of the GEP are changed, which means we cannot leave them live across
  /// loops.
  SmallVector<GetElementPtrInst *, 16> GEPlist;

  /// CastedPointers - As we need to cast values to uintptr_t, this keeps track
  /// of the casted version of each value.  This is accessed by
  /// getCastedVersionOf.
  DenseMap<Value*, Value*> CastedPointers;

  /// DeadInsts - Keep track of instructions we may have made dead, so that
  /// we can remove them after we are done working.
  SmallVector<Instruction*, 16> DeadInsts;

  /// TLI - Keep a pointer of a TargetLowering to consult for determining
  /// transformation profitability.
  const TargetLowering *TLI;

public:
  /// ID - Pass identification.  Its address is passed to the LoopPass
  /// constructor below and it is defined right after this namespace.
  static char ID;

  explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
    LoopPass(&ID), TLI(tli) {
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM);

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    // We split critical edges, so we change the CFG.  However, we do update
    // many analyses if they are around.
    AU.addPreservedID(LoopSimplifyID);
    AU.addPreserved<LoopInfo>();
    AU.addPreserved<DominanceFrontier>();
    AU.addPreserved<DominatorTree>();

    AU.addRequiredID(LoopSimplifyID);
    AU.addRequired<LoopInfo>();
    // The pass dereferences DT (see AddUsersIfInteresting), so the dominator
    // tree has to be requested alongside the other analyses.
    AU.addRequired<DominatorTree>();
    AU.addRequired<TargetData>();
    AU.addRequired<ScalarEvolution>();
    AU.addPreserved<ScalarEvolution>();
  }

  /// getCastedVersionOf - Return the specified value casted to uintptr_t.
  ///
  Value *getCastedVersionOf(Instruction::CastOps opcode, Value *V);

  /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
  /// reducible SCEV, recursively add its users to the IVUsesByStride set and
  /// return true.  Processed holds the instructions already visited.
  bool AddUsersIfInteresting(Instruction *I, Loop *L,
                             SmallPtrSet<Instruction*,16> &Processed);
  SCEVHandle GetExpressionSCEV(Instruction *E);
  ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
                                IVStrideUse* &CondUse,
                                const SCEVHandle* &CondStride);
  void OptimizeIndvars(Loop *L);

  /// OptimizeShadowIV - If IV is used in a int-to-float cast
  /// inside the loop then try to eliminate the cast operation.
  void OptimizeShadowIV(Loop *L);

  /// OptimizeSMax - Rewrite the loop's terminating condition
  /// if it uses an smax computation.
  ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
                         IVStrideUse* &CondUse);

  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                         const SCEVHandle *&CondStride);
  bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
  SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
                             const std::vector<BasedUser>& UsersToProcess);
  bool ValidStride(bool, int64_t,
                   const std::vector<BasedUser>& UsersToProcess);
  SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
                            IVUsersOfOneStride &Uses,
                            Loop *L,
                            bool &AllUsesAreAddresses,
                            bool &AllUsesAreOutsideLoop,
                            std::vector<BasedUser> &UsersToProcess);
  bool ShouldUseFullStrengthReductionMode(
                              const std::vector<BasedUser> &UsersToProcess,
                              const Loop *L,
                              bool AllUsesAreAddresses,
                              SCEVHandle Stride);
  void PrepareToStrengthReduceFully(
                           std::vector<BasedUser> &UsersToProcess,
                           SCEVHandle Stride,
                           SCEVHandle CommonExprs,
                           const Loop *L,
                           SCEVExpander &PreheaderRewriter);
  void PrepareToStrengthReduceFromSmallerStride(
                                       std::vector<BasedUser> &UsersToProcess,
                                       Value *CommonBaseV,
                                       const IVExpr &ReuseIV,
                                       Instruction *PreInsertPt);
  void PrepareToStrengthReduceWithNewPhi(
                                std::vector<BasedUser> &UsersToProcess,
                                SCEVHandle Stride,
                                SCEVHandle CommonExprs,
                                Value *CommonBaseV,
                                const Loop *L,
                                SCEVExpander &PreheaderRewriter);
  // NOTE(review): the trailing parameter of this declaration was missing in
  // the damaged text; Loop *L matches the other per-stride helpers above --
  // confirm against the out-of-view definition.
  void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                    IVUsersOfOneStride &Uses,
                                    Loop *L);
  void DeleteTriviallyDeadInstructions();
};
}
// Definition of the pass identifier; the LoopStrengthReduce constructor
// passes its address to LoopPass.
char LoopStrengthReduce::ID = 0;
// Registers the pass with the argument string "loop-reduce" and the
// description "Loop Strength Reduction".
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");
/// createLoopStrengthReducePass - Return a newly allocated LoopStrengthReduce
/// pass constructed with the given TargetLowering pointer.
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
/// getCastedVersionOf - Return the specified value casted to uintptr_t. This
/// assumes that the Value* V is of integer or pointer type only.
///
/// \param opcode  the cast operation to apply when a cast is needed.
/// \param V       the value to convert.
/// \returns V itself when it already has type UIntPtrTy, a constant
///          expression when V is a constant, and otherwise a cast that is
///          cached in CastedPointers so each value is cast only once.
Value *LoopStrengthReduce::getCastedVersionOf(Instruction::CastOps opcode,
                                              Value *V) {
  if (V->getType() == UIntPtrTy) return V;
  if (Constant *CB = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(opcode, CB, UIntPtrTy);

  // Reuse a previously created cast of this value if there is one.
  Value *&New = CastedPointers[V];
  if (New) return New;

  New = SCEVExpander::InsertCastOfTo(opcode, V, UIntPtrTy);
  // The new cast may end up unused; queue it so that
  // DeleteTriviallyDeadInstructions can clean it up.
  DeadInsts.push_back(cast<Instruction>(New));
  return New;
}
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
// Sort the deadinsts list so that we can trivially eliminate duplicates as we
// go. The code below never adds a non-dead instruction to the worklist, but
// callers may not be so careful.
array_pod_sort(DeadInsts.begin(), DeadInsts.end());
// Drop duplicate instructions and those with uses.
for (unsigned i = 0, e = DeadInsts.size()-1; i < e; ++i) {
Instruction *I = DeadInsts[i];
if (!I->use_empty()) DeadInsts[i] = 0;
while (i != e && DeadInsts[i+1] == I)
DeadInsts[++i] = 0;
}
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.back();
DeadInsts.pop_back();
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
SE->deleteValueFromRecords(I);
Bill Wendling
committed
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = 0;
if (U->use_empty())
DeadInsts.push_back(U);
}
I->eraseFromParent();
Changed = true;
}
}
/// GetExpressionSCEV - Compute and return the SCEV for the specified
/// instruction.
///
/// Bitcasts take the SCEV of their instruction operand, and GEPs that
/// ScalarEvolution does not yet know about are expanded here into
/// "base + sum of scaled indices" in uintptr_t arithmetic.  Everything else is
/// delegated to ScalarEvolution.
SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp) {
  // Pointer to pointer bitcast instructions return the same value as their
  // operand.
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Exp)) {
    if (SE->hasSCEV(BCI) || !isa<Instruction>(BCI->getOperand(0)))
      return SE->getSCEV(BCI);
    SCEVHandle R = GetExpressionSCEV(cast<Instruction>(BCI->getOperand(0)));
    SE->setSCEV(BCI, R);
    return R;
  }

  // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions.
  // If this is a GEP that SE doesn't know about, compute it now and insert it.
  // If this is not a GEP, or if we have already done this computation, just let
  // SE figure it out.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp);
  if (!GEP || SE->hasSCEV(GEP))
    return SE->getSCEV(Exp);

  // Analyze all of the subscripts of this getelementptr instruction, looking
  // for uses that are determined by the trip count of the loop.  First, skip
  // all operands that are not dependent on the IV.

  // Build up the base expression.  Insert an LLVM cast of the pointer to
  // uintptr_t first.
  SCEVHandle GEPVal = SE->getUnknown(
      getCastedVersionOf(Instruction::PtrToInt, GEP->getOperand(0)));

  gep_type_iterator GTI = gep_type_begin(GEP);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
       i != e; ++i, ++GTI) {
    // If this is a use of a recurrence that we can analyze, and it comes before
    // Op does in the GEP operand list, we will handle this when we process this
    // operand.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      // Struct field: the index is a constant, so add the field's byte offset.
      const StructLayout *SL = TD->getStructLayout(STy);
      unsigned Idx = cast<ConstantInt>(*i)->getZExtValue();
      uint64_t Offset = SL->getElementOffset(Idx);
      GEPVal = SE->getAddExpr(GEPVal,
                              SE->getIntegerSCEV(Offset, UIntPtrTy));
    } else {
      // Sequential index: bring it to pointer width, then scale it by the
      // padded size of the indexed type.
      unsigned GEPOpiBits =
        (*i)->getType()->getPrimitiveSizeInBits();
      unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits();
      Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ?
          Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc :
            Instruction::BitCast));
      Value *OpVal = getCastedVersionOf(opcode, *i);
      SCEVHandle Idx = SE->getSCEV(OpVal);

      uint64_t TypeSize = TD->getTypePaddedSize(GTI.getIndexedType());
      if (TypeSize != 1)
        Idx = SE->getMulExpr(Idx,
                             SE->getConstant(ConstantInt::get(UIntPtrTy,
                                                              TypeSize)));
      GEPVal = SE->getAddExpr(GEPVal, Idx);
    }
  }

  // Record the result so later queries hit the hasSCEV() fast path above, and
  // remember the GEP because SCEV does not track changes to its operands.
  SE->setSCEV(GEP, GEPVal);
  GEPlist.push_back(GEP);
  return GEPVal;
}
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
/// containsAddRecFromDifferentLoop - Return true when the expression S has,
/// somewhere inside it, an AddRec that belongs to a loop other than L.  An
/// AddRec of a loop that encloses L is acceptable; one from an inner loop or
/// from an unrelated loop is not.
static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
  // Constants are by far the most frequent case, so test for them first.
  if (isa<SCEVConstant>(S))
    return false;

  // Commutative expressions: any offending operand taints the whole thing.
  if (SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(S)) {
    for (unsigned Op = 0; Op < Comm->getNumOperands(); ++Op) {
      if (containsAddRecFromDifferentLoop(Comm->getOperand(Op), L))
        return true;
    }
    return false;
  }

  if (SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(S)) {
    const Loop *RecLoop = Rec->getLoop();
    if (!RecLoop)
      return true;
    if (RecLoop == L)
      return false;
    // An outer loop of L is fine; anything else counts as "different".
    if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, RecLoop))
      return false;
    return true;
  }

  if (SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(S)) {
    if (containsAddRecFromDifferentLoop(Div->getLHS(), L))
      return true;
    return containsAddRecFromDifferentLoop(Div->getRHS(), L);
  }

#if 0
  // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
  // need this when it is.
  if (SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
           containsAddRecFromDifferentLoop(DE->getRHS(), L);
#endif

  // Casts simply forward the question to their single operand.
  if (SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S))
    return containsAddRecFromDifferentLoop(Trunc->getOperand(), L);
  if (SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S))
    return containsAddRecFromDifferentLoop(ZExt->getOperand(), L);
  if (SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S))
    return containsAddRecFromDifferentLoop(SExt->getOperand(), L);

  return false;
}
/// getSCEVStartAndStride - Compute the start and stride of this expression,
/// returning false if the expression is not a start/stride pair, or true if it
/// is.  The stride must be a loop invariant expression, but the start may be
/// a mix of loop invariant and loop variant expressions.  The start cannot,
/// however, contain an AddRec from a different loop, unless that loop is an
/// outer loop of the current loop.
///
/// On entry Start holds the initial accumulator (the visible caller passes a
/// zero of the expression's type); on success Start and Stride are updated.
static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
                                  SCEVHandle &Start, SCEVHandle &Stride,
                                  ScalarEvolution *SE, DominatorTree *DT) {
  SCEVHandle TheAddRec = Start;   // Initialize to zero.

  // If the outer level is an AddExpr, the operands are all start values except
  // for a nested AddRecExpr.
  if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
    for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
      if (SCEVAddRecExpr *AddRec =
             dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
        if (AddRec->getLoop() == L)
          TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
        else
          return false;  // Nested IV of some sort?
      } else {
        Start = SE->getAddExpr(Start, AE->getOperand(i));
      }
  } else if (isa<SCEVAddRecExpr>(SH)) {
    // The whole expression is the recurrence.  This assignment must not run
    // for the AddExpr case above, or it would discard the recurrence that was
    // just separated from the start operands.
    TheAddRec = SH;
  } else {
    return false;  // not analyzable.
  }

  SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
  if (!AddRec || AddRec->getLoop() != L) return false;

  // FIXME: Generalize to non-affine IV's.
  if (!AddRec->isAffine()) return false;

  // If Start contains an SCEVAddRecExpr from a different loop, other than an
  // outer loop of the current loop, reject it.  SCEV has no concept of
  // operating on more than one loop at a time so don't confuse it with such
  // expressions.
  if (containsAddRecFromDifferentLoop(AddRec->getOperand(0), L))
    return false;

  Start = SE->getAddExpr(Start, AddRec->getOperand(0));

  if (!isa<SCEVConstant>(AddRec->getOperand(1))) {
    // If stride is an instruction, make sure it dominates the loop preheader.
    // Otherwise we could end up with a use before def situation.
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!AddRec->getOperand(1)->dominates(Preheader, DT))
      return false;

    DOUT << "[" << L->getHeader()->getName()
         << "] Variable stride: " << *AddRec << "\n";
  }

  Stride = AddRec->getOperand(1);
  return true;
}
/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value. If this user should use the post-inc version of the IV, return true.
///
/// Choosing wrong here can break dominance properties (if we choose to use the
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
Evan Cheng
committed
Loop *L, DominatorTree *DT, Pass *P,
SmallVectorImpl<Instruction*> &DeadInsts){
// If the user is in the loop, use the preinc value.
if (L->contains(User->getParent())) return false;
BasicBlock *LatchBlock = L->getLoopLatch();
// Ok, the user is outside of the loop. If it is dominated by the latch
// block, use the post-inc value.
if (DT->dominates(LatchBlock, User->getParent()))
return true;
// There is one case we have to be careful of: PHI nodes. These little guys
// can live in blocks that do not dominate the latch block, but (since their
// uses occur in the predecessor block, not the block the PHI lives in) should
// still use the post-inc value. Check for this case now.
PHINode *PN = dyn_cast<PHINode>(User);
if (!PN) return false; // not a phi, not dominated by latch block.
// Look at all of the uses of IV by the PHI node. If any use corresponds to
// a block that is not dominated by the latch block, give up and use the
// preincremented value.
unsigned NumUses = 0;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == IV) {
++NumUses;
if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
return false;
}
// Okay, all uses of IV by PN are in predecessor blocks that really are
// dominated by the latch block. Split the critical edges and use the
// post-incremented value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == IV) {
SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, false);
// Splitting the critical edge can reduce the number of entries in this
// PHI.
e = PN->getNumIncomingValues();
if (--NumUses == 0) break;
}
Evan Cheng
committed
// PHI node might have become a constant value after SplitCriticalEdge.
DeadInsts.push_back(User);
return true;
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
  // Every load counts as an address use.
  if (isa<LoadInst>(Inst))
    return true;

  // A store is an address use only through its pointer operand.
  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
    return Store->getOperand(1) == OperandVal;

  // Addressing modes can also be folded into prefetches and a variety
  // of intrinsics.
  if (IntrinsicInst *Intrin = dyn_cast<IntrinsicInst>(Inst)) {
    switch (Intrin->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::x86_sse2_loadu_dq:
    case Intrinsic::x86_sse2_loadu_pd:
    case Intrinsic::x86_sse_loadu_ps:
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      return Intrin->getOperand(1) == OperandVal;
    default:
      return false;
    }
  }

  return false;
}
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
Chris Lattner
committed
if (!I->getType()->isInteger() && !isa<PointerType>(I->getType()))
return false; // Void and FP expressions cannot be reduced.
return true; // Instruction already handled.
// Get the symbolic expression for this instruction.
if (isa<SCEVCouldNotCompute>(ISE)) return false;
// Get the start and stride for this expression.
SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
Chris Lattner
committed
SCEVHandle Stride = Start;
if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
std::vector<Instruction *> IUsers;
// Collect all I uses now because IVUseShouldUsePostIncValue may
// invalidate use_iterator.
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
IUsers.push_back(cast<Instruction>(*UI));
for (unsigned iused_index = 0, iused_size = IUsers.size();
iused_index != iused_size; ++iused_index) {
Instruction *User = IUsers[iused_index];
// Do not infinitely recurse on PHI nodes.
Chris Lattner
committed
if (isa<PHINode>(User) && Processed.count(User))
continue;
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
// consider references ouside the loop in all cases.
// If User is already in Processed, we don't want to recurse into it again,
// but do want to record a second reference in the same instruction.
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
!AddUsersIfInteresting(User, L, Processed)) {
DOUT << "FOUND USER in other loop: " << *User
<< " OF SCEV: " << *ISE << "\n";
AddUserToIVUsers = true;
}
} else if (Processed.count(User) ||
!AddUsersIfInteresting(User, L, Processed)) {
DOUT << "FOUND USER: " << *User
<< " OF SCEV: " << *ISE << "\n";
AddUserToIVUsers = true;
}
if (AddUserToIVUsers) {
IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride];
if (StrideUses.Users.empty()) // First occurrence of this stride?
StrideOrder.push_back(Stride);
// Okay, we found a user that we cannot reduce. Analyze the instruction
// and decide what to do with it. If we are a use inside of the loop, use
// the value before incrementation, otherwise use it after incrementation.
Evan Cheng
committed
if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
StrideUses.addUser(NewStart, User, I);
StrideUses.Users.back().isUseOfPostIncrementedValue = true;
DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
StrideUses.addUser(Start, User, I);
}
}
return true;
}
namespace {
/// BasedUser - For a particular base value, keep information about how we've
/// partitioned the expression so far.
struct BasedUser {
/// SE - The current ScalarEvolution object.
ScalarEvolution *SE;
/// Base - The Base value for the PHI node that needs to be inserted for
/// this use. As the use is processed, information gets moved from this
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
SCEVHandle Base;
/// Inst - The instruction using the induction variable.
Instruction *Inst;
/// OperandValToReplace - The operand value of Inst to replace with the
/// EmittedBase.
Value *OperandValToReplace;
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
/// must be added inside the loop.
SCEVHandle Imm;
/// Phi - The induction variable that performs the striding that
/// should be used for this user.
Value *Phi;
/// IncV - The post-incremented value of Phi.
Value *IncV;
// isUseOfPostIncrementedValue - True if this should use the
// post-incremented version of this IV, not the preincremented version.
// This can only be set in special cases, such as the terminating setcc
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.Offset), Inst(IVSU.User),
OperandValToReplace(IVSU.OperandValToReplace),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
Instruction *InsertPt,
Evan Cheng
committed
SCEVExpander &Rewriter, Loop *L, Pass *P,
SmallVectorImpl<Instruction*> &DeadInsts);
Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L);
void dump() const;
};
}
void BasedUser::dump() const {
Bill Wendling
committed
cerr << " Base=" << *Base;
cerr << " Imm=" << *Imm;
cerr << " Inst: " << *Inst;
}
/// InsertCodeForBaseAtPosition - Expand NewBase into code, hoisting it out of
/// loops in which it is invariant, and then add Imm (when it is not zero) at
/// the requested position.
///
/// \param NewBase   the base expression to materialize.
/// \param Rewriter  the expander used to emit code.
/// \param IP        the requested insertion point (in the user's block).
/// \param L         the loop being strength reduced.
/// \returns the value computing NewBase, or NewBase + Imm when Imm is non-zero.
Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
                                              SCEVExpander &Rewriter,
                                              Instruction *IP, Loop *L) {
  // Figure out where we *really* want to insert this code.  In particular, if
  // the user is inside of a loop that is nested inside of L, we really don't
  // want to insert this expression before the user, we'd rather pull it out as
  // many loops as possible.
  LoopInfo &LI = Rewriter.getLoopInfo();
  Instruction *BaseInsertPt = IP;

  // Figure out the most-nested loop that IP is in.
  Loop *InsertLoop = LI.getLoopFor(IP->getParent());

  // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
  // the preheader of the outer-most loop where NewBase is not loop invariant.
  if (L->contains(IP->getParent()))
    while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
      BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
      InsertLoop = InsertLoop->getParentLoop();
    }

  Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt);

  // If there is no immediate value, skip the next part.
  if (Imm->isZero())
    return Base;

  // If we are inserting the base and imm values in the same block, make sure to
  // adjust the IP position if insertion reused a result.
  if (IP == BaseInsertPt)
    IP = Rewriter.getInsertionPoint();

  // Always emit the immediate (if non-zero) into the same block as the user.
  SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm);
  return Rewriter.expandCodeFor(NewValSCEV, IP);
}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it. NewBasePt is the last instruction which contributes to the
// value of NewBase in the case that it's a diffferent instruction from
// the PHI that NewBase is computed from, or null otherwise.
//
void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
Instruction *NewBasePt,
Evan Cheng
committed
SCEVExpander &Rewriter, Loop *L, Pass *P,
SmallVectorImpl<Instruction*> &DeadInsts){
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
// computation. This is usually safe, because we obviously used to use the
// computation when it was computed in its current block. However, in some
// cases (e.g. use of a post-incremented induction variable) the NewBase
// value will be pinned to live somewhere after the original computation.
// In this case, we have to back off.
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
// artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
++InsertPt;
} else if (Instruction *OpInst
= dyn_cast<Instruction>(OperandValToReplace)) {
InsertPt = OpInst;
while (isa<PHINode>(InsertPt)) ++InsertPt;
}
}
Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
// Adjust the type back to match the Inst. Note that we can't use InsertPt
// here because the SCEVExpander may have inserted the instructions after
// that point, in its efforts to avoid inserting redundant expressions.
if (isa<PointerType>(OperandValToReplace->getType())) {
NewVal = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr,
NewVal,
OperandValToReplace->getType());
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
DOUT << " Replacing with ";
DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false));
DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
return;
}
// PHI nodes are more complex. We have to insert one copy of the NewBase+Imm
Chris Lattner
committed
// expression into each operand block that uses it. Note that PHI nodes can
// have multiple entries for the same predecessor. We use a map to make sure
// that a PHI node only has a single Value* for each predecessor (which also
// prevents us from inserting duplicate code in some blocks).
DenseMap<BasicBlock*, Value*> InsertedCode;
PHINode *PN = cast<PHINode>(Inst);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
// which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
// that case(?).
Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
if (L->contains(OldLoc->getParent())) {
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
BasicBlock *PHIPred = PN->getIncomingBlock(i);
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
// Next step: move the basic block. In particular, if the PHI node
// is outside of the loop, and PredTI is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after PredTI.
if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
BasicBlock *NewBB = PN->getIncomingBlock(i);
NewBB->moveBefore(PN->getParent());
}
// Splitting the edge can reduce the number of PHI entries we have.
e = PN->getNumIncomingValues();
Chris Lattner
committed
}
}
Chris Lattner
committed
Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
if (!Code) {
// Insert the code into the end of the predecessor block.
Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
PN->getIncomingBlock(i)->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
// Adjust the type back to match the PHI. Note that we can't use
// InsertPt here because the SCEVExpander may have inserted its
// instructions after that point, in its efforts to avoid inserting
// redundant expressions.
if (isa<PointerType>(PN->getType())) {
Code = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr,
Code,
PN->getType());
DOUT << " Changing PHI use to ";
DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false));
DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
Chris Lattner
committed
}
// Replace the use of the operand Value with the new Phi we just created.
Chris Lattner
committed
PN->setIncomingValue(i, Code);
Rewriter.clear();
}
}
Evan Cheng
committed
// PHI node might have become a constant value after SplitCriticalEdge.
DeadInsts.push_back(Inst);
}
/// fitsInAddressMode - Return true if V can be subsumed within an addressing
/// mode, and does not need to be put in a register first.
///
/// Two shapes of V are recognized:
///   - a SCEVConstant, which is tested as the base offset of an addressing
///     mode; and
///   - a SCEVUnknown wrapping a ptrtoint constant expression whose operand is
///     a GlobalValue, which is tested as the base global of an addressing
///     mode (only when TLI is non-null).
/// Every other expression yields false.
///
/// UseTy is forwarded to TLI->isLegalAddressingMode as the accessed type.  TLI
/// may be null, in which case only the constant case can succeed, using the
/// hard-coded fallback range below.  HasBaseReg is copied into the addressing
/// mode being tested.
static bool fitsInAddressMode(const SCEVHandle &V, const Type *UseTy,
                              const TargetLowering *TLI, bool HasBaseReg) {
  if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
    int64_t VC = SC->getValue()->getSExtValue();
    if (TLI) {
      // Ask the target whether "[reg +] VC" is a legal address for UseTy.
      TargetLowering::AddrMode AM;
      AM.BaseOffs = VC;
      AM.HasBaseReg = HasBaseReg;
      return TLI->isLegalAddressingMode(AM, UseTy);
    } else {
      // Defaults to PPC.  PPC allows a sign-extended 16-bit immediate field.
      // NOTE(review): these bounds accept -65535..65534, which is wider than
      // a signed 16-bit range (-32768..32767) -- confirm whether (1 << 15)
      // was intended before tightening.
      return (VC > -(1 << 16) && VC < (1 << 16)-1);
    }
  }

  if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
      if (TLI && CE->getOpcode() == Instruction::PtrToInt) {
        Constant *Op0 = CE->getOperand(0);
        if (GlobalValue *GV = dyn_cast<GlobalValue>(Op0)) {
          // Ask the target whether "GV [+ reg]" is a legal address for UseTy.
          TargetLowering::AddrMode AM;
          AM.BaseGV = GV;
          AM.HasBaseReg = HasBaseReg;
          return TLI->isLegalAddressingMode(AM, UseTy);
        }
      }

  // Not a form that can be folded into an addressing mode.
  return false;
}
/// MoveLoopVariantsToImmediateField - Split Val with respect to loop L: every
/// subexpression of Val that is not invariant in L is added onto Imm, and Val
/// is rewritten to hold only what is left (zero if nothing is left).
static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
                                             Loop *L, ScalarEvolution *SE) {
  // A value that is entirely loop invariant has nothing to hand over.
  if (Val->isLoopInvariant(L))
    return;

  if (SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Val)) {
    // Partition the addends: loop-variant ones accumulate into Imm, while the
    // loop-invariant ones are collected to rebuild Val.
    const unsigned NumAddends = Sum->getNumOperands();
    std::vector<SCEVHandle> Kept;
    Kept.reserve(NumAddends);
    for (unsigned Idx = 0; Idx != NumAddends; ++Idx) {
      SCEVHandle Addend = Sum->getOperand(Idx);
      if (Addend->isLoopInvariant(L))
        Kept.push_back(Addend);
      else
        Imm = SE->getAddExpr(Imm, Addend);
    }

    if (Kept.empty())
      Val = SE->getIntegerSCEV(0, Val->getType());
    else
      Val = SE->getAddExpr(Kept);
    return;
  }

  if (SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(Val)) {
    // For a recurrence, only the start value is split; the remaining operands
    // are carried over unchanged into the rebuilt recurrence.
    SCEVHandle NewStart = Rec->getStart();
    MoveLoopVariantsToImmediateField(NewStart, Imm, L, SE);

    std::vector<SCEVHandle> RecOps(Rec->op_begin(), Rec->op_end());
    RecOps[0] = NewStart;
    Val = SE->getAddRecExpr(RecOps, Rec->getLoop());
    return;
  }

  // Any other loop-variant expression moves to Imm wholesale, leaving zero.
  Imm = SE->getAddExpr(Imm, Val);
  Val = SE->getIntegerSCEV(0, Val->getType());
}
Chris Lattner
committed
/// MoveImmediateValues - Look at Val, and pull out any additions of constants
/// that can fit into the immediate field of instructions in the target.
Chris Lattner
committed
/// Accumulate these immediate values into the Imm value.
static void MoveImmediateValues(const TargetLowering *TLI,
const Type *UseTy,
SCEVHandle &Val, SCEVHandle &Imm,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
Chris Lattner
committed
std::vector<SCEVHandle> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
SCEVHandle NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, UseTy, NewOp, Imm, isAddress, L, SE);
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
Imm = SE->getAddExpr(Imm, NewOp);
Chris Lattner
committed
} else {
}
Chris Lattner
committed
if (NewOps.empty())
Val = SE->getIntegerSCEV(0, Val->getType());
Chris Lattner
committed
else
Val = SE->getAddExpr(NewOps);
Chris Lattner
committed
return;
} else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
Chris Lattner
committed
SCEVHandle Start = SARE->getStart();
MoveImmediateValues(TLI, UseTy, Start, Imm, isAddress, L, SE);
Chris Lattner
committed
if (Start != SARE->getStart()) {
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
Chris Lattner
committed
}
return;
} else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
// Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
if (isAddress && fitsInAddressMode(SME->getOperand(0), UseTy, TLI, false) &&
SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {