//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable.  It
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
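//
// For example, a reference such as A[i] (computed as *(A + i*4)) inside a
// loop can instead use a pointer IV that is bumped by 4 each iteration,
// removing the multiply from the loop body.  (Illustrative sketch; the
// actual rewrite depends on the target's addressing modes.)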
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
STATISTIC(NumReduced , "Number of IV uses strength reduced");
STATISTIC(NumInserted, "Number of PHIs inserted");
STATISTIC(NumVariable, "Number of PHIs with variable strides");
STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
cl::Hidden);
namespace {
/// IVExpr - This structure keeps track of one IV expression inserted during
/// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
/// well as the PHI node and increment value created for rewrite.
struct IVExpr {
  const SCEV *Stride;
  const SCEV *Base;
  PHINode    *PHI;

  IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
    : Stride(stride), Base(base), PHI(phi) {}
};
/// IVsOfOneStride - This structure keeps track of all IV expressions inserted
/// during StrengthReduceStridedIVUsers for a particular stride of the IV.
struct IVsOfOneStride {
std::vector<IVExpr> IVs;
void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
IVs.push_back(IVExpr(Stride, Base, PHI));
}
};
class LoopStrengthReduce : public LoopPass {
IVUsers *IU;
ScalarEvolution *SE;
bool Changed;
/// IVsByStride - Keep track of all IVs that have been inserted for a
/// particular stride.
std::map<const SCEV *, IVsOfOneStride> IVsByStride;
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
public:
explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved("loops");
AU.addPreserved("domfrontier");
AU.addPreserved("domtree");
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
}
void OptimizeIndvars(Loop *L);
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop then try to eliminate the cast operation.
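/// For example (illustrative sketch), in
///   for (int i = 0; i < n; ++i) { ... (double)i ... }
/// the cast can be removed by keeping a parallel floating-point "shadow" IV
/// that is incremented by 1.0 each iteration, when that is known to be safe.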
void OptimizeShadowIV(Loop *L);
/// OptimizeMax - Rewrite the loop's terminating condition
/// if it uses a max computation.
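/// (Typically this is a trip-count guard such as "i < smax(n, 1)" inserted
/// by indvars; when the loop is known to execute at least once, the max can
/// be dropped from the exit test.  Illustrative note.)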
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
/// deciding when to exit the loop is used only for that purpose, try to
/// rearrange things so it counts down to a test against zero.
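/// For example (sketch): "for (i = 0; i != n; ++i)" can become
/// "for (j = n; j != 0; --j)", which lets targets with decrement-and-branch
/// or count-register instructions test against zero cheaply.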
bool OptimizeLoopCountIV(Loop *L);
bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
IVStrideUse* &CondUse, Loop *L);
/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
/// single stride of IV. All of the users may have different starting
/// values, and this may not be the only stride.
void StrengthReduceIVUsersOfStride(const SCEV *Stride,
                                   IVUsersOfOneStride &Uses,
                                   Loop *L);
void StrengthReduceIVUsers(Loop *L);
ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
                              IVStrideUse* &CondUse,
                              const SCEV* &CondStride,
                              bool PostPass = false);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                       const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
                            IVExpr &, const Type *,
                            const std::vector<BasedUser>& UsersToProcess);
bool ValidScale(bool, int64_t,
const std::vector<BasedUser>& UsersToProcess);
bool ValidOffset(bool, int64_t, int64_t,
const std::vector<BasedUser>& UsersToProcess);
const SCEV *CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
bool ShouldUseFullStrengthReductionMode(
                      const std::vector<BasedUser> &UsersToProcess,
                      const Loop *L,
                      bool AllUsesAreAddresses,
                      const SCEV *Stride);
void PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
const SCEV *Stride,
const SCEV *CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter);
void PrepareToStrengthReduceFromSmallerStride(
std::vector<BasedUser> &UsersToProcess,
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt);
void PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
const SCEV *Stride,
const SCEV *CommonExprs,
Value *CommonBaseV,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
void DeleteTriviallyDeadInstructions();
};
}
char LoopStrengthReduce::ID = 0;
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
/// DeleteTriviallyDeadInstructions - If any of the instructions in the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
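// Note: DeadInsts holds WeakVH handles, which null themselves out when the
// instruction they reference is deleted elsewhere; the dyn_cast_or_null
// below filters out those cleared entries.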
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
  while (!DeadInsts.empty()) {
    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());

    if (I == 0 || !isInstructionTriviallyDead(I))
      continue;

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        *OI = 0;
        if (U->use_empty())
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
    Changed = true;
  }
}
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(1) == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
case Intrinsic::x86_sse2_loadu_dq:
case Intrinsic::x86_sse2_loadu_pd:
case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
if (II->getOperand(1) == OperandVal)
isAddress = true;
break;
}
}
return isAddress;
}
/// getAccessType - Return the type of the memory being accessed.
static const Type *getAccessType(const Instruction *Inst) {
const Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
AccessTy = II->getOperand(1)->getType();
break;
}
}
  return AccessTy;
}

namespace {
/// BasedUser - For a particular base value, keep information about how we've
/// partitioned the expression so far.
struct BasedUser {
  /// Base - The Base value for the PHI node that needs to be inserted for
  /// this use.  As the use is processed, information gets moved from this
  /// field to the Imm field (below).  BasedUser values are sorted by this
  /// field.
  const SCEV *Base;

  /// Inst - The instruction using the induction variable.
  Instruction *Inst;
/// OperandValToReplace - The operand value of Inst to replace with the
/// EmittedBase.
Value *OperandValToReplace;
  /// Imm - The immediate value that should be added to the base immediately
  /// before Inst, because it will be folded into the imm field of the
  /// instruction.  This is also sometimes used for loop-variant values that
  /// must be added inside the loop.
  const SCEV *Imm;
/// Phi - The induction variable that performs the striding that
/// should be used for this user.
PHINode *Phi;
// isUseOfPostIncrementedValue - True if this should use the
// post-incremented version of this IV, not the preincremented version.
// This can only be set in special cases, such as the terminating setcc
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
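// (For example, a loop's exit test can compare the post-incremented value
// "i+1" against the bound, so the pre-increment value need not stay live
// across the increment.  Illustrative note.)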
bool isUseOfPostIncrementedValue;
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
Imm(se->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
  void RewriteInstructionToUseNewBase(const SCEV *NewBase,
                                      Instruction *InsertPt,
                                      SCEVExpander &Rewriter, Loop *L, Pass *P,
                                      SmallVectorImpl<WeakVH> &DeadInsts,
                                      ScalarEvolution *SE);
  Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
                                     const Type *Ty,
                                     SCEVExpander &Rewriter,
                                     Instruction *IP,
                                     ScalarEvolution *SE);
void dump() const;
};
}
void BasedUser::dump() const {
dbgs() << " Base=" << *Base;
dbgs() << " Imm=" << *Imm;
dbgs() << " Inst: " << *Inst;
}
Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
                                              const Type *Ty,
                                              SCEVExpander &Rewriter,
                                              Instruction *IP,
                                              ScalarEvolution *SE) {
  Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);

  // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
  // re-analyze it.
  const SCEV *NewValSCEV = SE->getUnknown(Base);

  // Always emit the immediate into the same block as the user.
  NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);

  return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.  NewBasePt is the last instruction which contributes to the
// value of NewBase in the case that it's a different instruction from
// the PHI that NewBase is computed from, or null otherwise.
//
void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
                                               Instruction *NewBasePt,
                                               SCEVExpander &Rewriter, Loop *L, Pass *P,
                                               SmallVectorImpl<WeakVH> &DeadInsts,
                                               ScalarEvolution *SE) {
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
// computation. This is usually safe, because we obviously used to use the
// computation when it was computed in its current block. However, in some
// cases (e.g. use of a post-incremented induction variable) the NewBase
// value will be pinned to live somewhere after the original computation.
// In this case, we have to back off.
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
// artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
++InsertPt;
} else if (Instruction *OpInst
= dyn_cast<Instruction>(OperandValToReplace)) {
InsertPt = OpInst;
while (isa<PHINode>(InsertPt)) ++InsertPt;
}
}
Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
OperandValToReplace->getType(),
Rewriter, InsertPt, SE);
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
DEBUG(dbgs() << " Replacing with ");
DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false));
DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
<< *Imm << "\n");
return;
}
  // PHI nodes are more complex.  We have to insert one copy of the NewBase+Imm
  // expression into each operand block that uses it.  Note that PHI nodes can
  // have multiple entries for the same predecessor.  We use a map to make sure
  // that a PHI node only has a single Value* for each predecessor (which also
  // prevents us from inserting duplicate code in some blocks).
  DenseMap<BasicBlock*, Value*> InsertedCode;
PHINode *PN = cast<PHINode>(Inst);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
// which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
// that case(?).
Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
BasicBlock *PHIPred = PN->getIncomingBlock(i);
if (L->contains(OldLoc)) {
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(PHIPred->getTerminator()) &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(),
P, false);
// Next step: move the basic block. In particular, if the PHI node
// is outside of the loop, and PredTI is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after PredTI.
if (L->contains(PHIPred) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
// Splitting the edge can reduce the number of PHI entries we have.
e = PN->getNumIncomingValues();
PHIPred = NewBB;
i = PN->getBasicBlockIndex(PHIPred);
}
}
Value *&Code = InsertedCode[PHIPred];
if (!Code) {
// Insert the code into the end of the predecessor block.
Instruction *InsertPt = (L->contains(OldLoc)) ?
PHIPred->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
Rewriter, InsertPt, SE);
DEBUG(dbgs() << " Changing PHI use to ");
DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false));
DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
<< *Imm << "\n");
}
// Replace the use of the operand Value with the new Phi we just created.
PN->setIncomingValue(i, Code);
Rewriter.clear();
}
}
// PHI node might have become a constant value after SplitCriticalEdge.
DeadInsts.push_back(Inst);
}
/// fitsInAddressMode - Return true if V can be subsumed within an addressing
/// mode, and does not need to be put in a register first.
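/// For example, on x86 a small constant can live in the displacement field
/// of a [reg+disp] address, so "p+64" costs no extra instruction; which
/// offsets and globals qualify is target-specific (illustrative note).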
static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy,
                              const TargetLowering *TLI, bool HasBaseReg) {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
    int64_t VC = SC->getValue()->getSExtValue();
    if (TLI) {
      TargetLowering::AddrMode AM;
      AM.BaseOffs = VC;
      AM.HasBaseReg = HasBaseReg;
      return TLI->isLegalAddressingMode(AM, AccessTy);
    } else {
      // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
      return (VC > -(1 << 16) && VC < (1 << 16)-1);
    }
  }

  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
    if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) {
      if (TLI) {
        TargetLowering::AddrMode AM;
        AM.BaseGV = GV;
        AM.HasBaseReg = HasBaseReg;
        return TLI->isLegalAddressingMode(AM, AccessTy);
      } else {
        // Default: assume global addresses are not legal.
      }
    }

  return false;
}
/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
/// loop varying to the Imm operand.
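/// For example (sketch), with loop-invariant "a" and loop-variant "v",
/// Val = a+v becomes Val = a with Imm accumulating v, so only the invariant
/// part is materialized outside the loop.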
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
                                             Loop *L, ScalarEvolution *SE) {
  if (Val->isLoopInvariant(L)) return;  // Nothing to do.

  if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
    SmallVector<const SCEV *, 4> NewOps;
    NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
Imm = SE->getAddExpr(Imm, SAE->getOperand(i));
} else {
NewOps.push_back(SAE->getOperand(i));
}
if (NewOps.empty())
Val = SE->getIntegerSCEV(0, Val->getType());
else
Val = SE->getAddExpr(NewOps);
  } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
    // Try to pull immediates out of the start value of nested addrec's.
    const SCEV *Start = SARE->getStart();
    MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
// Otherwise, all of Val is variant, move the whole thing over.
Imm = SE->getAddExpr(Imm, Val);
Val = SE->getIntegerSCEV(0, Val->getType());
}
}
/// MoveImmediateValues - Look at Val, and pull out any additions of constants
/// that can fit into the immediate field of instructions in the target.
/// Accumulate these immediate values into the Imm value.
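/// For example (sketch), for an address expression "p + 4" the constant 4
/// can be moved into Imm and later folded into the memory operand's
/// displacement, leaving Val = p.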
static void MoveImmediateValues(const TargetLowering *TLI,
                                const Type *AccessTy,
                                const SCEV *&Val, const SCEV *&Imm,
                                bool isAddress, Loop *L,
                                ScalarEvolution *SE) {
  if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
    SmallVector<const SCEV *, 4> NewOps;
    NewOps.reserve(SAE->getNumOperands());

    for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
      const SCEV *NewOp = SAE->getOperand(i);
      MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);

      if (!NewOp->isLoopInvariant(L)) {
        // If this is a loop-variant expression, it must stay in the immediate
        // field of the expression.
        Imm = SE->getAddExpr(Imm, NewOp);
      } else {
        NewOps.push_back(NewOp);
      }
    }
    if (NewOps.empty())
      Val = SE->getIntegerSCEV(0, Val->getType());
    else
      Val = SE->getAddExpr(NewOps);
    return;
  } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
    // Try to pull immediates out of the start value of nested addrec's.
    const SCEV *Start = SARE->getStart();
    MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);

    if (Start != SARE->getStart()) {
      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
      Ops[0] = Start;
      Val = SE->getAddRecExpr(Ops, SARE->getLoop());
    }
    return;
} else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
// Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
if (isAddress &&
fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
// If we extracted something out of the subexpressions, see if we can
// simplify this!
    if (NewOp != SME->getOperand(1)) {
      // Scale SubImm up by "8".  If the result is a target constant, we are
      // good.
      SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
      if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
        // Accumulate the immediate.
        Imm = SE->getAddExpr(Imm, SubImm);
        // Update what is left of 'Val'.
        Val = SE->getMulExpr(SME->getOperand(0), NewOp);
        return;
      }
    }
  }
}

  // Loop-variant expressions must stay in the immediate field of the
  // expression.
  if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) ||
      !Val->isLoopInvariant(L)) {
    Imm = SE->getAddExpr(Imm, Val);
    Val = SE->getIntegerSCEV(0, Val->getType());
    return;
  }

  // Otherwise, no immediates to move.
}
static void MoveImmediateValues(const TargetLowering *TLI,
                                Instruction *User,
                                const SCEV *&Val, const SCEV *&Imm,
                                bool isAddress, Loop *L,
                                ScalarEvolution *SE) {
  const Type *AccessTy = getAccessType(User);
  MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE);
}
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
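/// For example (sketch), the addrec {a+b,+,4} is decomposed into {0,+,4},
/// a, and b, so bases that share "a" or "b" can be matched across uses.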
static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
const SCEV *Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
} else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());
if (SARE->getOperand(0) == Zero) {
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
} else if (!Expr->isZero()) {
// Do not add zero.
SubExprs.push_back(Expr);
}
}
// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
/// the Uses, removing any common subexpressions, except that if all such
/// subexpressions can be folded into an addressing mode for all uses inside
/// the loop (this case is referred to as "free" in comments herein) we do
/// not remove anything. This looks for things like (a+b+c) and
/// (a+c+d) and computes the common (a+c) subexpression. The common expression
/// is *removed* from the Bases and returned.
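/// (E.g. if the shared part is a small constant that every use can fold
/// into its addressing mode, hoisting it would gain nothing, so it is left
/// in place.  Illustrative note.)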
static const SCEV *
RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
ScalarEvolution *SE, Loop *L,
const TargetLowering *TLI) {
  unsigned NumUses = Uses.size();

  // Only one use?  This is a very common case, so we handle it specially and
  // cheaply.
  const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
  const SCEV *Result = Zero;
  const SCEV *FreeResult = Zero;
  if (NumUses == 1) {
    // If the use is inside the loop, use its base, regardless of what it is:
    // it is clearly shared across all the IV's.  If the use is outside the
    // loop (which means after it) we don't want to factor anything *into* the
    // loop, so just use 0 as the base.
    if (L->contains(Uses[0].Inst))
      std::swap(Result, Uses[0].Base);
    return Result;
  }
// To find common subexpressions, count how many of Uses use each expression.
// If any subexpressions are used Uses.size() times, they are common.
  // Also track whether all uses of each expression can be moved into an
  // addressing mode "for free"; such expressions are left within the loop.
  // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
  std::map<const SCEV *, SubExprUseData> SubExpressionUseData;

  // UniqueSubExprs - Keep track of all of the subexpressions we see in the
  // order we see them.
  SmallVector<const SCEV *, 16> UniqueSubExprs;

  SmallVector<const SCEV *, 16> SubExprs;
  unsigned NumUsesInsideLoop = 0;
  for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
// Since the user is outside the loop, it must be *after* the loop (if it
// were before, it could not be based on the loop IV). We don't want users
// after the loop to affect base computation of values *inside* the loop,
// because we can always add their offsets to the result IV after the loop
// is done, ensuring we get good code inside the loop.
if (!L->contains(Uses[i].Inst))
continue;
NumUsesInsideLoop++;
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
// If this use is as an address we may be able to put CSEs in the addressing
// mode rather than hoisting them.
bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
    // We may need the AccessTy below, but only when isAddrUse, so compute it
    // only in that case.
    const Type *AccessTy = 0;
    if (isAddrUse)
      AccessTy = getAccessType(Uses[i].Inst);
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
// Add one to SubExpressionUseData.Count for each subexpr present, and
// if the subexpr is not a valid immediate within an addressing mode use,
// set SubExpressionUseData.notAllUsesAreFree. We definitely want to
// hoist these out of the loop (if they are common to all uses).
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
if (++SubExpressionUseData[SubExprs[j]].Count == 1)
UniqueSubExprs.push_back(SubExprs[j]);
if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false))
SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
}
SubExprs.clear();
}
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
    if (I->second.Count == NumUsesInsideLoop) {   // Found CSE!
      if (I->second.notAllUsesAreFree)
        Result = SE->getAddExpr(Result, I->first);
      else
        FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
SubExpressionUseData.erase(I);
}
if (FreeResult != Zero) {
// We have some subexpressions that can be subsumed into addressing
// modes in every use inside the loop. However, it's possible that
// there are so many of them that the combined FreeResult cannot
// be subsumed, or that the target cannot handle both a FreeResult
// and a Result in the same instruction (for example because it would
// require too many registers). Check this.
for (unsigned i=0; i<NumUses; ++i) {
if (!L->contains(Uses[i].Inst))
continue;
// We know this is an addressing mode use; if there are any uses that
// are not, FreeResult would be Zero.
const Type *AccessTy = getAccessType(Uses[i].Inst);
if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) {
// FIXME: could split up FreeResult into pieces here, some hoisted
// and some not. There is no obvious advantage to this.
Result = SE->getAddExpr(Result, FreeResult);
FreeResult = Zero;
break;
}
}
}
// If we found no CSE's, return now.
if (Result == Zero) return Result;
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
SubExprs.clear();
}
// Otherwise, remove all of the CSE's we found from each of the base values.
for (unsigned i = 0; i != NumUses; ++i) {
// Uses outside the loop don't necessarily include the common base, but
// the final IV value coming into those uses does. Instead of trying to
// remove the pieces of the common base, which might not be there,
// subtract off the base to compensate for this.
    if (!L->contains(Uses[i].Inst)) {
      Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
      continue;
    }

    // Split the expression into subexprs.
    SeparateSubExprs(SubExprs, Uses[i].Base, SE);

    // Remove any common subexpressions.
    for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
      if (SubExpressionUseData.count(SubExprs[j])) {
        SubExprs.erase(SubExprs.begin()+j);
        --j; --e;
      }

    // Finally, add the non-shared expressions together.
    if (SubExprs.empty())
      Uses[i].Base = Zero;
    else
      Uses[i].Base = SE->getAddExpr(SubExprs);
    SubExprs.clear();
  }

  return Result;
}
/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
const std::vector<BasedUser>& UsersToProcess) {
if (!TLI)
return true;
for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
const Type *AccessTy =
Type::getVoidTy(UsersToProcess[i].Inst->getContext());
if (isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace))
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
if (!TLI->isLegalAddressingMode(AM, AccessTy))
return false;
}
return true;
}
/// ValidOffset - Check whether the given Offset is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
int64_t Offset,
int64_t Scale,
const std::vector<BasedUser>& UsersToProcess) {
if (!TLI)
return true;
for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
const Type *AccessTy =
Type::getVoidTy(UsersToProcess[i].Inst->getContext());
if (isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace))
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
if (!TLI->isLegalAddressingMode(AM, AccessTy))
return false;
}
return true;
}
/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
/// a nop.
bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
const Type *Ty2) {
if (Ty1 == Ty2)
return false;
Ty1 = SE->getEffectiveSCEVType(Ty1);
Ty2 = SE->getEffectiveSCEVType(Ty2);
if (Ty1 == Ty2)
return false;
if (Ty1->canLosslesslyBitCastTo(Ty2))
return false;
  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
    return false;
  return true;
}
/// CheckForIVReuse - Returns the multiple if the stride is the multiple
/// of a previous stride and it is a legal value for the target addressing
/// mode scale component and optional base reg. This allows the users of
/// this stride to be rewritten as prev iv * factor. It returns 0 if no
/// reuse is possible.  Factors can be negative on some targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
/// be folded into the addressing mode, nor even that the factor be constant;
/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
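/// For example (sketch), if an IV with stride 4 already exists, users of
/// stride 8 may be rewritten as "iv*2" when the target supports a scale-2
/// addressing mode, avoiding a second IV.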
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                bool AllUsesAreAddresses,
                                bool AllUsesAreOutsideLoop,
                                const SCEV *Stride,
                                IVExpr &IV, const Type *Ty,
                                const std::vector<BasedUser>& UsersToProcess) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
    if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
      continue;
    // The other stride has no uses, don't reuse it.
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
      IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
    if (UI->second->Users.empty())
      continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
int64_t Scale = SInt / SSInt;
// Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it
// anyway. If the scale is 1, then we don't need to worry about folding
// multiplications.
if (Scale == 1 ||
(AllUsesAreAddresses &&
ValidScale(HasBaseReg, Scale, UsersToProcess))) {
// Prefer to reuse an IV with a base of zero.
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Only reuse previous IV if it would not require a type conversion
// and if the base difference can be folded.
        if (II->Base->isZero() &&
            !RequiresTypeConversion(II->Base->getType(), Ty)) {
          IV = *II;
          return SE->getIntegerSCEV(Scale, Stride->getType());
        }
// Otherwise, settle for an IV with a foldable base.
if (AllUsesAreAddresses)
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Only reuse previous IV if it would not require a type conversion
// and if the base difference can be folded.
if (SE->getEffectiveSCEVType(II->Base->getType()) ==
SE->getEffectiveSCEVType(Ty) &&
isa<SCEVConstant>(II->Base)) {
int64_t Base =
cast<SCEVConstant>(II->Base)->getValue()->getSExtValue();
if (Base > INT32_MIN && Base <= INT32_MAX &&
ValidOffset(HasBaseReg, -Base * Scale,
Scale, UsersToProcess)) {
IV = *II;
return SE->getIntegerSCEV(Scale, Stride->getType());
}
}
}
}
} else if (AllUsesAreOutsideLoop) {
// Accept nonconstant strides here; it is really really right to substitute
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();