"llvm/git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "3a33d0cc4a7fec5ff1f339e6221c9acf959caa8b"
Newer
Older
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable, it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
// %i = phi [ 0, %entry ], [ %i.next, %latch ]
// ...
// %i.next = add %i, 1
// %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using // it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
// multiple base registers, or as the increment expression in an addrec),
// we may not actually need both reg and (-1 * reg) in registers; the
// negation can be implemented by using a sub instead of an add. The
// lack of support for taking this into consideration when making
// register pressure decisions is partly worked around by the "Special"
// use kind.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Assembly/Writer.h"
Chris Lattner
committed
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
namespace {
/// RegSortData - This class holds data which is used to order reuse candidates.
class RegSortData {
public:
/// UsedByIndices - This represents the set of LSRUse indices which reference
/// a particular register.
SmallBitVector UsedByIndices;
RegSortData() {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
/// RegUseTracker - Map register candidates to information about how they are
/// used.
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
RegUsesTy RegUsesMap;
SmallVector<const SCEV *, 16> RegSequence;
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
void DropRegister(const SCEV *Reg, size_t LUIdx);
void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
void clear();
typedef SmallVectorImpl<const SCEV *>::iterator iterator;
typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
iterator begin() { return RegSequence.begin(); }
iterator end() { return RegSequence.end(); }
const_iterator begin() const { return RegSequence.begin(); }
const_iterator end() const { return RegSequence.end(); }
};
}
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
if (Pair.second)
RegSequence.push_back(Reg);
RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
RSD.UsedByIndices.set(LUIdx);
}
void
RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
RegUsesTy::iterator It = RegUsesMap.find(Reg);
assert(It != RegUsesMap.end());
RegSortData &RSD = It->second;
assert(RSD.UsedByIndices.size() > LUIdx);
RSD.UsedByIndices.reset(LUIdx);
}
RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
// Update RegUses. The data structure is not optimized for this purpose;
// we must iterate through it and update each of the bit vectors.
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
I != E; ++I) {
SmallBitVector &UsedByIndices = I->second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
}
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
if (I == RegUsesMap.end())
return false;
const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
return UsedByIndices.find_next(i) != -1;
}
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
assert(I != RegUsesMap.end() && "Unknown register!");
return I->second.UsedByIndices;
}
void RegUseTracker::clear() {
RegUsesMap.clear();
RegSequence.clear();
}
namespace {
/// Formula - This class holds information that describes a formula for
/// computing satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
/// AM - This is used to represent complex addressing, as well as other kinds
/// of interesting uses.
TargetLowering::AddrMode AM;
/// BaseRegs - The list of "base" registers for this use. When this is
/// non-empty, AM.HasBaseReg should be set to true.
SmallVector<const SCEV *, 2> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
/// when AM.Scale is not zero.
const SCEV *ScaledReg;
Formula() : ScaledReg(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
const Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
ScalarEvolution &SE) {
// Collect expressions which properly dominate the loop header.
if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
// Look at add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
DoInitialMatch(*I, L, Good, Bad, SE);
return;
}
// Look at addrec operands.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
L, Good, Bad, SE);
// Handle a multiplication by -1 (negation) if it didn't fold.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
if (Mul->getOperand(0)->isAllOnesValue()) {
SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
const SCEV *NewMul = SE.getMulExpr(Ops);
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
E = MyGood.end(); I != E; ++I)
Good.push_back(SE.getMulExpr(NegOne, *I));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
E = MyBad.end(); I != E; ++I)
Bad.push_back(SE.getMulExpr(NegOne, *I));
return;
// Ok, we can't do anything interesting. Just stuff the whole thing into a
// register and hope for the best.
Bad.push_back(S);
}
/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
AM.HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
AM.HasBaseReg = true;
}
}
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
unsigned Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
const Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
AM.BaseGV ? AM.BaseGV->getType() :
0;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
void Formula::DeleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
/// referencesReg - Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
/// which are used by uses other than the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
return true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I)
if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
return true;
return false;
}
void Formula::print(raw_ostream &OS) const {
bool First = true;
if (AM.BaseGV) {
if (!First) OS << " + "; else First = false;
WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
}
if (AM.BaseOffs != 0) {
if (!First) OS << " + "; else First = false;
OS << AM.BaseOffs;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
if (AM.HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
} else if (!AM.HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
if (AM.Scale != 0) {
if (!First) OS << " + "; else First = false;
OS << AM.Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
OS << "<unknown>";
OS << ')';
}
}
void Formula::dump() const {
print(errs()); errs() << '\n';
}
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
const Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
/// isAddSExtable - Return true if the given add can be sign-extended
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
const Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
const Type *WideTy =
IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
/// and if the remainder is known to be zero, or null otherwise. If
/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
/// to Y, ignoring that the multiplication may overflow, which is useful when
/// the result will be used in a context where the most significant bits are
/// ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {
// Handle the trivial case, which works for any SCEV type.
if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);
// Handle a few RHS special cases.
const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (RC) {
const APInt &RA = RC->getValue()->getValue();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
if (RA.isAllOnesValue())
return SE.getMulExpr(LHS, RC);
// Handle x /s 1 as x.
if (RA == 1)
return LHS;
}
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
return 0;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
return 0;
return SE.getConstant(LA.sdiv(RA));
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return 0;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
if (!Start) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
SmallVector<const SCEV *, 8> Ops;
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
if (!Op) return 0;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
// Check for a multiply operand that we can pull RHS out of.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
I != E; ++I) {
if (!Found)
IgnoreSignificantBits)) {
Found = true;
}
}
return Found ? SE.getMulExpr(Ops) : 0;
}
// Otherwise we don't know.
return 0;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
/// return that integer value, and mutate S to point to a new SCEV with that
/// value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getValue()->getValue().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
return 0;
}
/// ExtractSymbol - If S involves the addition of a GlobalValue address,
/// return that symbol, and mutate S to point to a new SCEV with that
/// value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
S = SE.getConstant(GV->getType(), 0);
return GV;
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
if (Result)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
}
/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(1) == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
case Intrinsic::x86_sse2_loadu_dq:
case Intrinsic::x86_sse2_loadu_pd:
case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
}
}
return isAddress;
}
/// getAccessType - Return the type of the memory being accessed.
static const Type *getAccessType(const Instruction *Inst) {
const Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
AccessTy = II->getArgOperand(0)->getType();
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace());
}
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = 0;
if (U->use_empty())
DeadInsts.push_back(U);
}
I->eraseFromParent();
Changed = true;
}
return Changed;
namespace {
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
/// Cost - This class is used to measure and compare candidate formulae.
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
SetupCost(0) {}
bool operator<(const Cost &Other) const;
void Loose();
void RateFormula(const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT);
void print(raw_ostream &OS) const;
void dump() const;
private:
void RateRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
Evan Cheng
committed
/// RateRegister - Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
if (AR->getLoop() == L)
AddRecCost += 1; /// TODO: This should be a function of the stride.
// If this is an addrec for a loop that's already been visited by LSR,
// don't second-guess its addrec phi nodes. LSR isn't currently smart
// enough to reason about more than one loop at a time. Consider these
// registers free and leave them alone.
else if (L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
return;
// If this isn't one of the addrecs that the loop already has, it
// would require a costly new phi and add. TODO: This isn't
// precisely modeled right now.
++NumBaseAdds;
if (!Regs.count(AR->getStart()))
RateRegister(AR->getStart(), Regs, L, SE, DT);
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
if (!Regs.count(AR->getStart()))
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
}
++NumRegs;
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
if (!isa<SCEVUnknown>(Reg) &&
!isa<SCEVConstant>(Reg) &&
!(isa<SCEVAddRecExpr>(Reg) &&
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE.hasComputableLoopEvolution(Reg, L);
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
/// before, rate it.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (Regs.insert(Reg))
RateRegister(Reg, Regs, L, SE, DT);
void Cost::RateFormula(const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (VisitedRegs.count(BaseReg)) {
Loose();
return;
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
}
if (F.BaseRegs.size() > 1)
NumBaseAdds += F.BaseRegs.size() - 1;
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
if (F.AM.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
}
/// Loose - Set this cost to a loosing value.
void Cost::Loose() {
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
}
/// operator< - Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
if (NumRegs != Other.NumRegs)
return NumRegs < Other.NumRegs;
if (AddRecCost != Other.AddRecCost)
return AddRecCost < Other.AddRecCost;
if (NumIVMuls != Other.NumIVMuls)
return NumIVMuls < Other.NumIVMuls;
if (NumBaseAdds != Other.NumBaseAdds)
return NumBaseAdds < Other.NumBaseAdds;
if (ImmCost != Other.ImmCost)
return ImmCost < Other.ImmCost;
if (SetupCost != Other.SetupCost)
return SetupCost < Other.SetupCost;
return false;
}
void Cost::print(raw_ostream &OS) const {
OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
if (AddRecCost != 0)
OS << ", with addrec cost " << AddRecCost;
if (NumIVMuls != 0)
OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
OS << ", plus " << SetupCost << " setup cost";
}
void Cost::dump() const {
print(errs()); errs() << '\n';
}
namespace {
/// LSRFixup - An operand value in an instruction which is to be replaced
/// with some equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
/// UserInst - The instruction which will be updated.
Instruction *UserInst;
/// OperandValToReplace - The operand of the instruction which will
/// be replaced. The operand may be used more than once; every instance
/// will be replaced.
Value *OperandValToReplace;
/// PostIncLoops - If this user is to use the post-incremented value of an
/// induction variable, this variable is non-null and holds the loop
/// associated with the induction variable.
PostIncLoopSet PostIncLoops;
Chris Lattner
committed
/// LUIdx - The index of the LSRUse describing the expression which
/// this fixup needs, minus an offset (below).
size_t LUIdx;
Chris Lattner
committed
/// Offset - A constant offset to be added to the LSRUse expression.
/// This allows multiple fixups to share the same LSRUse with different
/// offsets, for example in an unrolled loop.
int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
LSRFixup();
void print(raw_ostream &OS) const;
void dump() const;
};
}
LSRFixup::LSRFixup()
: UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == OperandValToReplace &&
L->contains(PN->getIncomingBlock(i)))
return false;
return true;
}
return !L->contains(UserInst);
}
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
// Store is common and interesting enough to be worth special-casing.
if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
OS << "store ";
WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
} else if (UserInst->getType()->isVoidTy())
OS << UserInst->getOpcodeName();
else
WriteAsOperand(OS, UserInst, /*PrintType=*/false);
OS << ", OperandValToReplace=";
WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
OS << ", LUIdx=" << LUIdx;
if (Offset != 0)
OS << ", Offset=" << Offset;
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
namespace {
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
static SmallVector<const SCEV *, 2> getEmptyKey() {
SmallVector<const SCEV *, 2> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
static SmallVector<const SCEV *, 2> getTombstoneKey() {
SmallVector<const SCEV *, 2> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
}
static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
const SmallVector<const SCEV *, 2> &RHS) {
return LHS == RHS;
}
};
/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information about
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
/// scaled and immediate operands it might support.
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
Address, ///< An address use; folding according to TargetLowering
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
KindType Kind;
const Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
/// AllFixupsOutsideLoop - This records whether all of the fixups using this
/// LSRUse are outside of the loop, in which case some special-case heuristics
/// may be used.
bool AllFixupsOutsideLoop;
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
/// on the implicit truncation to truncate away bogus bits.
const Type *WidestFixupType;
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// used to formulate a replacement for OperandValToReplace in UserInst.
SmallVector<Formula, 12> Formulae;
/// Regs - The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);