Newer
Older
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable, it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
// %i = phi [ 0, %entry ], [ %i.next, %latch ]
// ...
// %i.next = add %i, 1
// %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using // it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated.
//
// TODO: Handle multiple loops at a time.
//
// TODO: test/CodeGen/X86/full-lsr.ll should get full lsr. The problem is
// that {0,+,1}<%bb> is getting picked first because all 7 uses can
// use it, and while it's a pretty good solution, it means that LSR
// doesn't look further to find an even better solution.
//
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
// multiple base registers, or as the increment expression in an addrec),
// we may not actually need both reg and (-1 * reg) in registers; the
// negation can be implemented by using a sub instead of an add. The
// lack of support for taking this into consideration when making
// register pressure decisions is partly worked around by the "Special"
// use kind.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
Chris Lattner
committed
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
Chris Lattner
committed
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
const ScalarEvolution &SE;
explicit StrideCompare(const ScalarEvolution &se) : SE(se) {}
bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) const {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
unsigned BitWidth = std::max(SE.getTypeSizeInBits(LHS->getType()),
SE.getTypeSizeInBits(RHS->getType()));
APInt LV = LHSC->getValue()->getValue();
APInt RV = RHSC->getValue()->getValue();
LV.sextOrTrunc(BitWidth);
RV.sextOrTrunc(BitWidth);
APInt ALV = LV.abs();
APInt ARV = RV.abs();
if (ALV == ARV) {
if (LV != RV)
return LV.sgt(RV);
} else {
return ALV.ult(ARV);
}
// If it's the same value but different type, sort by bit width so
// that we emit larger induction variables before smaller
// ones, letting the smaller be re-written in terms of larger ones.
return SE.getTypeSizeInBits(RHS->getType()) <
SE.getTypeSizeInBits(LHS->getType());
}
return LHSC && !RHSC;
}
};
/// RegSortData - This class holds data which is used to order reuse
/// candidates.
class RegSortData {
public:
/// Bits - This represents the set of LSRUses (by index) which reference a
/// particular register.
SmallBitVector Bits;
/// MaxComplexity - This represents the greatest complexity (see the comments
/// on Formula::getComplexity) seen with a particular register.
uint32_t MaxComplexity;
/// Index - This holds an arbitrary value used as a last-resort tie breaker
/// to ensure deterministic behavior.
unsigned Index;
RegSortData() {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << Bits.count()
<< ", MaxComplexity=";
OS.write_hex(MaxComplexity);
OS << ", Index=" << Index << ']';
}
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
namespace {
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/// RegCount - This is a helper class to sort a given set of registers
/// according to associated RegSortData values.
class RegCount {
public:
const SCEV *Reg;
RegSortData Sort;
RegCount(const SCEV *R, const RegSortData &RSD)
: Reg(R), Sort(RSD) {}
// Sort by count. Returning true means the register is preferred.
bool operator<(const RegCount &Other) const {
// Sort by the number of unique uses of this register.
unsigned A = Sort.Bits.count();
unsigned B = Other.Sort.Bits.count();
if (A != B) return A > B;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
const SCEVAddRecExpr *BR = dyn_cast<SCEVAddRecExpr>(Other.Reg);
// AddRecs have higher priority than other things.
if (!BR) return true;
// Prefer affine values.
if (AR->isAffine() != BR->isAffine())
return AR->isAffine();
const Loop *AL = AR->getLoop();
const Loop *BL = BR->getLoop();
if (AL != BL) {
unsigned ADepth = AL->getLoopDepth();
unsigned BDepth = BL->getLoopDepth();
// Prefer a less deeply nested addrec.
if (ADepth != BDepth)
return ADepth < BDepth;
// Different loops at the same depth; do something arbitrary.
BasicBlock *AH = AL->getHeader();
BasicBlock *BH = BL->getHeader();
for (Function::iterator I = AH, E = AH->getParent()->end(); I != E; ++I)
if (&*I == BH) return true;
return false;
Loading
Loading full blame...