Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
NewStride != e; ++NewStride) {
std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride && SSInt != 1)
continue;
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require a type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Stride;
}
}
// Special case, old IV is -1*x and this one is x. Can treat this one as
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
if (Stride == ME->getOperand(1) &&
SC->getValue()->getSExtValue() == -1LL)
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return SE->getIntegerSCEV(-1LL, Stride->getType());
}
}
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
return SE->getIntegerSCEV(0, Stride->getType());
}
/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
return Val.isUseOfPostIncrementedValue;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
static bool isNonConstantNegative(const SCEV *Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess) {
// FIXME: Generalize to non-affine IV's.
if (!Stride->isLoopInvariant(L))
return SE->getIntegerSCEV(0, Stride->getType());
UsersToProcess.reserve(Uses.Users.size());
for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(),
E = Uses.Users.end(); I != E; ++I) {
UsersToProcess.push_back(BasedUser(*I, SE));
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
UsersToProcess.back().Imm, L, SE);
assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
"Base value is not loop invariant!");
}
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
// possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
const SCEV *CommonExprs =
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
bool HasAddress = false;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst)) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isPHI = false;
bool isAddress = isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace);
if (isa<PHINode>(UsersToProcess[i].Inst)) {
isPHI = true;
++NumPHI;
}
if (isAddress)
HasAddress = true;
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
}
// If one of the use is a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
return CommonExprs;
}
/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
/// is valid and profitable for the given set of users of a stride. In
/// full strength-reduction mode, all addresses at the current stride are
/// strength-reduced all the way down to pointer arithmetic.
///
bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
bool AllUsesAreAddresses,
const SCEV *Stride) {
if (!EnableFullLSRMode)
return false;
// The heuristics below aim to avoid increasing register pressure, but
// fully strength-reducing all the addresses increases the number of
// add instructions, so don't do this when optimizing for size.
// TODO: If the loop is large, the savings due to simpler addresses
// may oughtweight the costs of the extra increment instructions.
if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
return false;
// TODO: For now, don't do full strength reduction if there could
// potentially be greater-stride multiples of the current stride
// which could reuse the current stride IV.
if (IU->StrideOrder.back() != Stride)
return false;
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
// Iterate through the uses to find conditions that automatically rule out
// full-lsr mode.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
const SCEV *Base = UsersToProcess[i].Base;
const SCEV *Imm = UsersToProcess[i].Imm;
// If any users have a loop-variant component, they can't be fully
// strength-reduced.
if (Imm && !Imm->isLoopInvariant(L))
return false;
// If there are to users with the same base and the difference between
// the two Imm values can't be folded into the address, full
// strength reduction would increase register pressure.
do {
const SCEV *CurImm = UsersToProcess[i].Imm;
if ((CurImm || Imm) && CurImm != Imm) {
if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
const Instruction *Inst = UsersToProcess[i].Inst;
const Type *AccessTy = getAccessType(Inst);
const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
if (!Diff->isZero() &&
(!AllUsesAreAddresses ||
!fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
return false;
}
} while (++i != e && Base == UsersToProcess[i].Base);
}
// If there's exactly one user in this stride, fully strength-reducing it
// won't increase register pressure. If it's starting from a non-zero base,
// it'll be simpler this way.
if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
return true;
// Otherwise, if there are any users in this stride that don't require
// a register for their base, full strength-reduction will increase
// register pressure.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
if (UsersToProcess[i].Base->isZero())
return false;
// Otherwise, go for it.
return true;
Evan Cheng
committed
}
/// InsertAffinePhi Create and insert a PHI node for an induction variable
/// with the specified start and step values in the specified loop.
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
/// If NegateStride is true, the stride should be negated by using a
/// subtract instead of an add.
///
/// Return the created phi node.
///
static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &Rewriter) {
assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!");
BasicBlock *Header = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock = L->getLoopLatch();
const Type *Ty = Start->getType();
Ty = Rewriter.SE.getEffectiveSCEVType(Ty);
PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin());
PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()),
Preheader);
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Step);
const SCEV *IncAmount = Step;
if (isNegative)
IncAmount = Rewriter.SE.getNegativeSCEV(Step);
// Insert an add instruction right before the terminator corresponding
// to the back-edge or just before the only use. The location is determined
// by the caller and passed in as IVIncInsertPt.
Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
Preheader->getTerminator());
Instruction *IncV;
if (isNegative) {
IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
IVIncInsertPt);
} else {
IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
IVIncInsertPt);
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
if (!isa<ConstantInt>(StepV)) ++NumVariable;
PN->addIncoming(IncV, LatchBlock);
++NumInserted;
return PN;
}
static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// const SCEV *'s don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
const SCEV *Base = UsersToProcess[i].Base;
// Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
/// UsersToProcess, meaning lowering addresses all the way down to direct
/// pointer arithmetic.
///
void
LoopStrengthReduce::PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
const SCEV *Stride,
const SCEV *CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
DEBUG(dbgs() << " Fully reducing all users\n");
// Rewrite the UsersToProcess records, creating a separate PHI for each
// unique Base value.
Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
// TODO: The uses are grouped by base, but not sorted. We arbitrarily
// pick the first Imm value here to start with, and adjust it for the
// other uses.
const SCEV *Imm = UsersToProcess[i].Imm;
const SCEV *Base = UsersToProcess[i].Base;
const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Loop over all the users with the same base.
do {
UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
UsersToProcess[i].Phi = Phi;
assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
"ShouldUseFullStrengthReductionMode should reject this!");
} while (++i != e && Base == UsersToProcess[i].Base);
}
}
/// FindIVIncInsertPt - Return the location to insert the increment instruction.
/// If the only use if a use of postinc value, (must be the loop termination
/// condition), then insert it just before the use.
static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
const Loop *L) {
if (UsersToProcess.size() == 1 &&
UsersToProcess[0].isUseOfPostIncrementedValue &&
L->contains(UsersToProcess[0].Inst))
return UsersToProcess[0].Inst;
return L->getLoopLatch()->getTerminator();
}
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
/// given users to share.
///
void
LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
const SCEV *Stride,
const SCEV *CommonExprs,
Value *CommonBaseV,
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
DEBUG(dbgs() << " Inserting new PHI:\n");
PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
// All the users will share this new IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = Phi;
DEBUG(dbgs() << " IV=");
DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false));
DEBUG(dbgs() << "\n");
}
/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
/// reuse an induction variable with a stride that is a factor of the current
/// induction variable.
///
void
LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
std::vector<BasedUser> &UsersToProcess,
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt) {
DEBUG(dbgs() << " Rewriting in terms of existing IV of STRIDE "
<< *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
// All the users will share the reused IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = ReuseIV.PHI;
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (C &&
(!C->isNullValue() &&
!fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
TLI, false)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
}
static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
const Type *AccessTy,
std::vector<BasedUser> &UsersToProcess,
const TargetLowering *TLI) {
SmallVector<Instruction*, 16> AddrModeInsts;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
if (UsersToProcess[i].isUseOfPostIncrementedValue)
continue;
ExtAddrMode AddrMode =
AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
AccessTy, UsersToProcess[i].Inst,
AddrModeInsts, *TLI);
if (GV && GV != AddrMode.BaseGV)
return false;
if (Offset && !AddrMode.BaseOffs)
// FIXME: How to accurate check it's immediate offset is folded.
return false;
AddrModeInsts.clear();
}
return true;
}
/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
void
LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// we may be able to rewrite the entire collection of them in terms of a
// smaller-stride IV.
bool AllUsesAreAddresses = true;
// Keep track if every use of a single stride is outside the loop. If so,
// we want to be more aggressive about reusing a smaller-stride IV; a
// multiply outside the loop is better than another IV inside. Well, usually.
bool AllUsesAreOutsideLoop = true;
// Transform our list of users and offsets to a bit more complex table. In
// this new vector, each 'BasedUser' contains 'Base' the base of the strided
// access as well as the old information from Uses. We progressively move
// information from the Base field to the Imm field until we eventually have
// the full access expression to rewrite the use.
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
SortUsersToProcess(UsersToProcess);
// If we managed to find some expressions in common, we'll need to carry
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
bool HaveCommonExprs = !CommonExprs->isZero();
const Type *ReplacedTy = CommonExprs->getType();
// If all uses are addresses, consider sinking the immediate part of the
// common expression back into uses if they can fit in the immediate fields.
if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
const SCEV *NewCommon = CommonExprs;
const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy);
MoveImmediateValues(TLI, Type::getVoidTy(
L->getLoopPreheader()->getContext()),
NewCommon, Imm, true, L, SE);
if (!Imm->isZero()) {
bool DoSink = true;
// If the immediate part of the common expression is a GV, check if it's
// possible to fold it into the target addressing mode.
GlobalValue *GV = 0;
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm))
GV = dyn_cast<GlobalValue>(SU->getValue());
int64_t Offset = 0;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
Offset = SC->getValue()->getSExtValue();
if (GV || Offset)
// Pass VoidTy as the AccessTy to be conservative, because
// there could be multiple access types among all the uses.
DoSink = IsImmFoldedIntoAddrMode(GV, Offset,
Type::getVoidTy(L->getLoopPreheader()->getContext()),
UsersToProcess, TLI);
if (DoSink) {
DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n");
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
CommonExprs = NewCommon;
HaveCommonExprs = !CommonExprs->isZero();
++NumImmSunk;
}
}
}
// Now that we know what we need to do, insert the PHI node itself.
//
DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << ":\n"
<< " Common base: " << *CommonExprs << '\n');
SCEVExpander Rewriter(*SE);
SCEVExpander PreheaderRewriter(*SE);
Evan Cheng
committed
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
BasicBlock *LatchBlock = L->getLoopLatch();
Instruction *IVIncInsertPt = LatchBlock->getTerminator();
Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
// Choose a strength-reduction strategy and prepare for it by creating
// the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
PreheaderRewriter);
} else {
// Emit the initial base value into the loop preheader.
CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy,
PreInsertPt);
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
// target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
Stride, ReuseIV, ReplacedTy,
UsersToProcess);
if (!RewriteFactor->isZero())
PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
ReuseIV, PreInsertPt);
else {
IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
CommonBaseV, IVIncInsertPt,
L, PreheaderRewriter);
}
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
// Process all the users now, replacing their strided uses with
// strength-reduced forms. This outer loop handles all bases, the inner
// loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
const SCEV *Base = UsersToProcess.back().Base;
Instruction *Inst = UsersToProcess.back().Inst;
// Emit the code for Base into the preheader.
Value *BaseV = 0;
if (!Base->isZero()) {
BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":");
if (BaseV->hasName())
DEBUG(dbgs() << " Result value name = %" << BaseV->getName());
DEBUG(dbgs() << "\n");
// If BaseV is a non-zero constant, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We
// do this by forcing a BitCast (noop cast) to be inserted into the
// preheader in this case.
if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
isa<Constant>(BaseV)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
PreInsertPt);
}
}
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
do {
// FIXME: Use emitted users to emit other users.
BasedUser &User = UsersToProcess.back();
DEBUG(dbgs() << " Examining ");
if (User.isUseOfPostIncrementedValue)
DEBUG(dbgs() << "postinc");
else
DEBUG(dbgs() << "preinc");
DEBUG(dbgs() << " use ");
DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace,
/*PrintType=*/false));
DEBUG(dbgs() << " in Inst: " << *User.Inst << '\n');
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
Value *RewriteOp = User.Phi;
if (User.isUseOfPostIncrementedValue) {
RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
// If this user is in the loop, make sure it is the last thing in the
// loop to ensure it is dominated by the increment. In case it's the
// only use of the iv, the increment instruction is already before the
// use.
if (L->contains(User.Inst) && User.Inst != IVIncInsertPt)
User.Inst->moveBefore(IVIncInsertPt);
}
const SCEV *RewriteExpr = SE->getUnknown(RewriteOp);
if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
SE->getEffectiveSCEVType(ReplacedTy)) {
assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
SE->getTypeSizeInBits(ReplacedTy) &&
"Unexpected widening cast!");
RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy);
}
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
// If we had to insert new instructions for RewriteOp, we have to
// consider that they may not have been able to end up immediately
// next to RewriteOp, because non-PHI instructions may never precede
// PHI instructions in a block. In this case, remember where the last
// instruction was inserted so that if we're replacing a different
// PHI node, we can use the later point to expand the final
// RewriteExpr.
Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
if (RewriteOp == User.Phi) NewBasePt = 0;
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor to take advantage of the addressing mode scale component.
if (!RewriteFactor->isZero()) {
// If we're reusing an IV with a nonzero base (currently this happens
// only when all reuses are outside the loop) subtract that base here.
// The base has been used to initialize the PHI node but we don't want
// it here.
if (!ReuseIV.Base->isZero()) {
const SCEV *typedBase = ReuseIV.Base;
if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
// It's possible the original IV is a larger type than the new IV,
// in which case we have to truncate the Base. We checked in
// RequiresTypeConversion that this is valid.
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
}
// Multiply old variable, with base removed, by new scale factor.
RewriteExpr = SE->getMulExpr(RewriteFactor,
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
// When this use is outside the loop, we earlier subtracted the
// common base, and are adding it back here. Use the same expression
// as before, rather than CommonBaseV, so DAGCombiner will zap it.
if (!CommonExprs->isZero()) {
if (L->contains(User.Inst))
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
else
RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
}
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (BaseV)
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
Rewriter, L, this,
DeadInsts, SE);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
DeadInsts.push_back(User.OperandValToReplace);
UsersToProcess.pop_back();
++NumReduced;
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
// Note: this processes each stride/type pair individually. All users
// passed into StrengthReduceIVUsersOfStride have the same type AND stride.
// Also, note that we iterate over IVUsesByStride indirectly by using
// StrideOrder. This extra layer of indirection makes the ordering of
// strides deterministic - not dependent on map order.
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
// FIXME: Generalize to non-affine IV's.
if (!SI->first->isLoopInvariant(L))
continue;
StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
}
}
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
IVStrideUse *&CondUse,
const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
CondStride = SI->first;
return true;
}
}
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
const ScalarEvolution *SE;
explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
bool operator()(const SCEV *LHS, const SCEV *RHS) {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV) {
if (LV != RV)
return LV > RV;
} else {
return ALV < ARV;
}
Evan Cheng
committed
// If it's the same value but different type, sort by bit width so
// that we emit larger induction variables before smaller
// ones, letting the smaller be re-written in terms of larger ones.
return SE->getTypeSizeInBits(RHS->getType()) <
SE->getTypeSizeInBits(LHS->getType());
}
return LHSC && !RHSC;
}
};
}
Evan Cheng
committed
/// ChangeCompareStride - If a loop termination compare instruction is the only
/// use of its stride, and the comparison is against a constant value, try to
/// eliminate the stride by moving the compare instruction to another stride and
/// changing its constant operand accordingly. E.g.
///
/// loop:
/// ...
/// v1 = v1 + 3
/// v2 = v2 + 1
/// if (v2 < 10) goto loop
/// =>
/// loop:
/// ...
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse,
const SCEV* &CondStride,
bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
// If there are other users of the condition's stride, don't bother trying to
// change the condition because the stride will still remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
IU->IVUsesByStride.find(CondStride);
if (I == IU->IVUsesByStride.end())
return Cond;
if (I->second->Users.size() > 1) {
for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
EE = I->second->Users.end(); II != EE; ++II) {
if (II->getUser() == Cond)
continue;
if (!isInstructionTriviallyDead(II->getUser()))
return Cond;
}
}
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
// Only handle constant strides for now.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = Cond->getOperand(0)->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy);
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
// Check the relevant induction variable for conformance to the pattern.
const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine())
return Cond;
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
// Check stride constant and the comparision constant signs to detect
// overflow.
if (StartC) {
if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
(StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
return Cond;
} else {
// More restrictive check for the other cases.
if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
return Cond;
}
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
abs64(SSInt) < abs64(CmpSSInt) ||
(SSInt % CmpSSInt) != 0)
continue;
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
// If old icmp value fits in icmp immediate field, but the new one doesn't
// try something else.
if (TLI &&
TLI->isLegalICmpImmediate(CmpVal) &&
!TLI->isLegalICmpImmediate(NewCmpVal))
continue;
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
if (!Mul.isSignedIntN(BitWidth))
continue;
// Check for overflow in the stride's type too.
if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
continue;
// Watch out for overflow.
if (ICmpInst::isSigned(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI) {
Value *Op = UI->getOperandValToReplace();
// If the IVStrideUse implies a cast, check for an actual cast which
// can be used to find the original IV expression.
if (SE->getEffectiveSCEVType(Op->getType()) !=
SE->getEffectiveSCEVType(SI->first->getType())) {
CastInst *CI = dyn_cast<CastInst>(Op);
// If it's not a simple cast, it's complicated.
if (!CI)
continue;
// If it's a cast from a type other than the stride type,
// it's complicated.
if (CI->getOperand(0)->getType() != SI->first->getType())
continue;
// Ok, we found the IV expression in the stride's type.
Op = CI->getOperand(0);
}
NewCmpLHS = Op;
if (NewCmpLHS->getType() == CmpTy)
break;
}
if (!NewCmpLHS)
continue;
NewCmpTy = NewCmpLHS->getType();
NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
unsigned Bits = NewTyBits;
if (ICmpInst::isSigned(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
continue;
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
if (!PostPass) {
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
continue;
}
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
// TODO: This is over-conservative.
if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits)
continue;
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);