Newer
Older
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
BasicBlock *OnlyDest = 0;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
BasicBlock *Pred = PredValues[i].second;
if (!SeenPreds.insert(Pred))
continue; // Duplicate predecessor entry.
// If the predecessor ends with an indirect goto, we can't change its
// destination.
if (isa<IndirectBrInst>(Pred->getTerminator()))
ConstantInt *Val = PredValues[i].first;
BasicBlock *DestBB;
if (Val == 0) // Undef.
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(Val->isZero());
else {
SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
DestBB = SI->getSuccessor(SI->findCaseValue(Val));
// If we have exactly one destination, remember it for efficiency below.
if (i == 0)
OnlyDest = DestBB;
else if (OnlyDest != DestBB)
OnlyDest = MultipleDestSentinel;
PredToDestList.push_back(std::make_pair(Pred, DestBB));
// If all edges were unthreadable, we fail.
if (PredToDestList.empty())
// Determine which is the most common successor. If we have many inputs and
// this block is a switch, we want to start by threading the batch that goes
// to the most popular destination first. If we only know about one
// threadable destination (the common case) we can avoid this.
BasicBlock *MostPopularDest = OnlyDest;
if (MostPopularDest == MultipleDestSentinel)
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second == MostPopularDest) {
BasicBlock *Pred = PredToDestList[i].first;
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
TerminatorInst *PredTI = Pred->getTerminator();
for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
if (PredTI->getSuccessor(i) == BB)
PredsToFactor.push_back(Pred);
}
// If the threadable edges are branching on an undefined value, we get to pick
// the destination that these predecessors should get to.
if (MostPopularDest == 0)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
// Ok, try to thread it!
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block. See if there are any simplifications we
/// can do based on inputs to the phi node.
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
// TODO: We could make use of this to do it once for blocks with common PHI
// values.
SmallVector<BasicBlock*, 1> PredBBs;
PredBBs.resize(1);
// If any of the predecessor blocks end in an unconditional branch, we can
// *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a
// phi of an icmp (branch on icmp is much better).
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
if (PredBr->isUnconditional()) {
PredBBs[0] = PredBB;
// Try to duplicate BB into PredBB.
if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
return true;
}
}
return false;
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
///
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
// If either the LHS or RHS of the xor is a constant, don't do this
// optimization.
if (isa<ConstantInt>(BO->getOperand(0)) ||
isa<ConstantInt>(BO->getOperand(1)))
return false;
// If we have a xor as the branch input to this block, and we know that the
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
// the condition into the predecessor and fix that value to true, saving some
// logical ops on that path and encouraging other paths to simplify.
//
// This copies something like this:
//
// BB:
// %X = phi i1 [1], [%X']
// %Y = icmp eq i32 %A, %B
// %Z = xor i1 %X, %Y
// br i1 %Z, ...
//
// Into:
// BB':
// %Y = icmp ne i32 %A, %B
// br i1 %Z, ...
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
bool isLHS = true;
if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
assert(XorOpValues.empty());
if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
return false;
isLHS = false;
}
assert(!XorOpValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
if (XorOpValues[i].first->isZero())
++NumFalse;
else
++NumTrue;
}
// Determine which value to split on, true, false, or undef if neither.
ConstantInt *SplitVal = 0;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
SplitVal = ConstantInt::getFalse(BB->getContext());
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
BlocksToFoldInto.push_back(XorOpValues[i].second);
}
// Try to duplicate BB into PredBB.
if (DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto)) {
// errs() << "CLONE XOR COND: " << *BB << "Into PRED: " << *BlocksToFoldInto[0];
return true;
}
return false;
}
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
BasicBlock *OldPred,
BasicBlock *NewPred,
DenseMap<Instruction*, Value*> &ValueMap) {
for (BasicBlock::iterator PNI = PHIBB->begin();
PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
Value *IV = PN->getIncomingValueForBlock(OldPred);
// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
if (I != ValueMap.end())
IV = I->second;
}
PN->addIncoming(IV, NewPred);
}
}
/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
bool JumpThreading::ThreadEdge(BasicBlock *BB,
const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
<< "' - would thread to self!\n");
return false;
}
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
}
unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
if (JumpThreadCost > Threshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
// And finally, do it! Start by factoring the predecessors is needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
// And finally, do it!
DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
DenseMap<Instruction*, Value*> ValueMapping;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
BB->getName()+".thread",
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; !isa<TerminatorInst>(BI); ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
}
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
BranchInst::Create(SuccBB, NewBB);
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
Chris Lattner
committed
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
++UI) {
Instruction *User = cast<Instruction>(*UI);
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(UI) == BB)
continue;
} else if (User->getParent() == BB)
continue;
UsesToRename.push_back(&UI.getUse());
}
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I);
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
}
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
// NewBB instead of BB. This eliminates predecessors from BB, which requires
// us to simplify any PHI nodes in BB.
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
RemovePredecessorAndSimplify(BB, PredBB, TD);
PredTerm->setSuccessor(i, NewBB);
}
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
BI = NewBB->begin();
for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
Instruction *Inst = BI++;
if (Value *V = SimplifyInstruction(Inst, TD)) {
WeakVH BIHandle(BI);
ReplaceAndSimplifyAllUses(Inst, V, TD);
if (BIHandle == 0)
BI = NewBB->begin();
continue;
}
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
// Threaded an edge!
++NumThreads;
return true;
Chris Lattner
committed
}
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs) {
assert(!PredBBs.empty() && "Can't handle an empty set");
// If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop, don't do this.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
<< "' into predecessor block '" << PredBBs[0]->getName()
<< "' - it might create an irreducible loop!\n");
return false;
}
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
// And finally, do it! Start by factoring the predecessors is needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
if (!OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
PredBB->getInstList().insert(OldPredBranch, New);
ValueMapping[BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
}
// Check to see if the targets of the branch had PHI nodes. If so, we need to
// add entries to the PHI nodes for branch from PredBB now.
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
ValueMapping);
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
++UI) {
Instruction *User = cast<Instruction>(*UI);
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(UI) == BB)
continue;
} else if (User->getParent() == BB)
continue;
UsesToRename.push_back(&UI.getUse());
}
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I);
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
}
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
RemovePredecessorAndSimplify(BB, PredBB, TD);