Skip to content
LoopStrengthReduce.cpp 58.4 KiB
Newer Older
  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
    // If the user is not in the current loop, this means it is using the exit
    // value of the IV.  Do not put anything in the base, make sure it's all in
    // the immediate field to allow as much factoring as possible.
    if (!L->contains(UsersToProcess[i].Inst->getParent())) {
      UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm,
                                               UsersToProcess[i].Base);
      UsersToProcess[i].Base = 
        SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType());
    } else {
      
      // Addressing modes can be folded into loads and stores.  Be careful that
      // the store is through the expression, not of the expression though.
      bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst);
      if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
        if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace)
          isAddress = true;
      
      MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
                          UsersToProcess[i].Imm, isAddress, L);
  // Check if it is possible to reuse a IV with stride that is factor of this
  // stride. And the multiple is a number that can be encoded in the scale
  // field of the target addressing mode.  And we will have a valid
  // instruction after this substition, including the immediate field, if any.
  PHINode *NewPHI = NULL;
  Value   *IncV   = NULL;
  IVExpr   ReuseIV;
  unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV,
                                           CommonExprs->getType(),
                                           UsersToProcess);
  if (RewriteFactor != 0) {
    DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
         << " and BASE " << *ReuseIV.Base << " :\n";
    NewPHI = ReuseIV.PHI;
    IncV   = ReuseIV.IncV;
  }

  const Type *ReplacedTy = CommonExprs->getType();
  
  // Now that we know what we need to do, insert the PHI node itself.
  //
  DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
       << *Stride << " and BASE " << *CommonExprs << " :\n";
  SCEVExpander Rewriter(*SE, *LI);
  SCEVExpander PreheaderRewriter(*SE, *LI);
  
  BasicBlock  *Preheader = L->getLoopPreheader();
  Instruction *PreInsertPt = Preheader->getTerminator();
  Instruction *PhiInsertBefore = L->getHeader()->begin();
  
  BasicBlock *LatchBlock = L->getLoopLatch();

  // Emit the initial base value into the loop preheader.
  Value *CommonBaseV
    = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt,
                                      ReplacedTy);

  if (RewriteFactor == 0) {
    // Create a new Phi for this base, and stick it in the loop header.
    NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
    ++NumInserted;
    // Add common base to the new Phi node.
    NewPHI->addIncoming(CommonBaseV, Preheader);

    // Insert the stride into the preheader.
    Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt,
                                                     ReplacedTy);
    if (!isa<ConstantInt>(StrideV)) ++NumVariable;
    // Emit the increment of the base value before the terminator of the loop
    // latch block, and add it to the Phi node.
    SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
                                         SCEVUnknown::get(StrideV));
    IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(),
                                  ReplacedTy);
    IncV->setName(NewPHI->getName()+".inc");
    NewPHI->addIncoming(IncV, LatchBlock);

    // Remember this in case a later stride is multiple of this.
    IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
  } else {
    Constant *C = dyn_cast<Constant>(CommonBaseV);
    if (!C ||
        (!C->isNullValue() &&
         !isTargetConstant(SCEVUnknown::get(CommonBaseV), ReplacedTy, TLI)))
Reid Spencer's avatar
Reid Spencer committed
      // We want the common base emitted into the preheader! This is just
      // using cast as a copy so BitCast (no-op cast) is appropriate
      CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), 
                                    "commonbase", PreInsertPt);
Chris Lattner's avatar
 
Chris Lattner committed
  // We want to emit code for users inside the loop first.  To do this, we
  // rearrange BasedUser so that the entries at the end have
  // isUseOfPostIncrementedValue = false, because we pop off the end of the
  // vector (so we handle them first).
  std::partition(UsersToProcess.begin(), UsersToProcess.end(),
                 PartitionByIsUseOfPostIncrementedValue);
  
  // Sort this by base, so that things with the same base are handled
  // together.  By partitioning first and stable-sorting later, we are
  // guaranteed that within each base we will pop off users from within the
  // loop before users outside of the loop with a particular base.
  //
  // We would like to use stable_sort here, but we can't.  The problem is that
  // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
  // we don't have anything to do a '<' comparison on.  Because we think the
  // number of uses is small, do a horrible bubble sort which just relies on
  // ==.
  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
    // Get a base value.
    SCEVHandle Base = UsersToProcess[i].Base;
    
    // Compact everything with this base to be consequetive with this one.
    for (unsigned j = i+1; j != e; ++j) {
      if (UsersToProcess[j].Base == Base) {
        std::swap(UsersToProcess[i+1], UsersToProcess[j]);
        ++i;
      }
    }
  }

  // Process all the users now.  This outer loop handles all bases, the inner
  // loop handles all users of a particular base.
    SCEVHandle Base = UsersToProcess.back().Base;
    DOUT << "  INSERTING code for BASE = " << *Base << ":\n";
    // Emit the code for Base into the preheader.
    Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
                                                   ReplacedTy);
    
    // If BaseV is a constant other than 0, make sure that it gets inserted into
    // the preheader, instead of being forward substituted into the uses.  We do
Reid Spencer's avatar
Reid Spencer committed
    // this by forcing a BitCast (noop cast) to be inserted into the preheader 
    // in this case.
Chris Lattner's avatar
 
Chris Lattner committed
    if (Constant *C = dyn_cast<Constant>(BaseV)) {
      if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) {
Reid Spencer's avatar
Reid Spencer committed
        // We want this constant emitted into the preheader! This is just
        // using cast as a copy so BitCast (no-op cast) is appropriate
        BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
Chris Lattner's avatar
 
Chris Lattner committed
    }

    // Emit the code to add the immediate offset to the Phi value, just before
    // the instructions that we identified as using this stride and base.
Chris Lattner's avatar
 
Chris Lattner committed
      // FIXME: Use emitted users to emit other users.
      BasedUser &User = UsersToProcess.back();
      // If this instruction wants to use the post-incremented value, move it
      // after the post-inc and use its value instead of the PHI.
      Value *RewriteOp = NewPHI;
      if (User.isUseOfPostIncrementedValue) {
        RewriteOp = IncV;
Chris Lattner's avatar
Chris Lattner committed

        // If this user is in the loop, make sure it is the last thing in the
        // loop to ensure it is dominated by the increment.
        if (L->contains(User.Inst->getParent()))
          User.Inst->moveBefore(LatchBlock->getTerminator());
      if (RewriteOp->getType() != ReplacedTy) {
        Instruction::CastOps opcode = Instruction::Trunc;
        if (ReplacedTy->getPrimitiveSizeInBits() ==
            RewriteOp->getType()->getPrimitiveSizeInBits())
          opcode = Instruction::BitCast;
        RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy);
      }
      SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);

      // Clear the SCEVExpander's expression map so that we are guaranteed
      // to have the code emitted where we expect it.
      Rewriter.clear();

      // If we are reusing the iv, then it must be multiplied by a constant
      // factor take advantage of addressing mode scale component.
      if (RewriteFactor != 0) {
        RewriteExpr =
          SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor,
                                                       RewriteExpr->getType()),
                           RewriteExpr);

        // The common base is emitted in the loop preheader. But since we
        // are reusing an IV, it has not been used to initialize the PHI node.
        // Add it to the expression used to rewrite the uses.
        if (!isa<ConstantInt>(CommonBaseV) ||
            !cast<ConstantInt>(CommonBaseV)->isZero())
          RewriteExpr = SCEVAddExpr::get(RewriteExpr,
                                         SCEVUnknown::get(CommonBaseV));
      }
      // Now that we know what we need to do, insert code before User for the
      // immediate and any loop-variant expressions.
      if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isZero())
        // Add BaseV to the PHI value if needed.
        RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));
      User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this);

      // Mark old value we replaced as possibly dead, so that it is elminated
      // if we just replaced the last use of that value.
      DeadInsts.insert(cast<Instruction>(User.OperandValToReplace));
      ++NumReduced;
Chris Lattner's avatar
 
Chris Lattner committed
      // If there are any more users to process with the same base, process them
      // now.  We sorted by base above, so we just have to check the last elt.
    } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
    // TODO: Next, find out which base index is the most common, pull it out.
  }

  // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
  // different starting values, into different PHIs.
/// FindIVForUser - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
                                       const SCEVHandle *&CondStride) {
  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
       ++Stride) {
    std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
    IVUsesByStride.find(StrideOrder[Stride]);
    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
    
    for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
         E = SI->second.Users.end(); UI != E; ++UI)
      if (UI->User == Cond) {
        // NOTE: we could handle setcc instructions with multiple uses here, but
        // InstCombine does it as well for simple uses, it's not clear that it
        // occurs enough in real life to handle.
        CondUse = &*UI;
        CondStride = &SI->first;
        return true;
      }
  }
  return false;
}    

// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
  // TODO: implement optzns here.

  // Finally, get the terminating condition for the loop if possible.  If we
  // can, we want to change it to use a post-incremented version of its
Chris Lattner's avatar
Chris Lattner committed
  // induction variable, to allow coalescing the live ranges for the IV into
  // one register value.
  PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
  BasicBlock  *Preheader = L->getLoopPreheader();
  BasicBlock *LatchBlock =
   SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
  BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
Reid Spencer's avatar
Reid Spencer committed
  if (!TermBr || TermBr->isUnconditional() || 
      !isa<ICmpInst>(TermBr->getCondition()))
Reid Spencer's avatar
Reid Spencer committed
  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());

  // Search IVUsesByStride to find Cond's IVUse if there is one.
  IVStrideUse *CondUse = 0;
  if (!FindIVForUser(Cond, CondUse, CondStride))
    return; // setcc doesn't use the IV.
  

  // It's possible for the setcc instruction to be anywhere in the loop, and
  // possible for it to have multiple users.  If it is not immediately before
  // the latch block branch, move it.
  if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
    if (Cond->hasOneUse()) {   // Condition has a single use, just move it.
      Cond->moveBefore(TermBr);
    } else {
      // Otherwise, clone the terminating condition and insert into the loopend.
Reid Spencer's avatar
Reid Spencer committed
      Cond = cast<ICmpInst>(Cond->clone());
      Cond->setName(L->getHeader()->getName() + ".termcond");
      LatchBlock->getInstList().insert(TermBr, Cond);
      
      // Clone the IVUse, as the old use still exists!
      IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
      CondUse = &IVUsesByStride[*CondStride].Users.back();
    }
  }

  // If we get to here, we know that we can transform the setcc instruction to
Chris Lattner's avatar
Chris Lattner committed
  // use the post-incremented version of the IV, allowing us to coalesce the
  CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride);
  CondUse->isUseOfPostIncrementedValue = true;
}
namespace {
  // Constant strides come first which in turns are sorted by their absolute
  // values. If absolute values are the same, then positive strides comes first.
  // e.g.
  // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
  struct StrideCompare {
    bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
      SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
      SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
      if (LHSC && RHSC) {
        int64_t  LV = LHSC->getValue()->getSExtValue();
        int64_t  RV = RHSC->getValue()->getSExtValue();
        uint64_t ALV = (LV < 0) ? -LV : LV;
        uint64_t ARV = (RV < 0) ? -RV : RV;
          return LV > RV;
          return ALV < ARV;
      }
      return (LHSC && !RHSC);
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
  LI = &getAnalysis<LoopInfo>();
  EF = &getAnalysis<ETForest>();
  SE = &getAnalysis<ScalarEvolution>();
  TD = &getAnalysis<TargetData>();
  UIntPtrTy = TD->getIntPtrType();

  // Find all uses of induction variables in this loop, and catagorize
  // them by stride.  Start by finding all of the PHI nodes in the header for
  // this loop.  If they are induction variables, inspect their uses.
  std::set<Instruction*> Processed;   // Don't reprocess instructions.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
    AddUsersIfInteresting(I, L, Processed);
  if (IVUsesByStride.empty()) return false;

  // Optimize induction variables.  Some indvar uses can be transformed to use
  // strides that will be needed for other purposes.  A common example of this
  // is the exit test for the loop, which can often be rewritten to use the
  // computation of some other indvar to decide when to terminate the loop.
  OptimizeIndvars(L);


  // FIXME: We can widen subreg IV's here for RISC targets.  e.g. instead of
  // doing computation in byte values, promote to 32-bit values if safe.

  // FIXME: Attempt to reuse values across multiple IV's.  In particular, we
  // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
  // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.  Need
  // to be careful that IV's are all the same type.  Only works for intptr_t
  // indvars.

  // If we only have one stride, we can more aggressively eliminate some things.
  bool HasOneStride = IVUsesByStride.size() == 1;
  DEBUG(L->dump());
#endif

  // IVsByStride keeps IVs for one particular loop.
  IVsByStride.clear();

  // Sort the StrideOrder so we process larger strides first.
  std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());

  // Note: this processes each stride/type pair individually.  All users passed
  // into StrengthReduceStridedIVUsers have the same type AND stride.  Also,
  // node that we iterate over IVUsesByStride indirectly by using StrideOrder.
  // This extra layer of indirection makes the ordering of strides deterministic
  // - not dependent on map order.
  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
    std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
      IVUsesByStride.find(StrideOrder[Stride]);
    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
    StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);

  // Clean up after ourselves
  if (!DeadInsts.empty()) {
    DeleteTriviallyDeadInstructions(DeadInsts);

    BasicBlock::iterator I = L->getHeader()->begin();
    PHINode *PN;
    while ((PN = dyn_cast<PHINode>(I))) {
      ++I;  // Preincrement iterator to avoid invalidating it when deleting PN.
      
      // At this point, we know that we have killed one or more GEP
      // instructions.  It is worth checking to see if the cann indvar is also
      // dead, so that we can remove it as well.  The requirements for the cann
      // indvar to be considered dead are:
      // 1. the cann indvar has one use
      // 2. the use is an add instruction
      // 3. the add has one use
      // 4. the add is used by the cann indvar
      // If all four cases above are true, then we can remove both the add and
      // the cann indvar.
      // FIXME: this needs to eliminate an induction variable even if it's being
      // compared against some value to decide loop termination.
      if (PN->hasOneUse()) {
Reid Spencer's avatar
Reid Spencer committed
        Instruction *BO = dyn_cast<Instruction>(*PN->use_begin());
        if (BO && (isa<BinaryOperator>(BO) || isa<CmpInst>(BO))) {
          if (BO->hasOneUse() && PN == *(BO->use_begin())) {
            DeadInsts.insert(BO);
            // Break the cycle, then delete the PHI.
            PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
            SE->deleteInstructionFromRecords(PN);
            PN->eraseFromParent();
    DeleteTriviallyDeadInstructions(DeadInsts);
  return false;