Skip to content
  1. Aug 04, 2005
    • Nate Begeman's avatar
      Add Subtarget support to PowerPC. Next up, using it. · 3bcfcd94
      Nate Begeman authored
      llvm-svn: 22644
      3bcfcd94
    • Chris Lattner's avatar
      refactor some code · 6f286b76
      Chris Lattner authored
      llvm-svn: 22643
      6f286b76
    • Chris Lattner's avatar
      this is not implemented by lsr yet · 9969c861
      Chris Lattner authored
      llvm-svn: 22642
      9969c861
    • Chris Lattner's avatar
      invert two if's to make the logic simpler · 65107490
      Chris Lattner authored
      llvm-svn: 22641
      65107490
    • Chris Lattner's avatar
      When processing outer loops and we find uses of an IV in inner loops, make · a0102fbc
      Chris Lattner authored
      sure to handle the use, just don't recurse into it.
      
      This permits us to generate this code for a simple nested loop case:
      
      .LBB_foo_0:     ; entry
              stwu r1, -48(r1)
              stw r29, 44(r1)
              stw r30, 40(r1)
              mflr r11
              stw r11, 56(r1)
              lis r2, ha16(L_A$non_lazy_ptr)
              lwz r30, lo16(L_A$non_lazy_ptr)(r2)
              li r29, 1
      .LBB_foo_1:     ; no_exit.0
              bl L_bar$stub
              li r2, 1
              or r3, r30, r30
      .LBB_foo_2:     ; no_exit.1
              lfd f0, 8(r3)
              stfd f0, 0(r3)
              addi r4, r2, 1
              addi r3, r3, 8
              cmpwi cr0, r2, 100
              or r2, r4, r4
              bne .LBB_foo_2  ; no_exit.1
      .LBB_foo_3:     ; loopexit.1
              addi r30, r30, 800
              addi r2, r29, 1
              cmpwi cr0, r29, 100
              or r29, r2, r2
              bne .LBB_foo_1  ; no_exit.0
      .LBB_foo_4:     ; return
              lwz r11, 56(r1)
              mtlr r11
              lwz r30, 40(r1)
              lwz r29, 44(r1)
              lwz r1, 0(r1)
              blr
      
      instead of this:
      
      _foo:
      .LBB_foo_0:     ; entry
              stwu r1, -48(r1)
              stw r28, 44(r1)                   ;; uses an extra register.
              stw r29, 40(r1)
              stw r30, 36(r1)
              mflr r11
              stw r11, 56(r1)
              li r30, 1
              li r29, 0
              or r28, r29, r29
      .LBB_foo_1:     ; no_exit.0
              bl L_bar$stub
              mulli r2, r28, 800           ;; unstrength-reduced multiply
              lis r3, ha16(L_A$non_lazy_ptr)   ;; loop invariant address computation
              lwz r3, lo16(L_A$non_lazy_ptr)(r3)
              add r2, r2, r3
              mulli r4, r29, 800           ;; unstrength-reduced multiply
              addi r3, r3, 8
              add r3, r4, r3
              li r4, 1
      .LBB_foo_2:     ; no_exit.1
              lfd f0, 0(r3)
              stfd f0, 0(r2)
              addi r5, r4, 1
              addi r2, r2, 8                 ;; multiple stride 8 IV's
              addi r3, r3, 8
              cmpwi cr0, r4, 100
              or r4, r5, r5
              bne .LBB_foo_2  ; no_exit.1
      .LBB_foo_3:     ; loopexit.1
              addi r28, r28, 1               ;;; Many IV's with stride 1
              addi r29, r29, 1
              addi r2, r30, 1
              cmpwi cr0, r30, 100
              or r30, r2, r2
              bne .LBB_foo_1  ; no_exit.0
      .LBB_foo_4:     ; return
              lwz r11, 56(r1)
              mtlr r11
              lwz r30, 36(r1)
              lwz r29, 40(r1)
              lwz r28, 44(r1)
              lwz r1, 0(r1)
              blr
      
      llvm-svn: 22640
      a0102fbc
    • Chris Lattner's avatar
      Teach loop-reduce to see into nested loops, to pull out immediate values · fc624704
      Chris Lattner authored
      pushed down by SCEV.
      
      In a nested loop case, this allows us to emit this:
      
              lis r3, ha16(L_A$non_lazy_ptr)
              lwz r3, lo16(L_A$non_lazy_ptr)(r3)
              add r2, r2, r3
              li r3, 1
      .LBB_foo_2:     ; no_exit.1
              lfd f0, 8(r2)        ;; Uses offset of 8 instead of 0
              stfd f0, 0(r2)
              addi r4, r3, 1
              addi r2, r2, 8
              cmpwi cr0, r3, 100
              or r3, r4, r4
              bne .LBB_foo_2  ; no_exit.1
      
      instead of this:
      
              lis r3, ha16(L_A$non_lazy_ptr)
              lwz r3, lo16(L_A$non_lazy_ptr)(r3)
              add r2, r2, r3
              addi r3, r3, 8
              li r4, 1
      .LBB_foo_2:     ; no_exit.1
              lfd f0, 0(r3)
              stfd f0, 0(r2)
              addi r5, r4, 1
              addi r2, r2, 8
              addi r3, r3, 8
              cmpwi cr0, r4, 100
              or r4, r5, r5
              bne .LBB_foo_2  ; no_exit.1
      
      llvm-svn: 22639
      fc624704
    • Chris Lattner's avatar
      improve debug output · bb78c97e
      Chris Lattner authored
      llvm-svn: 22638
      bb78c97e
    • Nate Begeman's avatar
      Scalar SSE: load +0.0 -> xorps/xorpd · 8d394eb7
      Nate Begeman authored
      Scalar SSE: a < b ? c : 0.0 -> cmpss, andps
      Scalar SSE: float -> i16 needs to be promoted
      
      llvm-svn: 22637
      8d394eb7
    • Chris Lattner's avatar
      this now passes · 47b57322
      Chris Lattner authored
      llvm-svn: 22636
      47b57322
    • Chris Lattner's avatar
      Move from Stage 0 to Stage 1. · db23c74e
      Chris Lattner authored
      Only emit one PHI node for IV uses with identical bases and strides (after
      moving foldable immediates to the load/store instruction).
      
      This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
      us to generate this PPC code for test1:
      
              or r30, r3, r3
      .LBB_test1_1:   ; Loop
              li r2, 0
              stw r2, 0(r30)
              stw r2, 4(r30)
              bl L_pred$stub
              addi r30, r30, 8
              cmplwi cr0, r3, 0
              bne .LBB_test1_1        ; Loop
      
      instead of this code:
      
              or r30, r3, r3
              or r29, r3, r3
      .LBB_test1_1:   ; Loop
              li r2, 0
              stw r2, 0(r29)
              stw r2, 4(r30)
              bl L_pred$stub
              addi r30, r30, 8        ;; Two iv's with step of 8
              addi r29, r29, 8
              cmplwi cr0, r3, 0
              bne .LBB_test1_1        ; Loop
      
      llvm-svn: 22635
      db23c74e
    • Andrew Lenharth's avatar
    • Chris Lattner's avatar
      Rename IVUse to IVUsersOfOneStride, use a struct instead of a pair to · 430d0022
      Chris Lattner authored
      unify some parallel vectors and get field names more descriptive than
      "first" and "second".  This isn't lisp after all :)
      
      llvm-svn: 22633
      430d0022
  2. Aug 03, 2005
Loading