Skip to content
  • Chris Lattner's avatar
    When processing outer loops and we find uses of an IV in inner loops, make · a0102fbc
    Chris Lattner authored
    sure to handle the use; just don't recurse into it.
    
    This permits us to generate this code for a simple nested loop case:
    
    .LBB_foo_0:     ; entry
            stwu r1, -48(r1)
            stw r29, 44(r1)
            stw r30, 40(r1)
            mflr r11
            stw r11, 56(r1)
            lis r2, ha16(L_A$non_lazy_ptr)
            lwz r30, lo16(L_A$non_lazy_ptr)(r2)
            li r29, 1
    .LBB_foo_1:     ; no_exit.0
            bl L_bar$stub
            li r2, 1
            or r3, r30, r30
    .LBB_foo_2:     ; no_exit.1
            lfd f0, 8(r3)
            stfd f0, 0(r3)
            addi r4, r2, 1
            addi r3, r3, 8
            cmpwi cr0, r2, 100
            or r2, r4, r4
            bne .LBB_foo_2  ; no_exit.1
    .LBB_foo_3:     ; loopexit.1
            addi r30, r30, 800
            addi r2, r29, 1
            cmpwi cr0, r29, 100
            or r29, r2, r2
            bne .LBB_foo_1  ; no_exit.0
    .LBB_foo_4:     ; return
            lwz r11, 56(r1)
            mtlr r11
            lwz r30, 40(r1)
            lwz r29, 44(r1)
            lwz r1, 0(r1)
            blr
    
    instead of this:
    
    _foo:
    .LBB_foo_0:     ; entry
            stwu r1, -48(r1)
            stw r28, 44(r1)                   ;; uses an extra register.
            stw r29, 40(r1)
            stw r30, 36(r1)
            mflr r11
            stw r11, 56(r1)
            li r30, 1
            li r29, 0
            or r28, r29, r29
    .LBB_foo_1:     ; no_exit.0
            bl L_bar$stub
            mulli r2, r28, 800           ;; unstrength-reduced multiply
            lis r3, ha16(L_A$non_lazy_ptr)   ;; loop invariant address computation
            lwz r3, lo16(L_A$non_lazy_ptr)(r3)
            add r2, r2, r3
            mulli r4, r29, 800           ;; unstrength-reduced multiply
            addi r3, r3, 8
            add r3, r4, r3
            li r4, 1
    .LBB_foo_2:     ; no_exit.1
            lfd f0, 0(r3)
            stfd f0, 0(r2)
            addi r5, r4, 1
            addi r2, r2, 8                 ;; multiple stride-8 IVs
            addi r3, r3, 8
            cmpwi cr0, r4, 100
            or r4, r5, r5
            bne .LBB_foo_2  ; no_exit.1
    .LBB_foo_3:     ; loopexit.1
            addi r28, r28, 1               ;;; Many IVs with stride 1
            addi r29, r29, 1
            addi r2, r30, 1
            cmpwi cr0, r30, 100
            or r30, r2, r2
            bne .LBB_foo_1  ; no_exit.0
    .LBB_foo_4:     ; return
            lwz r11, 56(r1)
            mtlr r11
            lwz r30, 36(r1)
            lwz r29, 40(r1)
            lwz r28, 44(r1)
            lwz r1, 0(r1)
            blr
    
    llvm-svn: 22640
    a0102fbc
Loading