Skip to content
  • Chris Lattner's avatar
    When splitting critical edges, make sure not to leave the new block in the middle of the loop. · 8447b495
    Chris Lattner authored
    This turns a critical loop in gzip into this:
    
    .LBB_test_1:    ; loopentry
            or r27, r28, r28
            add r28, r3, r27
            lhz r28, 3(r28)
            add r26, r4, r27
            lhz r26, 3(r26)
            cmpw cr0, r28, r26
            bne .LBB_test_8 ; loopentry.loopexit_crit_edge
    .LBB_test_2:    ; shortcirc_next.0
            add r28, r3, r27
            lhz r28, 5(r28)
            add r26, r4, r27
            lhz r26, 5(r26)
            cmpw cr0, r28, r26
            bne .LBB_test_7 ; shortcirc_next.0.loopexit_crit_edge
    .LBB_test_3:    ; shortcirc_next.1
            add r28, r3, r27
            lhz r28, 7(r28)
            add r26, r4, r27
            lhz r26, 7(r26)
            cmpw cr0, r28, r26
            bne .LBB_test_6 ; shortcirc_next.1.loopexit_crit_edge
    .LBB_test_4:    ; shortcirc_next.2
            add r28, r3, r27
            lhz r26, 9(r28)
            add r28, r4, r27
            lhz r25, 9(r28)
            addi r28, r27, 8
            cmpw cr7, r26, r25
            mfcr r26, 1
            rlwinm r26, r26, 31, 31, 31
            add r25, r8, r27
            cmpw cr7, r25, r7
            mfcr r25, 1
            rlwinm r25, r25, 29, 31, 31
            and. r26, r26, r25
            bne .LBB_test_1 ; loopentry
    
    instead of this:
    
    .LBB_test_1:    ; loopentry
            or r27, r28, r28
            add r28, r3, r27
            lhz r28, 3(r28)
            add r26, r4, r27
            lhz r26, 3(r26)
            cmpw cr0, r28, r26
            beq .LBB_test_3 ; shortcirc_next.0
    .LBB_test_2:    ; loopentry.loopexit_crit_edge
            add r2, r30, r27
            add r8, r29, r27
            b .LBB_test_9   ; loopexit
    .LBB_test_3:    ; shortcirc_next.0
            add r28, r3, r27
            lhz r28, 5(r28)
            add r26, r4, r27
            lhz r26, 5(r26)
            cmpw cr0, r28, r26
            beq .LBB_test_5 ; shortcirc_next.1
    .LBB_test_4:    ; shortcirc_next.0.loopexit_crit_edge
            add r2, r11, r27
            add r8, r12, r27
            b .LBB_test_9   ; loopexit
    .LBB_test_5:    ; shortcirc_next.1
            add r28, r3, r27
            lhz r28, 7(r28)
            add r26, r4, r27
            lhz r26, 7(r26)
            cmpw cr0, r28, r26
            beq .LBB_test_7 ; shortcirc_next.2
    .LBB_test_6:    ; shortcirc_next.1.loopexit_crit_edge
            add r2, r9, r27
            add r8, r10, r27
            b .LBB_test_9   ; loopexit
    .LBB_test_7:    ; shortcirc_next.2
            add r28, r3, r27
            lhz r26, 9(r28)
            add r28, r4, r27
            lhz r25, 9(r28)
            addi r28, r27, 8
            cmpw cr7, r26, r25
            mfcr r26, 1
            rlwinm r26, r26, 31, 31, 31
            add r25, r8, r27
            cmpw cr7, r25, r7
            mfcr r25, 1
            rlwinm r25, r25, 29, 31, 31
            and. r26, r26, r25
            bne .LBB_test_1 ; loopentry
    
    Next up, improve the code for the loop.
    
    llvm-svn: 22769
    8447b495
Loading