Skip to content
  • Chris Lattner's avatar
    Implement: LoopStrengthReduce/share_ivs.ll · a091ff17
    Chris Lattner authored
    Two changes:
      * Only insert one PHI node for each stride.  All other values are
        live-in values to the loop.  This cannot introduce higher register
        pressure than the previous approach, and can take advantage of
        reg+reg addressing modes.
      * Factor common base values out of uses before moving values from the
        base to the immediate fields.  This improves codegen by starting the
        stride-specific PHI node out at a common place for each IV use.
    
    As an example, we used to generate this for a loop in swim:
    
    .LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_2:        ; no_exit.7.i
            lfd f0, 0(r8)
            stfd f0, 0(r3)
            lfd f0, 0(r6)
            stfd f0, 0(r7)
            lfd f0, 0(r2)
            stfd f0, 0(r5)
            addi r9, r9, 1
            addi r2, r2, 8
            addi r5, r5, 8
            addi r6, r6, 8
            addi r7, r7, 8
            addi r8, r8, 8
            addi r3, r3, 8
            cmpw cr0, r9, r4
            bgt .LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_1
    
    now we emit:
    
    .LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_2:        ; no_exit.7.i
            lfdx f0, r8, r2
            stfdx f0, r9, r2
            lfdx f0, r5, r2
            stfdx f0, r7, r2
            lfdx f0, r3, r2
            stfdx f0, r6, r2
            addi r10, r10, 1
            addi r2, r2, 8
            cmpw cr0, r10, r4
            bgt .LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_1
    
    As another, more dramatic, example, we used to emit this:
    
    .LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_2:       ; no_exit.1.i19
            lfd f0, 8(r21)
            lfd f4, 8(r3)
            lfd f5, 8(r27)
            lfd f6, 8(r22)
            lfd f7, 8(r5)
            lfd f8, 8(r6)
            lfd f9, 8(r30)
            lfd f10, 8(r11)
            lfd f11, 8(r12)
            fsub f10, f10, f11
            fadd f5, f4, f5
            fmul f5, f5, f1
            fadd f6, f6, f7
            fadd f6, f6, f8
            fadd f6, f6, f9
            fmadd f0, f5, f6, f0
            fnmsub f0, f10, f2, f0
            stfd f0, 8(r4)
            lfd f0, 8(r25)
            lfd f5, 8(r26)
            lfd f6, 8(r23)
            lfd f9, 8(r28)
            lfd f10, 8(r10)
            lfd f12, 8(r9)
            lfd f13, 8(r29)
            fsub f11, f13, f11
            fadd f4, f4, f5
            fmul f4, f4, f1
            fadd f5, f6, f9
            fadd f5, f5, f10
            fadd f5, f5, f12
            fnmsub f0, f4, f5, f0
            fnmsub f0, f11, f3, f0
            stfd f0, 8(r24)
            lfd f0, 8(r8)
            fsub f4, f7, f8
            fsub f5, f12, f10
            fnmsub f0, f5, f2, f0
            fnmsub f0, f4, f3, f0
            stfd f0, 8(r2)
            addi r20, r20, 1
            addi r2, r2, 8
            addi r8, r8, 8
            addi r10, r10, 8
            addi r12, r12, 8
            addi r6, r6, 8
            addi r29, r29, 8
            addi r28, r28, 8
            addi r26, r26, 8
            addi r25, r25, 8
            addi r24, r24, 8
            addi r5, r5, 8
            addi r23, r23, 8
            addi r22, r22, 8
            addi r3, r3, 8
            addi r9, r9, 8
            addi r11, r11, 8
            addi r30, r30, 8
            addi r27, r27, 8
            addi r21, r21, 8
            addi r4, r4, 8
            cmpw cr0, r20, r7
            bgt .LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_1
    
    we now emit:
    
    .LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_2:       ; no_exit.1.i19
            lfdx f0, r21, r20
            lfdx f4, r3, r20
            lfdx f5, r27, r20
            lfdx f6, r22, r20
            lfdx f7, r5, r20
            lfdx f8, r6, r20
            lfdx f9, r30, r20
            lfdx f10, r11, r20
            lfdx f11, r12, r20
            fsub f10, f10, f11
            fadd f5, f4, f5
            fmul f5, f5, f1
            fadd f6, f6, f7
            fadd f6, f6, f8
            fadd f6, f6, f9
            fmadd f0, f5, f6, f0
            fnmsub f0, f10, f2, f0
            stfdx f0, r4, r20
            lfdx f0, r25, r20
            lfdx f5, r26, r20
            lfdx f6, r23, r20
            lfdx f9, r28, r20
            lfdx f10, r10, r20
            lfdx f12, r9, r20
            lfdx f13, r29, r20
            fsub f11, f13, f11
            fadd f4, f4, f5
            fmul f4, f4, f1
            fadd f5, f6, f9
            fadd f5, f5, f10
            fadd f5, f5, f12
            fnmsub f0, f4, f5, f0
            fnmsub f0, f11, f3, f0
            stfdx f0, r24, r20
            lfdx f0, r8, r20
            fsub f4, f7, f8
            fsub f5, f12, f10
            fnmsub f0, f5, f2, f0
            fnmsub f0, f4, f3, f0
            stfdx f0, r2, r20
            addi r19, r19, 1
            addi r20, r20, 8
            cmpw cr0, r19, r7
            bgt .LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_1
    
    llvm-svn: 22722
    a091ff17
Loading