Skip to content
  • Chris Lattner's avatar
    If an indvar with a variable stride is used by the exit condition, go ahead · 21eba2da
    Chris Lattner authored
    and handle it like constant stride vars.  This fixes some bad codegen in
    variable stride cases.  For example, it compiles this:
    
    void foo(int k, int i) {
      for (k=i+i; k <= 8192; k+=i)
        flags2[k] = 0;
    }
    
    to (x86, AT&T syntax):
    
    LBB1_1: #bb.preheader
            movl %eax, %ecx
            addl %ecx, %ecx
            movl L_flags2$non_lazy_ptr, %edx
    LBB1_2: #bb
            movb $0, (%edx,%ecx)
            addl %eax, %ecx
            cmpl $8192, %ecx
            jle LBB1_2      #bb
    LBB1_5: #return
            ret
    
    or (if the array is local and we are in dynamic-nonpic or static mode):
    
    LBB3_2: #bb
            movb $0, _flags2(%ecx)
            addl %eax, %ecx
            cmpl $8192, %ecx
            jle LBB3_2      #bb
    
    and (on PowerPC):
    
            lis r2, ha16(L_flags2$non_lazy_ptr)
            lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
            slwi r3, r4, 1
    LBB1_2: ;bb
            li r5, 0
            add r6, r4, r3
            stbx r5, r2, r3
            cmpwi cr0, r6, 8192
            bgt cr0, LBB1_5 ;return
    
    instead of:
    
            leal (%eax,%eax,2), %ecx
            movl %eax, %edx
            addl %edx, %edx
            addl L_flags2$non_lazy_ptr, %edx
            xorl %esi, %esi
    LBB1_2: #bb
            movb $0, (%edx,%esi)
            movl %eax, %edi
            addl %esi, %edi
            addl %ecx, %esi
            cmpl $8192, %esi
            jg LBB1_5       #return
    
    and (on PowerPC):
    
            lis r2, ha16(L_flags2$non_lazy_ptr)
            lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
            mulli r3, r4, 3
            slwi r5, r4, 1
            li r6, 0
            add r2, r2, r5
    LBB1_2: ;bb
            li r5, 0
            add r7, r3, r6
            stbx r5, r2, r6
            add r6, r4, r6
            cmpwi cr0, r7, 8192
            ble cr0, LBB1_2 ;bb
    
    This speeds up Benchmarks/Shootout/sieve from 8.533s to 6.464s and
    implements LoopStrengthReduce/var_stride_used_by_compare.ll
    
    llvm-svn: 31809
    21eba2da
Loading