Skip to content
  1. Jan 11, 2005
  2. Jan 10, 2005
    • Chris Lattner's avatar
      Implement a couple more simplifications. This lets us codegen: · 41b76414
      Chris Lattner authored
      int test2(int * P, int* Q, int A, int B) {
              return P+A == P;
      }
      
      into:
      
      test2:
              movl 4(%esp), %eax
              movl 12(%esp), %eax
              shll $2, %eax
              cmpl $0, %eax
              sete %al
              movzbl %al, %eax
              ret
      
      instead of:
      
      test2:
              movl 4(%esp), %eax
              movl 12(%esp), %ecx
              leal (%eax,%ecx,4), %ecx
              cmpl %eax, %ecx
              sete %al
              movzbl %al, %eax
              ret
      
      ICC is producing worse code:
      
      test2:
              movl      4(%esp), %eax                                 #8.5
              movl      12(%esp), %edx                                #8.5
              lea       (%edx,%edx), %ecx                             #9.9
              addl      %ecx, %ecx                                    #9.9
              addl      %eax, %ecx                                    #9.9
              cmpl      %eax, %ecx                                    #9.16
              movl      $0, %eax                                      #9.16
              sete      %al                                           #9.16
              ret                                                     #9.16
      
      as is GCC (looks like our old code):
      
      test2:
              movl    4(%esp), %edx
              movl    12(%esp), %eax
              leal    (%edx,%eax,4), %ecx
              cmpl    %edx, %ecx
              sete    %al
              movzbl  %al, %eax
              ret
      
      llvm-svn: 19430
      41b76414
    • Chris Lattner's avatar
      Fix incorrect constant folds, fixing Stepanov after the SHR patch. · 00c231ba
      Chris Lattner authored
      llvm-svn: 19429
      00c231ba
    • Chris Lattner's avatar
      Constant fold shifts, turning this loop: · 0966a75e
      Chris Lattner authored
      .LBB_Z5test0PdS__3:     # no_exit.1
              fldl data(,%eax,8)
              fldl 24(%esp)
              faddp %st(1)
              fstl 24(%esp)
              incl %eax
              movl $16000, %ecx
              sarl $3, %ecx
              cmpl %eax, %ecx
              fstpl 16(%esp)
              #FP_REG_KILL
              jg .LBB_Z5test0PdS__3   # no_exit.1
      
      into:
      
      .LBB_Z5test0PdS__3:     # no_exit.1
              fldl data(,%eax,8)
              fldl 24(%esp)
              faddp %st(1)
              fstl 24(%esp)
              incl %eax
              cmpl $2000, %eax
              fstpl 16(%esp)
              #FP_REG_KILL
              jl .LBB_Z5test0PdS__3   # no_exit.1
      
      llvm-svn: 19427
      0966a75e
  3. Jan 09, 2005
    • Chris Lattner's avatar
      Add some folds for == and != comparisons. This allows us to codegen this loop in stepanov: · fde3a212
      Chris Lattner authored
      
      no_exit.i:              ; preds = %entry, %no_exit.i, %then.i, %_Z5checkd.exit
              %i.0.0 = phi int [ 0, %entry ], [ %i.0.0, %no_exit.i ], [ %inc.0, %_Z5checkd.exit ], [ %inc.012, %then.i ]              ; <int> [#uses=3]
              %indvar = phi uint [ %indvar.next, %no_exit.i ], [ 0, %entry ], [ 0, %then.i ], [ 0, %_Z5checkd.exit ]          ; <uint> [#uses=3]
              %result_addr.i.0 = phi double [ %tmp.4.i.i, %no_exit.i ], [ 0.000000e+00, %entry ], [ 0.000000e+00, %then.i ], [ 0.000000e+00, %_Z5checkd.exit ]          ; <double> [#uses=1]
              %first_addr.0.i.2.rec = cast uint %indvar to int                ; <int> [#uses=1]
              %first_addr.0.i.2 = getelementptr [2000 x double]* %data, int 0, uint %indvar           ; <double*> [#uses=1]
              %inc.i.rec = add int %first_addr.0.i.2.rec, 1           ; <int> [#uses=1]
              %inc.i = getelementptr [2000 x double]* %data, int 0, int %inc.i.rec            ; <double*> [#uses=1]
              %tmp.3.i.i = load double* %first_addr.0.i.2             ; <double> [#uses=1]
              %tmp.4.i.i = add double %result_addr.i.0, %tmp.3.i.i            ; <double> [#uses=2]
              %tmp.2.i = seteq double* %inc.i, getelementptr ([2000 x double]* %data, int 0, int 2000)                ; <bool> [#uses=1]
              %indvar.next = add uint %indvar, 1              ; <uint> [#uses=1]
              br bool %tmp.2.i, label %_Z10accumulateIPddET0_T_S2_S1_.exit, label %no_exit.i
      
      To this:
      
      .LBB_Z4testIPddEvT_S1_T0__1:    # no_exit.i
              fldl data(,%eax,8)
              fldl 16(%esp)
              faddp %st(1)
              fstpl 16(%esp)
              incl %eax
              movl %eax, %ecx
              shll $3, %ecx
              cmpl $16000, %ecx
              #FP_REG_KILL
              jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
      
      instead of this:
      
      .LBB_Z4testIPddEvT_S1_T0__1:    # no_exit.i
              fldl data(,%eax,8)
              fldl 16(%esp)
              faddp %st(1)
              fstpl 16(%esp)
              incl %eax
              leal data(,%eax,8), %ecx
              leal data+16000, %edx
              cmpl %edx, %ecx
              #FP_REG_KILL
              jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
      
      llvm-svn: 19425
      fde3a212
    • Jeff Cohen's avatar
      Fix VC++ compilation error · 7d1670da
      Jeff Cohen authored
      llvm-svn: 19423
      7d1670da
    • Chris Lattner's avatar
      Print the DAG out more like a DAG in nested format. · e6f7882c
      Chris Lattner authored
      llvm-svn: 19422
      e6f7882c
    • Chris Lattner's avatar
      Add a simple transformation. This allows us to compile one of the inner loops in stepanov to this: · 3d5d5022
      Chris Lattner authored
      
      .LBB_Z5test0PdS__2:     # no_exit.1
              fldl data(,%eax,8)
              fldl 24(%esp)
              faddp %st(1)
              fstl 24(%esp)
              incl %eax
              cmpl $2000, %eax
              fstpl 16(%esp)
              #FP_REG_KILL
              jl .LBB_Z5test0PdS__2
      
      instead of this:
      
      .LBB_Z5test0PdS__2:     # no_exit.1
              fldl data(,%eax,8)
              fldl 24(%esp)
              faddp %st(1)
              fstl 24(%esp)
              incl %eax
              movl $data, %ecx
              movl %ecx, %edx
              addl $16000, %edx
              subl %ecx, %edx
              movl %edx, %ecx
              sarl $2, %ecx
              shrl $29, %ecx
              addl %ecx, %edx
              sarl $3, %edx
              cmpl %edx, %eax
              fstpl 16(%esp)
              #FP_REG_KILL
              jl .LBB_Z5test0PdS__2
      
      The old instruction selector produced:
      
      .LBB_Z5test0PdS__2:     # no_exit.1
              fldl 24(%esp)
              faddl data(,%eax,8)
              fstl 24(%esp)
              movl %eax, %ecx
              incl %ecx
              incl %eax
              leal data+16000, %edx
              movl $data, %edi
              subl %edi, %edx
              movl %edx, %edi
              sarl $2, %edi
              shrl $29, %edi
              addl %edi, %edx
              sarl $3, %edx
              cmpl %edx, %ecx
              fstpl 16(%esp)
              #FP_REG_KILL
              jl .LBB_Z5test0PdS__2   # no_exit.1
      
      Which is even worse!
      
      llvm-svn: 19419
      3d5d5022
  4. Jan 08, 2005
  5. Jan 07, 2005
  6. Jul 04, 2004
  7. Jun 17, 2004
  8. Jun 02, 2004
  9. Nov 11, 2003
  10. Oct 20, 2003
  11. Aug 15, 2003
  12. Aug 11, 2003
Loading