  1. Dec 12, 2006
  2. Dec 11, 2006
    • trunc to integer, not to FP. · e810140c
      Chris Lattner authored
      llvm-svn: 32426
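      As a minimal sketch (mine, not from the commit), the distinction the
      title refers to: narrowing an integer value is done with the integer
      trunc instruction, while narrowing a floating-point value must use
      fptrunc, never trunc.

      /* assumption: plain C, illustrative only */
      short narrow_int(int x)   { return (short)x; }  /* integer narrowing -> trunc            */
      float narrow_fp(double d) { return (float)d; }  /* FP narrowing -> fptrunc, never trunc  */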
    • implement promotion of unions containing two packed types of the same width. · 23f4b68f
      Chris Lattner authored
      This implements Transforms/ScalarRepl/union-packed.ll
      
      llvm-svn: 32422
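      As a hedged illustration (mine, not the referenced test), the kind of
      union this handles is two packed (vector) types of the same width
      overlaid in one union; scalarrepl can now promote such an alloca to a
      single bitcast instead of keeping it in a stack slot.

      /* assumption: GCC/Clang vector extensions stand in for LLVM packed types */
      typedef float v4sf __attribute__((vector_size(16)));
      typedef int   v4si __attribute__((vector_size(16)));

      union packed_union { v4sf f; v4si i; };

      v4sf roundtrip(v4si v) {
          union packed_union u;
          u.i = v;       /* store through one member...           */
          return u.f;    /* ...load through the other, same width */
      }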
    • * Eliminate calls to CastInst::createInferredCast. · 216c3028
      Chris Lattner authored
       * Add support for promoting unions with fp values in them.  This produces
         our new int<->fp bitcast instructions, implementing
         Transforms/ScalarRepl/union-fp-int.ll
      
      As an example, this allows us to compile this:
      
      union intfloat { int i; float f; };
      float invsqrt(const float arg_x) {
          union intfloat x = { .f = arg_x };
          const float xhalf = arg_x * 0.5f;
          x.i = 0x5f3759df - (x.i >> 1);
          return x.f * (1.5f - xhalf * x.f * x.f);
      }
      
      into:
      
      _invsqrt:
              movss 4(%esp), %xmm0
              movd %xmm0, %eax
              sarl %eax
              movl $1597463007, %ecx
              subl %eax, %ecx
              movd %ecx, %xmm1
              mulss LCPI1_0, %xmm0
              mulss %xmm1, %xmm0
              movss LCPI1_1, %xmm2
              mulss %xmm1, %xmm0
              subss %xmm0, %xmm2
              movl 8(%esp), %eax
              mulss %xmm2, %xmm1
              movss %xmm1, (%eax)
              ret
      
      instead of:
      
      _invsqrt:
              subl $4, %esp
              movss 8(%esp), %xmm0
              movss %xmm0, (%esp)
              movl (%esp), %eax
              movl $1597463007, %ecx
              sarl %eax
              subl %eax, %ecx
              movl %ecx, (%esp)
              mulss LCPI1_0, %xmm0
              movss (%esp), %xmm1
              mulss %xmm1, %xmm0
              mulss %xmm1, %xmm0
              movss LCPI1_1, %xmm2
              subss %xmm0, %xmm2
              mulss %xmm2, %xmm1
              movl 12(%esp), %eax
              movss %xmm1, (%eax)
              addl $4, %esp
              ret
      
      llvm-svn: 32418
  3. Dec 08, 2006
  4. Dec 07, 2006
  5. Dec 06, 2006
  6. Dec 05, 2006
  7. Dec 04, 2006
  8. Dec 02, 2006
  9. Dec 01, 2006
  10. Nov 30, 2006
  11. Nov 29, 2006
  12. Nov 28, 2006
  13. Nov 27, 2006
    • Fix PR1014 and InstCombine/2006-11-27-XorBug.ll. · 8e9a7b73
      Chris Lattner authored
      llvm-svn: 31941
    • For PR950: · 6c38f0bb
      Reid Spencer authored
      The long-awaited CAST patch. This introduces 12 new instructions into LLVM
      to replace the cast instruction. Corresponding changes throughout LLVM are
      provided. This passes llvm-test, llvm/test, and SPEC CPUINT2000, with the
      exception of 175.vpr, which fails only on a slight floating-point output
      difference.
      
      llvm-svn: 31931
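      The twelve cast opcodes introduced here, which are still LLVM's cast
      instruction set today, are trunc, zext, sext, fptrunc, fpext, fptoui,
      fptosi, uitofp, sitofp, ptrtoint, inttoptr, and bitcast. A hedged
      sketch (mine, not part of the patch) of how ordinary C conversions map
      onto them:

      #include <stdint.h>

      int16_t  c_trunc(int32_t x)     { return (int16_t)x; }   /* trunc    */
      uint64_t c_zext(uint32_t x)     { return x; }            /* zext     */
      int64_t  c_sext(int32_t x)      { return x; }            /* sext     */
      float    c_fptrunc(double x)    { return (float)x; }     /* fptrunc  */
      double   c_fpext(float x)       { return x; }            /* fpext    */
      int32_t  c_fptosi(double x)     { return (int32_t)x; }   /* fptosi   */
      uint32_t c_fptoui(double x)     { return (uint32_t)x; }  /* fptoui   */
      double   c_sitofp(int32_t x)    { return x; }            /* sitofp   */
      double   c_uitofp(uint32_t x)   { return x; }            /* uitofp   */
      intptr_t c_ptrtoint(void *p)    { return (intptr_t)p; }  /* ptrtoint */
      void    *c_inttoptr(intptr_t x) { return (void *)x; }    /* inttoptr */
      /* an int<->float reinterpretation becomes a bitcast once scalarrepl
         promotes the union, as in the invsqrt example above */
      float    c_bitcast(int32_t x)   { union { int32_t i; float f; } u;
                                        u.i = x; return u.f; } /* bitcast  */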
  14. Nov 26, 2006
  15. Nov 23, 2006
  16. Nov 21, 2006
  17. Nov 18, 2006
  18. Nov 17, 2006
    • If an indvar with a variable stride is used by the exit condition, go ahead · 21eba2da
      Chris Lattner authored
      and handle it like constant stride vars.  This fixes some bad codegen in
      variable stride cases.  For example, it compiles this:
      
      void foo(int k, int i) {
        for (k=i+i; k <= 8192; k+=i)
          flags2[k] = 0;
      }
      
      to:
      
      LBB1_1: #bb.preheader
              movl %eax, %ecx
              addl %ecx, %ecx
              movl L_flags2$non_lazy_ptr, %edx
      LBB1_2: #bb
              movb $0, (%edx,%ecx)
              addl %eax, %ecx
              cmpl $8192, %ecx
              jle LBB1_2      #bb
      LBB1_5: #return
              ret
      
      or (if the array is local and we are in dynamic-nonpic or static mode):
      
      LBB3_2: #bb
              movb $0, _flags2(%ecx)
              addl %eax, %ecx
              cmpl $8192, %ecx
              jle LBB3_2      #bb
      
      and:
      
              lis r2, ha16(L_flags2$non_lazy_ptr)
              lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
              slwi r3, r4, 1
      LBB1_2: ;bb
              li r5, 0
              add r6, r4, r3
              stbx r5, r2, r3
              cmpwi cr0, r6, 8192
              bgt cr0, LBB1_5 ;return
      
      instead of:
      
              leal (%eax,%eax,2), %ecx
              movl %eax, %edx
              addl %edx, %edx
              addl L_flags2$non_lazy_ptr, %edx
              xorl %esi, %esi
      LBB1_2: #bb
              movb $0, (%edx,%esi)
              movl %eax, %edi
              addl %esi, %edi
              addl %ecx, %esi
              cmpl $8192, %esi
              jg LBB1_5       #return
      
      and:
      
              lis r2, ha16(L_flags2$non_lazy_ptr)
              lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
              mulli r3, r4, 3
              slwi r5, r4, 1
              li r6, 0
              add r2, r2, r5
      LBB1_2: ;bb
              li r5, 0
              add r7, r3, r6
              stbx r5, r2, r6
              add r6, r4, r6
              cmpwi cr0, r7, 8192
              ble cr0, LBB1_2 ;bb
      
      This speeds up Benchmarks/Shootout/sieve from 8.533s to 6.464s and
      implements LoopStrengthReduce/var_stride_used_by_compare.ll
      
      llvm-svn: 31809
  19. Nov 15, 2006
  20. Nov 14, 2006