  1. Dec 21, 2005
  2. Dec 20, 2005
  3. Dec 19, 2005
  4. Dec 18, 2005
  5. Dec 17, 2005
  6. Dec 16, 2005
  7. Dec 15, 2005
  8. Dec 14, 2005
  9. Dec 13, 2005
  10. Dec 12, 2005
  11. Dec 11, 2005
  12. Dec 10, 2005
  13. Dec 09, 2005
  14. Dec 08, 2005
     • improve code insertion in two ways: · be73d6ee
      Chris Lattner authored
      1. Only forward subst offsets into loads and stores, not into arbitrary
         things, where it will likely become a load.
      2. If the source is a cast from pointer, forward subst the cast as well,
         allowing us to fold the cast away (improving cases when the cast is
         from an alloca or global).
      
       This hasn't been fully tested, but it does appear to further reduce register
       pressure and improve code.  Let's let the testers grind on it a bit. :)
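       
       As an illustrative sketch (not part of the original change), the kind of
       case point 1 is aimed at, in plain C:
       
       /* Hypothetical example: the offset computation 'base + 5' is only used
          by the load, so forward-substituting it lets isel fold the constant
          displacement into the load's addressing mode (e.g. lfd f1, 40(r3) on
          PPC) instead of keeping the computed address live in a register.
          For point 2, the same folding applies when the base address comes
          from a cast of an alloca or a global. */
       double load_sixth(double *base) {
               double *p = base + 5;   /* address used only by the load below */
               return *p;              /* single load with a constant offset */
       }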
      
      llvm-svn: 24640
  15. Dec 07, 2005
     • Fix a crash where ConstantVec nodes were being generated with the wrong · ae89d862
      Nate Begeman authored
      type when the target did not support them.  Also teach Legalize how to
      expand ConstantVecs.
      
      This allows us to generate
      
      _test:
              lwz r2, 12(r3)
              lwz r4, 8(r3)
              lwz r5, 4(r3)
              lwz r6, 0(r3)
              addi r2, r2, 4
              addi r4, r4, 3
              addi r5, r5, 2
              addi r6, r6, 1
              stw r2, 12(r3)
              stw r4, 8(r3)
              stw r5, 4(r3)
              stw r6, 0(r3)
              blr
      
      For:
      
      void %test(%v4i *%P) {
              %T = load %v4i* %P
              %S = add %v4i %T, <int 1, int 2, int 3, int 4>
              store %v4i %S, %v4i * %P
              ret void
      }
      
      On PowerPC.
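       
       (Illustrative, not from the commit: roughly the C source behind the %test
       IR above, with a hypothetical struct name.  Because the target has no
       native vector support, Legalize expands the ConstantVec <1, 2, 3, 4> and
       the vector add into the four independent scalar lwz/addi/stw triples
       shown.)
       
       struct v4i { int e[4]; };
       
       void test(struct v4i *P) {
               P->e[0] += 1;   /* lwz/addi/stw at offset 0  */
               P->e[1] += 2;   /* lwz/addi/stw at offset 4  */
               P->e[2] += 3;   /* lwz/addi/stw at offset 8  */
               P->e[3] += 4;   /* lwz/addi/stw at offset 12 */
       }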
      
      llvm-svn: 24633
     • Only transform (sext (truncate x)) -> (sextinreg x) if before legalize or · 57c882ed
      Chris Lattner authored
      if the target supports the resultant sextinreg
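       
       A small C case that produces this pattern (illustrative, not from the
       commit): truncating to a narrower type and widening it back is
       (sext (truncate x)) in the DAG, which the combiner can turn into a single
       sign_extend_inreg node, but only if that node is legal for the target or
       legalization has not happened yet.
       
       int sign_extend_low_byte(int x) {
               return (signed char)x;  /* truncate to i8, then sign-extend
                                          back to i32: (sext (truncate x)) */
       }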
      
      llvm-svn: 24632
     • Teach the dag combiner to turn a truncate/sign_extend pair into a sextinreg · cbd3d01a
      Chris Lattner authored
      when the types match up.  This allows the X86 backend to compile:
      
      sbyte %toggle_value(sbyte* %tmp.1) {
              %tmp.2 = load sbyte* %tmp.1
              ret sbyte %tmp.2
      }
      
      to this:
      
      _toggle_value:
              mov %EAX, DWORD PTR [%ESP + 4]
              movsx %EAX, BYTE PTR [%EAX]
              ret
      
      instead of this:
      
      _toggle_value:
              mov %EAX, DWORD PTR [%ESP + 4]
              movsx %EAX, BYTE PTR [%EAX]
              movsx %EAX, %AL
              ret
      
      noticed in Shootout/objinst.
      
      -Chris
      
      llvm-svn: 24630
  16. Dec 06, 2005
     • Teach the SelectionDAG ISel how to turn ConstantPacked values into · 41b1cdc7
      Nate Begeman authored
      constant nodes with vector types.  Also teach the asm printer how to print
       ConstantPacked constant pool entries.  This allows us to generate AltiVec
       code such as the following, which adds a vector constant to a packed float.
      
      LCPI1_0:  <4 x float> < float 0.0e+0, float 0.0e+0, float 0.0e+0, float 1.0e+0 >
              .space  4
              .space  4
              .space  4
              .long   1065353216      ; float 1
              .text
              .align  4
              .globl  _foo
      _foo:
              lis r2, ha16(LCPI1_0)
              la r2, lo16(LCPI1_0)(r2)
              li r4, 0
              lvx v0, r4, r2
              lvx v1, r4, r3
              vaddfp v0, v1, v0
              stvx v0, r4, r3
              blr
      
      For the llvm code:
      
      void %foo(<4 x float> * %a) {
      entry:
        %tmp1 = load <4 x float> * %a;
        %tmp2 = add <4 x float> %tmp1, < float 0.0, float 0.0, float 0.0, float 1.0 >
        store <4 x float> %tmp2, <4 x float> *%a
        ret void
      }
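       
       (Illustrative, not part of the commit: roughly the same function written
       with GNU C vector extensions.  The constant vector becomes the
       ConstantPacked constant pool entry LCPI1_0 above, which is loaded with
       lvx and added with vaddfp.)
       
       typedef float v4f __attribute__((vector_size(16)));
       
       void foo(v4f *a) {
               v4f k = {0.0f, 0.0f, 0.0f, 1.0f};
               *a = *a + k;
       }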
      
      llvm-svn: 24616
  17. Dec 05, 2005
     • Fix the #1 code quality problem that I have seen on X86 (and it also affects · 35397788
      Chris Lattner authored
       PPC and other targets).  In particular, consider code like this:
      
      struct Vector3 { double x, y, z; };
      struct Matrix3 { Vector3 a, b, c; };
      double dot(Vector3 &a, Vector3 &b) {
         return a.x * b.x  +  a.y * b.y  +  a.z * b.z;
      }
      Vector3 mul(Vector3 &a, Matrix3 &b) {
         Vector3 r;
         r.x = dot( a, b.a );
         r.y = dot( a, b.b );
         r.z = dot( a, b.c );
         return r;
      }
      void transform(Matrix3 &m, Vector3 *x, int n) {
         for (int i = 0; i < n; i++)
            x[i] = mul( x[i], m );
      }
      
       we compile 'transform' to a loop with all of the GEP instructions for indexing
       into 'm' pulled out of the loop (9 of them).  Because isel runs one basic block
       at a time, we are unable to fold the constant indices into the loads in the
       loop, leading to PPC code that looks like this:
      
      LBB3_1: ; no_exit.preheader
              li r2, 0
              addi r6, r3, 64        ;; 9 values live across the loop body!
              addi r7, r3, 56
              addi r8, r3, 48
              addi r9, r3, 40
              addi r10, r3, 32
              addi r11, r3, 24
              addi r12, r3, 16
              addi r30, r3, 8
      LBB3_2: ; no_exit
              lfd f0, 0(r30)
              lfd f1, 8(r4)
              fmul f0, f1, f0
              lfd f2, 0(r3)        ;; no constant indices folded into the loads!
              lfd f3, 0(r4)
              lfd f4, 0(r10)
              lfd f5, 0(r6)
              lfd f6, 0(r7)
              lfd f7, 0(r8)
              lfd f8, 0(r9)
              lfd f9, 0(r11)
              lfd f10, 0(r12)
              lfd f11, 16(r4)
              fmadd f0, f3, f2, f0
              fmul f2, f1, f4
              fmadd f0, f11, f10, f0
              fmadd f2, f3, f9, f2
              fmul f1, f1, f6
              stfd f0, 0(r4)
              fmadd f0, f11, f8, f2
              fmadd f1, f3, f7, f1
              stfd f0, 8(r4)
              fmadd f0, f11, f5, f1
              addi r29, r4, 24
              stfd f0, 16(r4)
              addi r2, r2, 1
              cmpw cr0, r2, r5
              or r4, r29, r29
              bne cr0, LBB3_2 ; no_exit
      
      uh, yuck.  With this patch, we now sink the constant offsets into the loop, producing
      this code:
      
      LBB3_1: ; no_exit.preheader
              li r2, 0
      LBB3_2: ; no_exit
              lfd f0, 8(r3)
              lfd f1, 8(r4)
              fmul f0, f1, f0
              lfd f2, 0(r3)
              lfd f3, 0(r4)
              lfd f4, 32(r3)       ;; much nicer.
              lfd f5, 64(r3)
              lfd f6, 56(r3)
              lfd f7, 48(r3)
              lfd f8, 40(r3)
              lfd f9, 24(r3)
              lfd f10, 16(r3)
              lfd f11, 16(r4)
              fmadd f0, f3, f2, f0
              fmul f2, f1, f4
              fmadd f0, f11, f10, f0
              fmadd f2, f3, f9, f2
              fmul f1, f1, f6
              stfd f0, 0(r4)
              fmadd f0, f11, f8, f2
              fmadd f1, f3, f7, f1
              stfd f0, 8(r4)
              fmadd f0, f11, f5, f1
              addi r6, r4, 24
              stfd f0, 16(r4)
              addi r2, r2, 1
              cmpw cr0, r2, r5
              or r4, r6, r6
              bne cr0, LBB3_2 ; no_exit
      
       This is much nicer, as it significantly reduces register pressure in the loop.
       On X86, this takes the function from having 9 spilled registers to 2.  This
       should help some SPEC programs on X86 (gzip?).
      
      This is currently only enabled with -enable-gep-isel-opt to allow perf testing
      tonight.
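       
       (Illustrative sketch, not from the patch: a distilled version of the
       pattern.  Each field access in the loop is a base pointer plus a small
       constant offset; sinking those offsets into the loads so they fold into
       the addressing mode, e.g. lfd f1, 8(rX), means only the base pointer
       stays live across the loop body.)
       
       struct Pair { double a, b; };
       
       double sum(struct Pair *p, int n) {
               double s = 0.0;
               for (int i = 0; i < n; i++)
                       s += p[i].a + p[i].b;  /* p[i].b is p[i] plus constant 8 */
               return s;
       }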
      
      llvm-svn: 24606
  18. Dec 03, 2005