Apr 17, 2006
  • Codegen insertelement with constant insertion points as scalar_to_vector · 326870b4
      Chris Lattner authored
      and a shuffle.  For this:
      
      void %test2(<4 x float>* %F, float %f) {
              %tmp = load <4 x float>* %F             ; <<4 x float>> [#uses=2]
              %tmp3 = add <4 x float> %tmp, %tmp              ; <<4 x float>> [#uses=1]
              %tmp2 = insertelement <4 x float> %tmp3, float %f, uint 2               ; <<4 x float>> [#uses=2]
              %tmp6 = add <4 x float> %tmp2, %tmp2            ; <<4 x float>> [#uses=1]
              store <4 x float> %tmp6, <4 x float>* %F
              ret void
      }
      
      we now get this on X86 (which will get better):
      
      _test2:
              movl 4(%esp), %eax
              movaps (%eax), %xmm0
              addps %xmm0, %xmm0
              movaps %xmm0, %xmm1
              shufps $3, %xmm1, %xmm1
              movaps %xmm0, %xmm2
              shufps $1, %xmm2, %xmm2
              unpcklps %xmm1, %xmm2
              movss 8(%esp), %xmm1
              unpcklps %xmm1, %xmm0
              unpcklps %xmm2, %xmm0
              addps %xmm0, %xmm0
              movaps %xmm0, (%eax)
              ret
      
      instead of:
      
      _test2:
              subl $28, %esp
              movl 32(%esp), %eax
              movaps (%eax), %xmm0
              addps %xmm0, %xmm0
              movaps %xmm0, (%esp)
              movss 36(%esp), %xmm0
              movss %xmm0, 8(%esp)
              movaps (%esp), %xmm0
              addps %xmm0, %xmm0
              movaps %xmm0, (%eax)
              addl $28, %esp
              ret
      
      llvm-svn: 27765
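
The win is visible with SSE intrinsics: a minimal sketch, assuming a hypothetical helper named insert_lane2 (not code from this commit), of how an insert at constant index 2 becomes a scalar_to_vector (movss) plus shuffles instead of a round trip through the stack:

        #include <xmmintrin.h>

        /* Illustrative only: build <v0, v1, f, v3> with movss + shufps. */
        static __m128 insert_lane2(__m128 v, float f) {
            __m128 s  = _mm_set_ss(f);                                 /* scalar_to_vector: <f, 0, 0, 0> */
            __m128 hi = _mm_shuffle_ps(s, v, _MM_SHUFFLE(3, 3, 0, 0)); /* <f, f, v3, v3> */
            return _mm_shuffle_ps(v, hi, _MM_SHUFFLE(2, 0, 1, 0));     /* <v0, v1, f, v3> */
        }

Everything stays in registers; the old lowering spilled the vector, overwrote lane 2 in memory with movss, and reloaded the whole thing.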
Apr 02, 2006
  • This should be a win on every arch · 015eaf5f
      Andrew Lenharth authored
      llvm-svn: 27364
  • Add a little dag combine to compile this: · 4993249a
      Chris Lattner authored
      int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
      entry:
              %tmp1 = load <4 x float>* %in           ; <<4 x float>> [#uses=1]
              %tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 )           ; <int> [#uses=1]
        %tmp2 = seteq int %tmp, 0               ; <bool> [#uses=1]
        %tmp3 = cast bool %tmp2 to int          ; <int> [#uses=1]
              ret int %tmp3
      }
      
      into this:
      
      _AreSecondAndThirdElementsBothNegative:
              mfspr r2, 256
              oris r4, r2, 49152
              mtspr 256, r4
              li r4, lo16(LCPI1_0)
              lis r5, ha16(LCPI1_0)
              lvx v0, 0, r3
              lvx v1, r5, r4
              vcmpgefp. v0, v1, v0
              mfcr r3, 2
              rlwinm r3, r3, 27, 31, 31
              mtspr 256, r2
              blr
      
      instead of this:
      
      _AreSecondAndThirdElementsBothNegative:
              mfspr r2, 256
              oris r4, r2, 49152
              mtspr 256, r4
              li r4, lo16(LCPI1_0)
              lis r5, ha16(LCPI1_0)
              lvx v0, 0, r3
              lvx v1, r5, r4
              vcmpgefp. v0, v1, v0
              mfcr r3, 2
              rlwinm r3, r3, 27, 31, 31
              xori r3, r3, 1
              cntlzw r3, r3
              srwi r3, r3, 5
              mtspr 256, r2
              blr
      
      llvm-svn: 27356
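
In the slower listing, xori inverts the CR6 bit and the cntlzw/srwi pair is PPC's branchless "== 0" test, so the three instructions together map a 0-or-1 value back to itself; the combine exploits this by folding the seteq into the intrinsic's CR6 predicate. A small C model of that identity (is_zero is an illustrative name, not LLVM code):

        #include <stdio.h>

        /* clz(x) >> 5 is 1 iff x == 0. PPC's cntlzw returns 32 for 0,
           which __builtin_clz leaves undefined, so special-case it. */
        static unsigned is_zero(unsigned x) {
            return (x ? (unsigned)__builtin_clz(x) : 32u) >> 5;
        }

        int main(void) {
            for (unsigned bit = 0; bit <= 1; ++bit)  /* a CR6 bit is 0 or 1 */
                printf("bit=%u  xori+cntlzw+srwi=%u\n", bit, is_zero(bit ^ 1));
            return 0;
        }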
  • Implement the Expand action for binary vector operations to break the binop · 87f08094
      Chris Lattner authored
      into elements and operate on each piece.  This allows generic vector integer
      multiplies to work on PPC, though the generated code is horrible.
      
      llvm-svn: 27347
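
A sketch of the Expand action in plain C, with the legalizer's extract/op/rebuild steps written out (types and names here are illustrative, not LLVM's API):

        typedef struct { int e[4]; } v4i32;   /* stand-in for <4 x int> */

        /* Expand a vector multiply the target lacks into scalar multiplies:
           extract each element, multiply, and rebuild the vector. */
        static v4i32 expand_mul(v4i32 a, v4i32 b) {
            v4i32 r;
            for (int i = 0; i < 4; ++i)
                r.e[i] = a.e[i] * b.e[i];
            return r;
        }

Four extracts, four scalar multiplies, and four inserts per binop is exactly why the generated code is horrible, but it is correct on any target that has the scalar operation.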
  • Intrinsics that just load from memory can be treated like loads: they don't · a9c59156
      Chris Lattner authored
      have to serialize against each other.  This allows us to schedule lvx's
      across each other, for example.
      
      llvm-svn: 27346
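
A C model of the scheduling freedom this buys, assuming an illustrative stand-in lvx_model rather than the real intrinsic: a pure-load intrinsic, like an ordinary load, has no side effects for another load to observe, so no chain edge is needed between them.

        typedef struct { float f[4]; } v4f32;

        static v4f32 lvx_model(const v4f32 *p) { return *p; }  /* reads memory, writes nothing */

        static v4f32 add_two(const v4f32 *a, const v4f32 *b) {
            /* The two loads below may be issued in either order or
               overlapped; neither serializes against the other. */
            v4f32 x = lvx_model(a);
            v4f32 y = lvx_model(b);
            v4f32 r;
            for (int i = 0; i < 4; ++i)
                r.f[i] = x.f[i] + y.f[i];
            return r;
        }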
  • Constant fold all of the vector binops. This allows us to compile this: · 0442a187
      Chris Lattner authored
      "vector unsigned char mergeLowHigh = (vector unsigned char)
      ( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
      vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
      
      aka:
      
      void %test2(<16 x sbyte>* %P) {
        store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
        ret void
      }
      
      into this:
      
      _test2:
              mfspr r2, 256
              oris r4, r2, 32768
              mtspr 256, r4
              li r4, lo16(LCPI2_0)
              lis r5, ha16(LCPI2_0)
              lvx v0, r5, r4
              stvx v0, 0, r3
              mtspr 256, r2
              blr
      
      instead of this:
      
      _test2:
              mfspr r2, 256
              oris r4, r2, 49152
              mtspr 256, r4
              li r4, lo16(LCPI2_0)
              lis r5, ha16(LCPI2_0)
              vspltisb v0, 8
              lvx v1, r5, r4
              vxor v0, v1, v0
              stvx v0, 0, r3
              mtspr 256, r2
              blr
      
      ... which occurs here:
      http://developer.apple.com/hardware/ve/calcspeed.html
      
      llvm-svn: 27343
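
The fold itself is just byte-wise arithmetic on two constant vectors. A quick C check of the table the compiler now emits straight from the constant pool (computed at run time here only to show the values):

        #include <stdint.h>
        #include <stdio.h>

        int main(void) {
            const uint8_t mergeLowHigh[16] =
                { 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 };
            uint8_t mergeHighLow[16];
            for (int i = 0; i < 16; ++i)                 /* vec_xor(mergeLowHigh, vec_splat_u8(8)) */
                mergeHighLow[i] = mergeLowHigh[i] ^ 8;
            for (int i = 0; i < 16; ++i)
                printf("%u%s", mergeHighLow[i], i < 15 ? ", " : "\n");
            return 0;
        }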