Skip to content
  1. Apr 28, 2006
  2. Apr 19, 2006
  3. Apr 18, 2006
    • Chris Lattner's avatar
      add a note · 197d7622
      Chris Lattner authored
      llvm-svn: 27809
      197d7622
    • Chris Lattner's avatar
      Implement an important entry from README_ALTIVEC: · 9754d142
      Chris Lattner authored
      If an altivec predicate compare is used immediately by a branch, don't
      use a (serializing) MFCR instruction to read the CR6 register, which then requires
      a compare to get the result back into a CR.  Instead, just branch on CR6 directly. :)
      
      For example, for:
      void foo2(vector float *A, vector float *B) {
        if (!vec_any_eq(*A, *B))
          *B = (vector float){0,0,0,0};
      }
      
      We now generate:
      
      _foo2:
              mfspr r2, 256
              oris r5, r2, 12288
              mtspr 256, r5
              lvx v2, 0, r4
              lvx v3, 0, r3
              vcmpeqfp. v2, v3, v2
              bne cr6, LBB1_2 ; UnifiedReturnBlock
      LBB1_1: ; cond_true
              vxor v2, v2, v2
              stvx v2, 0, r4
              mtspr 256, r2
              blr
      LBB1_2: ; UnifiedReturnBlock
              mtspr 256, r2
              blr
      
      instead of:
      
      _foo2:
              mfspr r2, 256
              oris r5, r2, 12288
              mtspr 256, r5
              lvx v2, 0, r4
              lvx v3, 0, r3
              vcmpeqfp. v2, v3, v2
              mfcr r3, 2
              rlwinm r3, r3, 27, 31, 31
              cmpwi cr0, r3, 0
              beq cr0, LBB1_2 ; UnifiedReturnBlock
      LBB1_1: ; cond_true
              vxor v2, v2, v2
              stvx v2, 0, r4
              mtspr 256, r2
              blr
      LBB1_2: ; UnifiedReturnBlock
              mtspr 256, r2
              blr
      
      This implements CodeGen/PowerPC/vec_br_cmp.ll.
      
      llvm-svn: 27804
      9754d142
    • Chris Lattner's avatar
      move some stuff around, clean things up · 68c16a20
      Chris Lattner authored
      llvm-svn: 27802
      68c16a20
    • Chris Lattner's avatar
      Implement v16i8 multiply with this code: · d6d82aa8
      Chris Lattner authored
              vmuloub v5, v3, v2
              vmuleub v2, v3, v2
              vperm v2, v2, v5, v4
      
      This implements CodeGen/PowerPC/vec_mul.ll.  With this, v16i8 multiplies are
      6.79x faster than before.
      
      Overall, UnitTests/Vector/multiplies.c is now 2.45x faster with LLVM than with
      GCC.
      
      Remove the 'integer multiplies' todo from the README file.
      
      llvm-svn: 27792
      d6d82aa8
  4. Apr 17, 2006
  5. Apr 16, 2006
  6. Apr 13, 2006
  7. Apr 12, 2006
    • Chris Lattner's avatar
      Add a new way to match vector constants, which makes it easier to bang bits of · 147e50e1
      Chris Lattner authored
      different types.
      
      Codegen spltw(0x7FFFFFFF) and spltw(0x80000000) without a constant pool load,
      implementing PowerPC/vec_constants.ll:test1.  This compiles:
      
      typedef float vf __attribute__ ((vector_size (16)));
      typedef int vi __attribute__ ((vector_size (16)));
      void test(vi *P1, vi *P2, vf *P3) {
        *P1 &= (vi){0x80000000,0x80000000,0x80000000,0x80000000};
        *P2 &= (vi){0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF};
        *P3 = vec_abs((vector float)*P3);
      }
      
      to:
      
      _test:
              mfspr r2, 256
              oris r6, r2, 49152
              mtspr 256, r6
              vspltisw v0, -1
              vslw v0, v0, v0
              lvx v1, 0, r3
              vand v1, v1, v0
              stvx v1, 0, r3
              lvx v1, 0, r4
              vandc v1, v1, v0
              stvx v1, 0, r4
              lvx v1, 0, r5
              vandc v0, v1, v0
              stvx v0, 0, r5
              mtspr 256, r2
              blr
      
      instead of (with two constant pool entries):
      
      _test:
              mfspr r2, 256
              oris r6, r2, 49152
              mtspr 256, r6
              li r6, lo16(LCPI1_0)
              lis r7, ha16(LCPI1_0)
              li r8, lo16(LCPI1_1)
              lis r9, ha16(LCPI1_1)
              lvx v0, r7, r6
              lvx v1, 0, r3
              vand v0, v1, v0
              stvx v0, 0, r3
              lvx v0, r9, r8
              lvx v1, 0, r4
              vand v1, v1, v0
              stvx v1, 0, r4
              lvx v1, 0, r5
              vand v0, v1, v0
              stvx v0, 0, r5
              mtspr 256, r2
              blr
      
      GCC produces (with 2 cp entries):
      
      _test:
              mfspr r0,256
              stw r0,-4(r1)
              oris r0,r0,0xc00c
              mtspr 256,r0
              lis r2,ha16(LC0)
              lis r9,ha16(LC1)
              la r2,lo16(LC0)(r2)
              lvx v0,0,r3
              lvx v1,0,r5
              la r9,lo16(LC1)(r9)
              lwz r12,-4(r1)
              lvx v12,0,r2
              lvx v13,0,r9
              vand v0,v0,v12
              stvx v0,0,r3
              vspltisw v0,-1
              vslw v12,v0,v0
              vandc v1,v1,v12
              stvx v1,0,r5
              lvx v0,0,r4
              vand v0,v0,v13
              stvx v0,0,r4
              mtspr 256,r12
              blr
      
      llvm-svn: 27624
      147e50e1
  8. Apr 11, 2006
  9. Apr 07, 2006
  10. Apr 06, 2006
  11. Apr 05, 2006
  12. Apr 02, 2006
  13. Mar 31, 2006
    • Chris Lattner's avatar
      add a note · 40ff17dc
      Chris Lattner authored
      llvm-svn: 27302
      40ff17dc
    • Chris Lattner's avatar
      Implement an item from the readme, folding vcmp/vcmp. instructions with · d4058a59
      Chris Lattner authored
      identical instructions into a single instruction.  For example, for:
      
      void test(vector float *x, vector float *y, int *P) {
        int v = vec_any_out(*x, *y);
        *x = (vector float)vec_cmpb(*x, *y);
        *P = v;
      }
      
      we now generate:
      
      _test:
              mfspr r2, 256
              oris r6, r2, 49152
              mtspr 256, r6
              lvx v0, 0, r4
              lvx v1, 0, r3
              vcmpbfp. v0, v1, v0
              mfcr r4, 2
              stvx v0, 0, r3
              rlwinm r3, r4, 27, 31, 31
              xori r3, r3, 1
              stw r3, 0(r5)
              mtspr 256, r2
              blr
      
      instead of:
      
      _test:
              mfspr r2, 256
              oris r6, r2, 57344
              mtspr 256, r6
              lvx v0, 0, r4
              lvx v1, 0, r3
              vcmpbfp. v2, v1, v0
              mfcr r4, 2
      ***     vcmpbfp v0, v1, v0
              rlwinm r4, r4, 27, 31, 31
              stvx v0, 0, r3
              xori r3, r4, 1
              stw r3, 0(r5)
              mtspr 256, r2
              blr
      
      Testcase here: CodeGen/PowerPC/vcmp-fold.ll
      
      llvm-svn: 27290
      d4058a59
    • Chris Lattner's avatar
      These are done · e5a6c4f8
      Chris Lattner authored
      llvm-svn: 27284
      e5a6c4f8
  14. Mar 29, 2006
  15. Mar 28, 2006
  16. Mar 27, 2006
Loading