Skip to content
  • Chris Lattner's avatar
    Add a new way to match vector constants, which makes it easier to bang bits of · 147e50e1
    Chris Lattner authored
    different types.
    
    Codegen spltw(0x7FFFFFFF) and spltw(0x80000000) without a constant pool load,
    implementing PowerPC/vec_constants.ll:test1.  This compiles:
    
    typedef float vf __attribute__ ((vector_size (16)));
    typedef int vi __attribute__ ((vector_size (16)));
    void test(vi *P1, vi *P2, vf *P3) {
      *P1 &= (vi){0x80000000,0x80000000,0x80000000,0x80000000};
      *P2 &= (vi){0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF};
      *P3 = vec_abs((vector float)*P3);
    }
    
    to:
    
    _test:
            mfspr r2, 256
            oris r6, r2, 49152
            mtspr 256, r6
            vspltisw v0, -1
            vslw v0, v0, v0
            lvx v1, 0, r3
            vand v1, v1, v0
            stvx v1, 0, r3
            lvx v1, 0, r4
            vandc v1, v1, v0
            stvx v1, 0, r4
            lvx v1, 0, r5
            vandc v0, v1, v0
            stvx v0, 0, r5
            mtspr 256, r2
            blr
    
    instead of (with two constant pool entries):
    
    _test:
            mfspr r2, 256
            oris r6, r2, 49152
            mtspr 256, r6
            li r6, lo16(LCPI1_0)
            lis r7, ha16(LCPI1_0)
            li r8, lo16(LCPI1_1)
            lis r9, ha16(LCPI1_1)
            lvx v0, r7, r6
            lvx v1, 0, r3
            vand v0, v1, v0
            stvx v0, 0, r3
            lvx v0, r9, r8
            lvx v1, 0, r4
            vand v1, v1, v0
            stvx v1, 0, r4
            lvx v1, 0, r5
            vand v0, v1, v0
            stvx v0, 0, r5
            mtspr 256, r2
            blr
    
    GCC produces (with 2 constant pool entries):
    
    _test:
            mfspr r0,256
            stw r0,-4(r1)
            oris r0,r0,0xc00c
            mtspr 256,r0
            lis r2,ha16(LC0)
            lis r9,ha16(LC1)
            la r2,lo16(LC0)(r2)
            lvx v0,0,r3
            lvx v1,0,r5
            la r9,lo16(LC1)(r9)
            lwz r12,-4(r1)
            lvx v12,0,r2
            lvx v13,0,r9
            vand v0,v0,v12
            stvx v0,0,r3
            vspltisw v0,-1
            vslw v12,v0,v0
            vandc v1,v1,v12
            stvx v1,0,r5
            lvx v0,0,r4
            vand v0,v0,v13
            stvx v0,0,r4
            mtspr 256,r12
            blr
    
    llvm-svn: 27624
    147e50e1
Loading