Skip to content
Commit 49b090ed authored by Chris Lattner's avatar Chris Lattner
Browse files

teach scev to analyze X*4|1 like X*4+c. This allows us to produce:

LBB1_1: #bb
        movdqa (%esi), %xmm2
        movaps %xmm2, %xmm3
        punpcklbw %xmm0, %xmm3
        movaps %xmm3, %xmm4
        punpcklwd %xmm0, %xmm4
        cvtdq2ps %xmm4, %xmm4
        mulps %xmm1, %xmm4
        movaps %xmm4, (%edi)
        leal 1(,%eax,4), %ebx
        shll $4, %ebx
        punpckhwd %xmm0, %xmm3
        cvtdq2ps %xmm3, %xmm3
        mulps %xmm1, %xmm3
        movaps %xmm3, (%edx,%ebx)
        leal 2(,%eax,4), %ebx
        shll $4, %ebx
        punpckhbw %xmm0, %xmm2
        movaps %xmm2, %xmm3
        punpcklwd %xmm0, %xmm3
        cvtdq2ps %xmm3, %xmm3
        mulps %xmm1, %xmm3
        movaps %xmm3, (%edx,%ebx)
        leal 3(,%eax,4), %ebx
        shll $4, %ebx
        punpckhwd %xmm0, %xmm2
        cvtdq2ps %xmm2, %xmm2
        mulps %xmm1, %xmm2
        movaps %xmm2, (%edx,%ebx)
        addl $64, %edi
        incl %eax
        addl $16, %esi
        cmpl %ecx, %eax
        jne LBB1_1      #bb

instead of:

LBB1_1: #bb
        movdqa (%esi), %xmm2
        movaps %xmm2, %xmm3
        punpcklbw %xmm0, %xmm3
        movaps %xmm3, %xmm4
        punpcklwd %xmm0, %xmm4
        cvtdq2ps %xmm4, %xmm4
        mulps %xmm1, %xmm4
        movaps %xmm4, (%edi)
        leal 1(,%eax,4), %ebx
        shll $4, %ebx
        punpckhwd %xmm0, %xmm3
        cvtdq2ps %xmm3, %xmm3
        mulps %xmm1, %xmm3
        movaps %xmm3, (%edx,%ebx)
        leal 2(,%eax,4), %ebx
        shll $4, %ebx
        punpckhbw %xmm0, %xmm2
        movaps %xmm2, %xmm3
        punpcklwd %xmm0, %xmm3
        cvtdq2ps %xmm3, %xmm3
        mulps %xmm1, %xmm3
        movaps %xmm3, (%edx,%ebx)
        leal 3(,%eax,4), %ebx
        shll $4, %ebx
        punpckhwd %xmm0, %xmm2
        cvtdq2ps %xmm2, %xmm2
        mulps %xmm1, %xmm2
        movaps %xmm2, (%edx,%ebx)
        addl $64, %edi
        incl %eax
        addl $16, %esi
        cmpl %ecx, %eax
        jne LBB1_1      #bb

for a testcase.

llvm-svn: 32463
parent 38dbe1ca
Loading
Loading
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment