Commits · 5fbf58a7b0439a05d6de519ce143032fd2d6691f · Roger Ferrer / llvm-epi-0.8

Aug 17, 2005

Fix grammar · 5fbf58a7
Misha Brukman authored Aug 17, 2005
```
llvm-svn: 22821
```
5fbf58a7
make sure to remove a node from the use list of its operands when we replace · 40f909ad
Chris Lattner authored Aug 17, 2005
```
it.

llvm-svn: 22820
```
40f909ad

Fix a few small typos I noticed when converting this over to the DAG->DAG · 63f774ec

Chris Lattner authored Aug 17, 2005

selector.  Also, there is no difference between addSImm and addImm, so just
use addImm, folding some branches.

llvm-svn: 22819

63f774ec

Removed UINT_TO_FP and SINT_TO_FP from ISel outright. · 9828f26c
Jim Laskey authored Aug 17, 2005
```
llvm-svn: 22818
```
9828f26c
thinko. Should fix s4addl.ll regression · 73370ba5
Andrew Lenharth authored Aug 17, 2005
```
llvm-svn: 22817
```
73370ba5
Remove ISel code generation for UINT_TO_FP and SINT_TO_FP. Now asserts if · 5909c8b1
Jim Laskey authored Aug 17, 2005
```
marked as legal.

llvm-svn: 22816
```
5909c8b1
Make UINT_TO_FP and SINT_TO_FP use generic expansion. · 6267b2c9
Jim Laskey authored Aug 17, 2005
```
llvm-svn: 22815
```
6267b2c9

Added generic code expansion for [signed|unsigned] i32 to [f32|f64] casts in the... · f2516a91

Jim Laskey authored Aug 17, 2005

Added generic code expansion for [signed|unsigned] i32 to [f32|f64] casts in the
legalizer.  PowerPC now uses this expansion instead of ISel version.

Example:

// signed integer to double conversion
double f1(signed x) {
  return (double)x;
}

// unsigned integer to double conversion
double f2(unsigned x) {
  return (double)x;
}

// signed integer to float conversion
float f3(signed x) {
  return (float)x;
}

// unsigned integer to float conversion
float f4(unsigned x) {
  return (float)x;
}


Byte Code:

internal fastcc double %_Z2f1i(int %x) {
entry:
        %tmp.1 = cast int %x to double          ; <double> [#uses=1]
        ret double %tmp.1
}

internal fastcc double %_Z2f2j(uint %x) {
entry:
        %tmp.1 = cast uint %x to double         ; <double> [#uses=1]
        ret double %tmp.1
}

internal fastcc float %_Z2f3i(int %x) {
entry:
        %tmp.1 = cast int %x to float           ; <float> [#uses=1]
        ret float %tmp.1
}

internal fastcc float %_Z2f4j(uint %x) {
entry:
        %tmp.1 = cast uint %x to float          ; <float> [#uses=1]
        ret float %tmp.1
}

internal fastcc double %_Z2g1i(int %x) {
entry:
        %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
        %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
        store uint 1127219200, uint* %tmp.0
        %tmp.2 = cast int %x to uint            ; <uint> [#uses=1]
        %tmp.3 = xor uint %tmp.2, 2147483648            ; <uint> [#uses=1]
        %tmp.5 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
        store uint %tmp.3, uint* %tmp.5
        %tmp.9 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
        %tmp.10 = load double* %tmp.9           ; <double> [#uses=1]
        %tmp.13 = load double* cast (long* %signed_bias to double*)             ; <double> [#uses=1]
        %tmp.14 = sub double %tmp.10, %tmp.13           ; <double> [#uses=1]
        ret double %tmp.14
}

internal fastcc double %_Z2g2j(uint %x) {
entry:
        %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
        %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
        store uint 1127219200, uint* %tmp.0
        %tmp.1 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
        store uint %x, uint* %tmp.1
        %tmp.4 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
        %tmp.5 = load double* %tmp.4            ; <double> [#uses=1]
        %tmp.8 = load double* cast (long* %unsigned_bias to double*)            ; <double> [#uses=1]
        %tmp.9 = sub double %tmp.5, %tmp.8              ; <double> [#uses=1]
        ret double %tmp.9
}

internal fastcc float %_Z2g3i(int %x) {
entry:
        %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
        %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
        store uint 1127219200, uint* %tmp.0
        %tmp.2 = cast int %x to uint            ; <uint> [#uses=1]
        %tmp.3 = xor uint %tmp.2, 2147483648            ; <uint> [#uses=1]
        %tmp.5 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
        store uint %tmp.3, uint* %tmp.5
        %tmp.9 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
        %tmp.10 = load double* %tmp.9           ; <double> [#uses=1]
        %tmp.13 = load double* cast (long* %signed_bias to double*)             ; <double> [#uses=1]
        %tmp.14 = sub double %tmp.10, %tmp.13           ; <double> [#uses=1]
        %tmp.16 = cast double %tmp.14 to float          ; <float> [#uses=1]
        ret float %tmp.16
}

internal fastcc float %_Z2g4j(uint %x) {
entry:
        %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
        %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
        store uint 1127219200, uint* %tmp.0
        %tmp.1 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
        store uint %x, uint* %tmp.1
        %tmp.4 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
        %tmp.5 = load double* %tmp.4            ; <double> [#uses=1]
        %tmp.8 = load double* cast (long* %unsigned_bias to double*)            ; <double> [#uses=1]
        %tmp.9 = sub double %tmp.5, %tmp.8              ; <double> [#uses=1]
        %tmp.11 = cast double %tmp.9 to float           ; <float> [#uses=1]
        ret float %tmp.11
}


PowerPC Code:

        .machine ppc970


        .const
        .align  2
.CPIl1__Z2f1i_0:                                        ; float 0x4330000080000000
        .long   1501560836      ; float 4.5036e+15
        .text
        .align  2
        .globl  l1__Z2f1i
l1__Z2f1i:
.LBBl1__Z2f1i_0:        ; entry
        xoris r2, r3, 32768
        stw r2, -4(r1)
        lis r2, 17200
        stw r2, -8(r1)
        lfd f0, -8(r1)
        lis r2, ha16(.CPIl1__Z2f1i_0)
        lfs f1, lo16(.CPIl1__Z2f1i_0)(r2)
        fsub f1, f0, f1
        blr


        .const
        .align  2
.CPIl2__Z2f2j_0:                                        ; float 0x4330000000000000
        .long   1501560832      ; float 4.5036e+15
        .text
        .align  2
        .globl  l2__Z2f2j
l2__Z2f2j:
.LBBl2__Z2f2j_0:        ; entry
        stw r3, -4(r1)
        lis r2, 17200
        stw r2, -8(r1)
        lfd f0, -8(r1)
        lis r2, ha16(.CPIl2__Z2f2j_0)
        lfs f1, lo16(.CPIl2__Z2f2j_0)(r2)
        fsub f1, f0, f1
        blr


        .const
        .align  2
.CPIl3__Z2f3i_0:                                        ; float 0x4330000080000000
        .long   1501560836      ; float 4.5036e+15
        .text
        .align  2
        .globl  l3__Z2f3i
l3__Z2f3i:
.LBBl3__Z2f3i_0:        ; entry
        xoris r2, r3, 32768
        stw r2, -4(r1)
        lis r2, 17200
        stw r2, -8(r1)
        lfd f0, -8(r1)
        lis r2, ha16(.CPIl3__Z2f3i_0)
        lfs f1, lo16(.CPIl3__Z2f3i_0)(r2)
        fsub f0, f0, f1
        frsp f1, f0
        blr


        .const
        .align  2
.CPIl4__Z2f4j_0:                                        ; float 0x4330000000000000
        .long   1501560832      ; float 4.5036e+15
        .text
        .align  2
        .globl  l4__Z2f4j
l4__Z2f4j:
.LBBl4__Z2f4j_0:        ; entry
        stw r3, -4(r1)
        lis r2, 17200
        stw r2, -8(r1)
        lfd f0, -8(r1)
        lis r2, ha16(.CPIl4__Z2f4j_0)
        lfs f1, lo16(.CPIl4__Z2f4j_0)(r2)
        fsub f0, f0, f1
        frsp f1, f0
        blr

llvm-svn: 22814

f2516a91

add a new TargetConstant node · 0d2456e1
Chris Lattner authored Aug 17, 2005
```
llvm-svn: 22813
```
0d2456e1
add some helper methods, and a new TargetConstant node, which is not · 12214cf5
Chris Lattner authored Aug 17, 2005
```
subjected to folding.

llvm-svn: 22812
```
12214cf5

Implement a couple improvements: · 784c8068

Nate Begeman authored Aug 17, 2005

Remove dead code in ISD::Constant handling
Add support for add long, imm16

We now codegen 'long long foo(long long a) { return ++a; }'
as:
addic r4, r4, 1
addze r3, r3
blr

instead of:
li r2, 1
li r5, 0
addc r2, r4, r2
adde r3, r3, r5
blr

llvm-svn: 22811

784c8068

remove a dead enum value, making a comment above correct again · 2111aae8
Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22810
```
2111aae8

Aug 16, 2005
- This is a dummy, it doesn't matter what the ValueType is · 5a1d5e30
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22809
```
  5a1d5e30
- updates for changes in nodes · 79f5ebc7
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22808
```
  79f5ebc7
- update the backends to work with the new CopyFromReg/CopyToReg/ImplicitDef nodes · 7c762782
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22807
```
  7c762782
- Eliminate the RegSDNode class, which 3 nodes (CopyFromReg/CopyToReg/ImplicitDef) · 33182325
  Chris Lattner authored Aug 16, 2005
```
used to tack a register number onto the node.

Instead of doing this, make a new node, RegisterSDNode, which is a leaf
containing a register number.  These three operations just become normal
DAG nodes now, instead of requiring special handling.

Note that with this change, it is no longer correct to make illegal
CopyFromReg/CopyToReg nodes.  The legalizer will not touch them, and this
is bad, so don't do it. :)

llvm-svn: 22806
```
  33182325
- Implement BR_CC and BRTWOWAY_CC. This allows the removal of a rather nasty · 371e4951
  Nate Begeman authored Aug 16, 2005
```
fixme from the PowerPC backend.  Emit slightly better code for legalizing
select_cc.

llvm-svn: 22805
```
  371e4951
- Allow passing a dag into dump and getOperationName. If one is available · bc892265
  Chris Lattner authored Aug 16, 2005
```
when printing a node, use it to render target operations with their
target instruction name instead of "<<unknown>>".

llvm-svn: 22804
```
  bc892265
- allow passing a dag into getOperationName and dump · 577af487
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22803
```
  577af487
- Use an extant helper to do this. · 7e57d18b
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22802
```
  7e57d18b
- Add some methods for dag->dag isel. · 1973278b
  Chris Lattner authored Aug 16, 2005
```
Split RemoveNodeFromCSEMaps out of DeleteNodesIfDead to do it.

llvm-svn: 22801
```
  1973278b
- add some methods for dag->dag isel · ba19325e
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22800
```
  ba19325e
- Pull the LLVM -> DAG lowering code out of the pattern selector so that it · f22556d3
  Chris Lattner authored Aug 16, 2005
```
can be shared with the DAG->DAG selector.

llvm-svn: 22799
```
  f22556d3
- Fix a bad case in gzip where we put lots of things in registers across the · 5cf983ee
  Chris Lattner authored Aug 16, 2005
```
loop, because an IV-dependent value was used outside of the loop and didn't
have immediate-folding capability

llvm-svn: 22798
```
  5cf983ee
- Fix Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll · e5154163
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22797
```
  e5154163
- testcase that crashes lsr, distilled from 175.vpr · 3cf8ef17
  Chris Lattner authored Aug 16, 2005
```
llvm-svn: 22796
```
  3cf8ef17
- Turn loop strength reduction on by default. · 73785d2e
  Chris Lattner authored Aug 15, 2005
```
Only run createLowerConstantExpressionsPass for the simple isel.  The DAG
isel has no need for it.

llvm-svn: 22794
```
  73785d2e
- Teach LLVM to know how many times a loop executes when constructed with · 587a75b6
  Chris Lattner authored Aug 15, 2005
```
a < expression, e.g.: for (i = m; i < n; ++i)

llvm-svn: 22793
```
  587a75b6
Aug 15, 2005
- Broke 80 column rule. · 24b84072
  Jim Laskey authored Aug 15, 2005
```
llvm-svn: 22792
```
  24b84072
- Changed code gen for int to f32 to use rounding. This makes FP results... · 42623a95
  Jim Laskey authored Aug 15, 2005
```
Changed code gen for int to f32 to use rounding.  This makes FP results
consistent with gcc.

llvm-svn: 22791
```
  42623a95
- isIntImmediate is a good Idea. Add a flavor that checks bounds while it is at it · b65b1568
  Andrew Lenharth authored Aug 15, 2005
```
llvm-svn: 22790
```
  b65b1568
Aug 14, 2005

Fix last night's PPC32 regressions by · d5e739dc

Nate Begeman authored Aug 14, 2005

1. Not selecting the false value of a select_cc in the false arm, which
   isn't legal for nested selects.
2. Actually returning the node we created and Legalized in the FP_TO_UINT
   Expander.

llvm-svn: 22789

d5e739dc

Fix last night's X86 regressions by putting code for SSE in the if(SSE) · e5394d45
Nate Begeman authored Aug 14, 2005
```
block.  nur.

llvm-svn: 22788
```
e5394d45
only build .a on alpha · ed072338
Andrew Lenharth authored Aug 14, 2005
```
llvm-svn: 22787
```
ed072338

Fix FP_TO_UINT with Scalar SSE2 now that the legalizer can handle it. We · 4d959f66

Nate Begeman authored Aug 14, 2005

now generate the relatively good code sequences:
unsigned short foo(float a) { return a; }
_foo:
        movss 4(%esp), %xmm0
        cvttss2si %xmm0, %eax
        movzwl %ax, %eax
        ret

and
unsigned bar(float a) { return a; }
_bar:
        movss .CPI_bar_0, %xmm0
        movss 4(%esp), %xmm1
        movapd %xmm1, %xmm2
        subss %xmm0, %xmm2
        cvttss2si %xmm2, %eax
        xorl $-2147483648, %eax
        cvttss2si %xmm1, %ecx
        ucomiss %xmm0, %xmm1
        cmovb %ecx, %eax
        ret

llvm-svn: 22786

4d959f66

Teach the legalizer how to legalize FP_TO_UINT. · 36853ee1

Nate Begeman authored Aug 14, 2005

Teach the legalizer to promote FP_TO_UINT to FP_TO_SINT if the wider
  FP_TO_UINT is also illegal.  This allows us on PPC to codegen
  unsigned short foo(float a) { return a; }

as:
_foo:
.LBB_foo_0:     ; entry
        fctiwz f0, f1
        stfd f0, -8(r1)
        lwz r2, -4(r1)
        rlwinm r3, r2, 0, 16, 31
        blr

instead of:
_foo:
.LBB_foo_0:     ; entry
        fctiwz f0, f1
        stfd f0, -8(r1)
        lwz r2, -4(r1)
        lis r3, ha16(.CPI_foo_0)
        lfs f0, lo16(.CPI_foo_0)(r3)
        fcmpu cr0, f1, f0
        blt .LBB_foo_2  ; entry
.LBB_foo_1:     ; entry
        fsubs f0, f1, f0
        fctiwz f0, f0
        stfd f0, -16(r1)
        lwz r2, -12(r1)
        xoris r2, r2, 32768
.LBB_foo_2:     ; entry
        rlwinm r3, r2, 0, 16, 31
        blr

llvm-svn: 22785

36853ee1

Make FP_TO_UINT Illegal. This allows us to generate significantly better · 83f6b98c

Nate Begeman authored Aug 14, 2005

codegen for FP_TO_UINT by using the legalizer's SELECT variant.

Implement a codegen improvement for SELECT_CC, selecting the false node in
the MBB that feeds the phi node.  This allows us to codegen:
void foo(int *a, int b, int c) { int d = (a < b) ? 5 : 9; *a = d; }
as:
_foo:
        li r2, 5
        cmpw cr0, r4, r3
        bgt .LBB_foo_2  ; entry
.LBB_foo_1:     ; entry
        li r2, 9
.LBB_foo_2:     ; entry
        stw r2, 0(r3)
        blr

instead of:
_foo:
        li r2, 5
        li r5, 9
        cmpw cr0, r4, r3
        bgt .LBB_foo_2  ; entry
.LBB_foo_1:     ; entry
        or r2, r5, r5
.LBB_foo_2:     ; entry
        stw r2, 0(r3)
        blr

llvm-svn: 22784

83f6b98c

Aug 13, 2005

Testing a variable before it is defined doesn't work so well. It is a fairly... · 107a0a76

Andrew Lenharth authored Aug 13, 2005

Testing a variable before it is defined doesn't work so well.  It is a fairly small thing, so just let everyone build the .a file.

llvm-svn: 22783

107a0a76

Oops, don't forget to clear this. The real inner loop is now: · 47d3ec35

Chris Lattner authored Aug 13, 2005

.LBB_foo_3:     ; no_exit.1
        lfd f2, 0(r9)
        lfd f3, 8(r9)
        fmul f4, f1, f2
        fmadd f4, f0, f3, f4
        stfd f4, 8(r9)
        fmul f3, f1, f3
        fmsub f2, f0, f2, f3
        stfd f2, 0(r9)
        addi r9, r9, 16
        addi r8, r8, 1
        cmpw cr0, r8, r4
        ble .LBB_foo_3  ; no_exit.1

llvm-svn: 22782

47d3ec35

Recursively scan scev expressions for common subexpressions. This allows us · 5949d490

Chris Lattner authored Aug 13, 2005

to handle nested loops much better, for example, by being able to tell that
these two expressions:

{( 8 + ( 16 * ( 1 +  %Tmp11 +  %Tmp12)) +  %c_),+,( 16 *  %Tmp12)}<loopentry.1>

{(( 16 * ( 1 +  %Tmp11 +  %Tmp12)) +  %c_),+,( 16 *  %Tmp12)}<loopentry.1>

have the following common part that can be shared:
{(( 16 * ( 1 +  %Tmp11 +  %Tmp12)) +  %c_),+,( 16 *  %Tmp12)}<loopentry.1>

This allows us to codegen an important inner loop in 168.wupwise as:

.LBB_foo_4:     ; no_exit.1
        lfd f2, 16(r9)
        fmul f3, f0, f2
        fmul f2, f1, f2
        fadd f4, f3, f2
        stfd f4, 8(r9)
        fsub f2, f3, f2
        stfd f2, 16(r9)
        addi r8, r8, 1
        addi r9, r9, 16
        cmpw cr0, r8, r4
        ble .LBB_foo_4  ; no_exit.1

instead of:

.LBB_foo_3:     ; no_exit.1
        lfdx f2, r6, r9
        add r10, r6, r9
        lfd f3, 8(r10)
        fmul f4, f1, f2
        fmadd f4, f0, f3, f4
        stfd f4, 8(r10)
        fmul f3, f1, f3
        fmsub f2, f0, f2, f3
        stfdx f2, r6, r9
        addi r9, r9, 16
        addi r8, r8, 1
        cmpw cr0, r8, r4
        ble .LBB_foo_3  ; no_exit.1

llvm-svn: 22781

5949d490