  1. Aug 17, 2005
• add a beta option for turning on dag->dag isel · c6aa8066
      Chris Lattner authored
      llvm-svn: 22837
• initial hack at a dag->dag instruction selector. This is obviously woefully · 43ff01e2
      Chris Lattner authored
      incomplete, but it is a start.  It handles basic argument/retval stuff, immediates,
      add and sub.
      
      llvm-svn: 22836
• add prototype, remove dead proto · f61cce95
      Chris Lattner authored
      llvm-svn: 22835
• Fix a bug in RemoveDeadNodes where it would crash when its "optional" · ab0de9d7
      Chris Lattner authored
      argument is not specified.
      
      Implement ReplaceAllUsesWith.
      
      llvm-svn: 22834
• Jim Laskey · 686d6a1c
• Fix some bugs in the alpha backend, some of which I introduced yesterday, · 33900811
      Chris Lattner authored
      and some that were preexisting.  All alpha regtests pass now.
      
      llvm-svn: 22829
• Change hex float constants for the sake of VC++. · 898ba557
      Jim Laskey authored
      llvm-svn: 22828
• Add a new beta option for critical edge splitting, to avoid a problem that · c9950c11
      Chris Lattner authored
      Nate noticed in yacr2 (and I know occurs in other places as well).
      
This is still rough, as the critical edge blocks are not intelligently placed,
but it is added to get some idea of whether this improves performance.
      
      llvm-svn: 22825
• Use a new helper to split critical edges, making the code simpler. · 2bf7cb52
      Chris Lattner authored
Do not claim to not change the CFG.  We do change the CFG to split critical
edges.  This isn't causing us a problem now, but could likely do so in the
future.
      
      llvm-svn: 22824
• Fix a regression on X86, where FP values can be promoted too. · ba28c273
      Chris Lattner authored
      llvm-svn: 22822
• Fix a few small typos I noticed when converting this over to the DAG->DAG · 63f774ec
      Chris Lattner authored
      selector.  Also, there is no difference between addSImm and addImm, so just
      use addImm, folding some branches.
      
      llvm-svn: 22819
• Removed UINT_TO_FP and SINT_TO_FP from ISel outright. · 9828f26c
      Jim Laskey authored
      llvm-svn: 22818
• thinko. Should fix s4addl.ll regression · 73370ba5
      Andrew Lenharth authored
      llvm-svn: 22817
• Remove ISel code generation for UINT_TO_FP and SINT_TO_FP. Now asserts if · 5909c8b1
      Jim Laskey authored
      marked as legal.
      
      llvm-svn: 22816
• Make UINT_TO_FP and SINT_TO_FP use generic expansion. · 6267b2c9
      Jim Laskey authored
      llvm-svn: 22815
• (no subject line) · f2516a91
      Jim Laskey authored
      Added generic code expansion for [signed|unsigned] i32 to [f32|f64] casts in the
      legalizer.  PowerPC now uses this expansion instead of ISel version.
      
      Example:
      
      // signed integer to double conversion
      double f1(signed x) {
        return (double)x;
      }
      
      // unsigned integer to double conversion
      double f2(unsigned x) {
        return (double)x;
      }
      
      // signed integer to float conversion
      float f3(signed x) {
        return (float)x;
      }
      
      // unsigned integer to float conversion
      float f4(unsigned x) {
        return (float)x;
      }
      
      
      Byte Code:
      
      internal fastcc double %_Z2f1i(int %x) {
      entry:
              %tmp.1 = cast int %x to double          ; <double> [#uses=1]
              ret double %tmp.1
      }
      
      internal fastcc double %_Z2f2j(uint %x) {
      entry:
              %tmp.1 = cast uint %x to double         ; <double> [#uses=1]
              ret double %tmp.1
      }
      
      internal fastcc float %_Z2f3i(int %x) {
      entry:
              %tmp.1 = cast int %x to float           ; <float> [#uses=1]
              ret float %tmp.1
      }
      
      internal fastcc float %_Z2f4j(uint %x) {
      entry:
              %tmp.1 = cast uint %x to float          ; <float> [#uses=1]
              ret float %tmp.1
      }
      
      internal fastcc double %_Z2g1i(int %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.2 = cast int %x to uint            ; <uint> [#uses=1]
              %tmp.3 = xor uint %tmp.2, 2147483648            ; <uint> [#uses=1]
              %tmp.5 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %tmp.3, uint* %tmp.5
              %tmp.9 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.10 = load double* %tmp.9           ; <double> [#uses=1]
              %tmp.13 = load double* cast (long* %signed_bias to double*)             ; <double> [#uses=1]
              %tmp.14 = sub double %tmp.10, %tmp.13           ; <double> [#uses=1]
              ret double %tmp.14
      }
      
      internal fastcc double %_Z2g2j(uint %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.1 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %x, uint* %tmp.1
              %tmp.4 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.5 = load double* %tmp.4            ; <double> [#uses=1]
              %tmp.8 = load double* cast (long* %unsigned_bias to double*)            ; <double> [#uses=1]
              %tmp.9 = sub double %tmp.5, %tmp.8              ; <double> [#uses=1]
              ret double %tmp.9
      }
      
      internal fastcc float %_Z2g3i(int %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.2 = cast int %x to uint            ; <uint> [#uses=1]
              %tmp.3 = xor uint %tmp.2, 2147483648            ; <uint> [#uses=1]
              %tmp.5 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %tmp.3, uint* %tmp.5
              %tmp.9 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.10 = load double* %tmp.9           ; <double> [#uses=1]
              %tmp.13 = load double* cast (long* %signed_bias to double*)             ; <double> [#uses=1]
              %tmp.14 = sub double %tmp.10, %tmp.13           ; <double> [#uses=1]
              %tmp.16 = cast double %tmp.14 to float          ; <float> [#uses=1]
              ret float %tmp.16
      }
      
      internal fastcc float %_Z2g4j(uint %x) {
      entry:
              %buffer = alloca [2 x uint]             ; <[2 x uint]*> [#uses=3]
              %tmp.0 = getelementptr [2 x uint]* %buffer, int 0, int 0                ; <uint*> [#uses=1]
              store uint 1127219200, uint* %tmp.0
              %tmp.1 = getelementptr [2 x uint]* %buffer, int 0, int 1                ; <uint*> [#uses=1]
              store uint %x, uint* %tmp.1
              %tmp.4 = cast [2 x uint]* %buffer to double*            ; <double*> [#uses=1]
              %tmp.5 = load double* %tmp.4            ; <double> [#uses=1]
              %tmp.8 = load double* cast (long* %unsigned_bias to double*)            ; <double> [#uses=1]
              %tmp.9 = sub double %tmp.5, %tmp.8              ; <double> [#uses=1]
              %tmp.11 = cast double %tmp.9 to float           ; <float> [#uses=1]
              ret float %tmp.11
      }
      
      
      PowerPC Code:
      
              .machine ppc970
      
      
              .const
              .align  2
      .CPIl1__Z2f1i_0:                                        ; float 0x4330000080000000
              .long   1501560836      ; float 4.5036e+15
              .text
              .align  2
              .globl  l1__Z2f1i
      l1__Z2f1i:
      .LBBl1__Z2f1i_0:        ; entry
              xoris r2, r3, 32768
              stw r2, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl1__Z2f1i_0)
              lfs f1, lo16(.CPIl1__Z2f1i_0)(r2)
              fsub f1, f0, f1
              blr
      
      
              .const
              .align  2
      .CPIl2__Z2f2j_0:                                        ; float 0x4330000000000000
              .long   1501560832      ; float 4.5036e+15
              .text
              .align  2
              .globl  l2__Z2f2j
      l2__Z2f2j:
      .LBBl2__Z2f2j_0:        ; entry
              stw r3, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl2__Z2f2j_0)
              lfs f1, lo16(.CPIl2__Z2f2j_0)(r2)
              fsub f1, f0, f1
              blr
      
      
              .const
              .align  2
      .CPIl3__Z2f3i_0:                                        ; float 0x4330000080000000
              .long   1501560836      ; float 4.5036e+15
              .text
              .align  2
              .globl  l3__Z2f3i
      l3__Z2f3i:
      .LBBl3__Z2f3i_0:        ; entry
              xoris r2, r3, 32768
              stw r2, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl3__Z2f3i_0)
              lfs f1, lo16(.CPIl3__Z2f3i_0)(r2)
              fsub f0, f0, f1
              frsp f1, f0
              blr
      
      
              .const
              .align  2
      .CPIl4__Z2f4j_0:                                        ; float 0x4330000000000000
              .long   1501560832      ; float 4.5036e+15
              .text
              .align  2
              .globl  l4__Z2f4j
      l4__Z2f4j:
      .LBBl4__Z2f4j_0:        ; entry
              stw r3, -4(r1)
              lis r2, 17200
              stw r2, -8(r1)
              lfd f0, -8(r1)
              lis r2, ha16(.CPIl4__Z2f4j_0)
              lfs f1, lo16(.CPIl4__Z2f4j_0)(r2)
              fsub f0, f0, f1
              frsp f1, f0
              blr
      
      llvm-svn: 22814
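The expansion above is the classic 2^52 bias trick: storing 0x43300000 (the exponent word of 2^52) as the high word and the integer as the low word builds the double 2^52 + n, and subtracting the bias constant recovers n exactly. A minimal C sketch of the same idea, with invented helper names; the IR above stores the two words in big-endian order for PowerPC, while this sketch assembles the 64-bit bit pattern directly to stay endian-neutral:

```c
#include <stdint.h>
#include <string.h>

/* Unsigned i32 -> double via the 2^52 bias trick (helper names are invented).
 * 0x4330000000000000 is the bit pattern of 2^52; OR-ing the integer into the
 * low 32 bits yields the double 2^52 + u, and subtracting 2^52 leaves u. */
static double u32_to_f64(uint32_t u) {
    uint64_t bits = 0x4330000000000000ULL | (uint64_t)u;
    double d;
    memcpy(&d, &bits, sizeof d);      /* reinterpret, like the buffer store/load */
    return d - 4503599627370496.0;    /* subtract 2^52 */
}

/* Signed i32: xor with 0x80000000 maps [-2^31, 2^31) onto [0, 2^32),
 * so the bias to subtract grows by 2^31 (the "signed_bias" constant above). */
static double s32_to_f64(int32_t x) {
    uint64_t bits = 0x4330000000000000ULL | (uint64_t)((uint32_t)x ^ 0x80000000u);
    double d;
    memcpy(&d, &bits, sizeof d);
    return d - (4503599627370496.0 + 2147483648.0);   /* 2^52 + 2^31 */
}
```

Both subtractions are exact because any 32-bit integer fits in a double's 53-bit significand, which is why the legalizer can use this expansion unconditionally for i32 sources.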
• add a new TargetConstant node · 0d2456e1
      Chris Lattner authored
      llvm-svn: 22813
• Implement a couple improvements: · 784c8068
      Nate Begeman authored
      Remove dead code in ISD::Constant handling
      Add support for add long, imm16
      
      We now codegen 'long long foo(long long a) { return ++a; }'
      as:
      addic r4, r4, 1
      addze r3, r3
      blr
      
      instead of:
      li r2, 1
      li r5, 0
      addc r2, r4, r2
      adde r3, r3, r5
      blr
      
      llvm-svn: 22811
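The improved sequence works because addic adds the immediate and records the carry, and addze folds that carry into the high word. A rough C equivalent of the two-register increment (the function name is made up for illustration):

```c
#include <stdint.h>

/* 64-bit increment on a 32-bit-register pair, mirroring addic/addze. */
static void inc64(uint32_t *hi, uint32_t *lo) {
    uint32_t old = *lo;
    *lo = old + 1;        /* addic r4, r4, 1 : add immediate, set carry */
    *hi += (*lo < old);   /* addze r3, r3   : add the carry (1 on wraparound) */
}
```

The old sequence materialized both halves of the constant 1 into registers first; using the immediate forms removes the two `li` instructions.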
  2. Aug 16, 2005
  3. Aug 15, 2005
  4. Aug 14, 2005
• Fix last night's PPC32 regressions by · d5e739dc
      Nate Begeman authored
      1. Not selecting the false value of a select_cc in the false arm, which
         isn't legal for nested selects.
      2. Actually returning the node we created and Legalized in the FP_TO_UINT
         Expander.
      
      llvm-svn: 22789
• Fix last night's X86 regressions by putting code for SSE in the if(SSE) · e5394d45
      Nate Begeman authored
block.
      
      llvm-svn: 22788
• only build .a on alpha · ed072338
      Andrew Lenharth authored
      llvm-svn: 22787
• Fix FP_TO_UINT with Scalar SSE2 now that the legalizer can handle it. We · 4d959f66
      Nate Begeman authored
      now generate the relatively good code sequences:
      unsigned short foo(float a) { return a; }
      _foo:
              movss 4(%esp), %xmm0
              cvttss2si %xmm0, %eax
              movzwl %ax, %eax
              ret
      
      and
      unsigned bar(float a) { return a; }
      _bar:
              movss .CPI_bar_0, %xmm0
              movss 4(%esp), %xmm1
              movapd %xmm1, %xmm2
              subss %xmm0, %xmm2
              cvttss2si %xmm2, %eax
              xorl $-2147483648, %eax
              cvttss2si %xmm1, %ecx
              ucomiss %xmm0, %xmm1
              cmovb %ecx, %eax
              ret
      
      llvm-svn: 22786
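For inputs at or above 2^31, cvttss2si would overflow the signed result, so the sequence subtracts 2^31 first, converts, and xors the sign bit back in; ucomiss/cmovb then picks whichever path was in range. A C approximation of that select (name invented; the asm computes both arms branchlessly with cmovb, but plain C must branch to avoid out-of-range signed-conversion undefined behavior):

```c
#include <stdint.h>

/* float -> u32 in the spirit of the SSE sequence above (sketch). */
static uint32_t f32_to_u32(float a) {
    const float bias = 2147483648.0f;          /* 2^31, the .CPI_bar_0 constant */
    if (a < bias)
        return (uint32_t)(int32_t)a;           /* fits in the signed range */
    /* rebias: convert a - 2^31 as signed, then add 2^31 back via the xor */
    return (uint32_t)(int32_t)(a - bias) ^ 0x80000000u;
}
```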
• Teach the legalizer how to legalize FP_TO_UINT. · 36853ee1
      Nate Begeman authored
      Teach the legalizer to promote FP_TO_UINT to FP_TO_SINT if the wider
        FP_TO_UINT is also illegal.  This allows us on PPC to codegen
        unsigned short foo(float a) { return a; }
      
      as:
      _foo:
      .LBB_foo_0:     ; entry
              fctiwz f0, f1
              stfd f0, -8(r1)
              lwz r2, -4(r1)
              rlwinm r3, r2, 0, 16, 31
              blr
      
      instead of:
      _foo:
      .LBB_foo_0:     ; entry
              fctiwz f0, f1
              stfd f0, -8(r1)
              lwz r2, -4(r1)
              lis r3, ha16(.CPI_foo_0)
              lfs f0, lo16(.CPI_foo_0)(r3)
              fcmpu cr0, f1, f0
              blt .LBB_foo_2  ; entry
      .LBB_foo_1:     ; entry
              fsubs f0, f1, f0
              fctiwz f0, f0
              stfd f0, -16(r1)
              lwz r2, -12(r1)
              xoris r2, r2, 32768
      .LBB_foo_2:     ; entry
              rlwinm r3, r2, 0, 16, 31
              blr
      
      llvm-svn: 22785
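The promotion is valid because every u16 value fits in the signed i32 result of fctiwz, so converting wide-and-signed and then masking the low 16 bits (the rlwinm above) gives the exact answer. As a one-line C sketch (hypothetical helper name):

```c
#include <stdint.h>

/* FP_TO_UINT:i16 promoted to FP_TO_SINT:i32: convert wide, then truncate.
 * The truncating cast plays the role of the rlwinm mask in the PPC output. */
static uint16_t f32_to_u16(float a) {
    return (uint16_t)(int32_t)a;
}
```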
• Make FP_TO_UINT Illegal. This allows us to generate significantly better · 83f6b98c
      Nate Begeman authored
      codegen for FP_TO_UINT by using the legalizer's SELECT variant.
      
      Implement a codegen improvement for SELECT_CC, selecting the false node in
      the MBB that feeds the phi node.  This allows us to codegen:
      void foo(int *a, int b, int c) { int d = (a < b) ? 5 : 9; *a = d; }
      as:
      _foo:
              li r2, 5
              cmpw cr0, r4, r3
              bgt .LBB_foo_2  ; entry
      .LBB_foo_1:     ; entry
              li r2, 9
      .LBB_foo_2:     ; entry
              stw r2, 0(r3)
              blr
      
instead of:
      _foo:
              li r2, 5
              li r5, 9
              cmpw cr0, r4, r3
              bgt .LBB_foo_2  ; entry
      .LBB_foo_1:     ; entry
              or r2, r5, r5
      .LBB_foo_2:     ; entry
              stw r2, 0(r3)
              blr
      
      llvm-svn: 22784