  1. Aug 13, 2005
    • Nate Begeman's avatar
      Remove an unnecessary argument to SimplifySelectCC and add an additional · dc3154ec
      Nate Begeman authored
      assert when creating a select_cc node.
      
      llvm-svn: 22780
      dc3154ec
    • Nate Begeman's avatar
      Fix the fabs regression on x86 by abstracting the select_cc optimization · b6651e81
      Nate Begeman authored
      out into SimplifySelectCC.  This allows both ISD::SELECT and ISD::SELECT_CC
      to use the same set of simplifying folds.
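
      A minimal source pattern (illustrative, not taken from the commit) that
      exercises this kind of fold; ignoring the sign of zero, the compare plus
      the select is just fabs:

      float my_fabs(float x) {
        return x < 0.0f ? -x : x;   /* compare+select form of fabs(x) */
      }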
      
      llvm-svn: 22779
      b6651e81
    • Nate Begeman's avatar
      Remove support for 64b PPC; it's been broken for a long time. It'll be · a22bf778
      Nate Begeman authored
      back once a DAG->DAG ISel exists.
      
      llvm-svn: 22778
      a22bf778
    • Andrew Lenharth's avatar
      Fix oversized GOT problem with gcc-4 on alpha · 6b62b479
      Andrew Lenharth authored
      llvm-svn: 22777
      6b62b479
    • Chris Lattner's avatar
      Teach SplitCriticalEdge to update LoopInfo if it is alive. This fixes · 89c1dfc7
      Chris Lattner authored
      a problem in LoopStrengthReduction, where it would split critical edges
      and then confuse itself with outdated loop information.
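
      For illustration (a hedged sketch, not from this commit), a loop whose
      exit block has two predecessors; the edge from the loop test to the exit
      is critical, and the block created when that edge is split must be added
      to the right loop in LoopInfo, or later passes see a stale loop structure:

      int find(int *a, int n, int key) {
        int i;
        for (i = 0; i < n; i++)      /* the test block has two successors   */
          if (a[i] == key) break;    /* the exit block has two predecessors */
        return i;                    /* so both edges into it are critical  */
      }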
      
      llvm-svn: 22776
      89c1dfc7
    • Chris Lattner's avatar
      remove dead code. The exit block list is computed on demand and thus does not · 79396539
      Chris Lattner authored
      need to be updated.  This code is a relic from when it did.
      
      llvm-svn: 22775
      79396539
    • Chris Lattner's avatar
      implement a couple of simple shift foldings. · 21381e84
      Chris Lattner authored
      e.g.  (X & 7) >> 3   -> 0
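
      For instance (an illustrative function, not from the commit), the mask
      keeps only the low three bits and the shift then discards exactly those
      bits, so the whole expression folds to zero:

      unsigned shift_fold(unsigned x) {
        return (x & 7) >> 3;   /* always 0 */
      }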
      
      llvm-svn: 22774
      21381e84
    • Jim Laskey's avatar
      · 35960708
      Jim Laskey authored
      Fix for 2005-08-12-rlwimi-crash.ll.  Make allowance for masks being shifted to
      zero.
      
      llvm-svn: 22773
      35960708
    • Jim Laskey's avatar
      · a5687006
      Jim Laskey authored
      1. This change handles the cases of (~x)&y and x&(~y) yielding ANDC, and
         (~x)|y and x|(~y) yielding ORC.
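
      Illustrative source patterns (not part of the patch) that these cases let
      the PPC selector turn into a single andc or orc instruction:

      int andc_like(int x, int y) { return ~x & y; }   /* -> andc */
      int orc_like (int x, int y) { return x | ~y; }   /* -> orc  */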
      
      llvm-svn: 22771
      a5687006
    • Chris Lattner's avatar
      When splitting critical edges, make sure not to leave the new block in the · 8447b495
      Chris Lattner authored
      middle of the loop.  This turns a critical loop in gzip into this:
      
      .LBB_test_1:    ; loopentry
              or r27, r28, r28
              add r28, r3, r27
              lhz r28, 3(r28)
              add r26, r4, r27
              lhz r26, 3(r26)
              cmpw cr0, r28, r26
              bne .LBB_test_8 ; loopentry.loopexit_crit_edge
      .LBB_test_2:    ; shortcirc_next.0
              add r28, r3, r27
              lhz r28, 5(r28)
              add r26, r4, r27
              lhz r26, 5(r26)
              cmpw cr0, r28, r26
              bne .LBB_test_7 ; shortcirc_next.0.loopexit_crit_edge
      .LBB_test_3:    ; shortcirc_next.1
              add r28, r3, r27
              lhz r28, 7(r28)
              add r26, r4, r27
              lhz r26, 7(r26)
              cmpw cr0, r28, r26
              bne .LBB_test_6 ; shortcirc_next.1.loopexit_crit_edge
      .LBB_test_4:    ; shortcirc_next.2
              add r28, r3, r27
              lhz r26, 9(r28)
              add r28, r4, r27
              lhz r25, 9(r28)
              addi r28, r27, 8
              cmpw cr7, r26, r25
              mfcr r26, 1
              rlwinm r26, r26, 31, 31, 31
              add r25, r8, r27
              cmpw cr7, r25, r7
              mfcr r25, 1
              rlwinm r25, r25, 29, 31, 31
              and. r26, r26, r25
              bne .LBB_test_1 ; loopentry
      
      instead of this:
      
      .LBB_test_1:    ; loopentry
              or r27, r28, r28
              add r28, r3, r27
              lhz r28, 3(r28)
              add r26, r4, r27
              lhz r26, 3(r26)
              cmpw cr0, r28, r26
              beq .LBB_test_3 ; shortcirc_next.0
      .LBB_test_2:    ; loopentry.loopexit_crit_edge
              add r2, r30, r27
              add r8, r29, r27
              b .LBB_test_9   ; loopexit
      .LBB_test_3:    ; shortcirc_next.0
              add r28, r3, r27
              lhz r28, 5(r28)
              add r26, r4, r27
              lhz r26, 5(r26)
              cmpw cr0, r28, r26
              beq .LBB_test_5 ; shortcirc_next.1
      .LBB_test_4:    ; shortcirc_next.0.loopexit_crit_edge
              add r2, r11, r27
              add r8, r12, r27
              b .LBB_test_9   ; loopexit
      .LBB_test_5:    ; shortcirc_next.1
              add r28, r3, r27
              lhz r28, 7(r28)
              add r26, r4, r27
              lhz r26, 7(r26)
              cmpw cr0, r28, r26
              beq .LBB_test_7 ; shortcirc_next.2
      .LBB_test_6:    ; shortcirc_next.1.loopexit_crit_edge
              add r2, r9, r27
              add r8, r10, r27
              b .LBB_test_9   ; loopexit
      .LBB_test_7:    ; shortcirc_next.2
              add r28, r3, r27
              lhz r26, 9(r28)
              add r28, r4, r27
              lhz r25, 9(r28)
              addi r28, r27, 8
              cmpw cr7, r26, r25
              mfcr r26, 1
              rlwinm r26, r26, 31, 31, 31
              add r25, r8, r27
              cmpw cr7, r25, r7
              mfcr r25, 1
              rlwinm r25, r25, 29, 31, 31
              and. r26, r26, r25
              bne .LBB_test_1 ; loopentry
      
      Next up, improve the code for the loop.
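
      For reference, a hedged sketch of the kind of source that produces a loop
      of this shape (illustrative only, not the actual gzip code): each
      short-circuited comparison becomes a shortcirc_next block, and every early
      exit needs its own critical edge into the shared loop exit:

      int scan(short *a, short *b, int i, int limit) {
        do {
          if (a[i + 3] != b[i + 3]) break;
          if (a[i + 5] != b[i + 5]) break;
          if (a[i + 7] != b[i + 7]) break;
          if (a[i + 9] != b[i + 9]) break;
          i += 8;
        } while (i < limit);
        return i;
      }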
      
      llvm-svn: 22769
      8447b495
    • Chris Lattner's avatar
      Add a helper method · e09bbc80
      Chris Lattner authored
      llvm-svn: 22768
      e09bbc80
    • Chris Lattner's avatar
      Fix a FIXME: if we are inserting code for a PHI argument, split the critical · 4fec86d3
      Chris Lattner authored
      edge so that the code is not always executed for both operands.  This
      prevents LSR from inserting code into loops whose exit blocks contain
      PHI uses of IV expressions (which are outside of loops).  On gzip, for
      example, we turn this ugly code:
      
      .LBB_test_1:    ; loopentry
              add r27, r3, r28
              lhz r27, 3(r27)
              add r26, r4, r28
              lhz r26, 3(r26)
              add r25, r30, r28    ;; Only live if exiting the loop
              add r24, r29, r28    ;; Only live if exiting the loop
              cmpw cr0, r27, r26
              bne .LBB_test_5 ; loopexit
      
      into this:
      
      .LBB_test_1:    ; loopentry
              or r27, r28, r28
              add r28, r3, r27
              lhz r28, 3(r28)
              add r26, r4, r27
              lhz r26, 3(r26)
              cmpw cr0, r28, r26
              beq .LBB_test_3 ; shortcirc_next.0
      .LBB_test_2:    ; loopentry.loopexit_crit_edge
              add r2, r30, r27
              add r8, r29, r27
              b .LBB_test_9   ; loopexit
      .LBB_test_3:    ; shortcirc_next.0
              ...
              blt .LBB_test_1
      
      
      Next step: get the block out of the loop so that the loop is all
      fall-throughs again.
      
      llvm-svn: 22766
      4fec86d3
  2. Aug 12, 2005
  3. Aug 11, 2005
  4. Aug 10, 2005
    • Nate Begeman's avatar
      Make SELECT illegal on PPC32 and switch to using SELECT_CC, which more closely · 5646b181
      Nate Begeman authored
      reflects what the hardware is capable of.  This significantly simplifies
      the CC handling logic throughout the ISel.
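
      One illustrative pattern (not from this commit) that SELECT_CC exposes
      directly: a floating-point select keyed on a compare against zero, which
      lines up with PPC's fsel instruction (result = frA >= 0.0 ? frC : frB):

      double sel_ge_zero(double a, double x, double y) {
        return a >= 0.0 ? x : y;   /* compare and select stay together */
      }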
      
      llvm-svn: 22756
      5646b181
    • Nate Begeman's avatar
      Add a new node, SELECT_CC. This node is for targets that don't natively · e5b86d74
      Nate Begeman authored
      implement SELECT.
      
      llvm-svn: 22755
      e5b86d74
    • Chris Lattner's avatar
      Changes for PPC32ISelPattern.cpp · 3428b956
      Chris Lattner authored
      1. Clean up how SelectIntImmediateExpr handles use counts.
      2. "Subtract from" was not clearing hi 16 bits.
      
      Patch by Jim Laskey
      
      llvm-svn: 22754
      3428b956
    • Chris Lattner's avatar
      Fix an oversight that may be causing PR617. · 21c0fd9e
      Chris Lattner authored
      llvm-svn: 22753
      21c0fd9e
    • Chris Lattner's avatar
      62df7989
    • Chris Lattner's avatar
      Changed the XOR case to use the isOprNot predicate. · aeedcc7f
      Chris Lattner authored
      Patch by Jim Laskey!
      
      llvm-svn: 22750
      aeedcc7f
    • Chris Lattner's avatar
      1. Refactored handling of integer immediate values for add, or, xor and sub. · 67d07537
      Chris Lattner authored
        New routine: ISel::SelectIntImmediateExpr
      2. Now checking use counts of large constants.  If the use count is > 2,
         drop through so that the constant gets loaded into a register.
      Source:
      
      int %test1(int %a) {
      entry:
             %tmp.1 = add int %a,      123456789      ; <int> [#uses=1]
             %tmp.2 = or  int %tmp.1,  123456789      ; <int> [#uses=1]
             %tmp.3 = xor int %tmp.2,  123456789      ; <int> [#uses=1]
             %tmp.4 = sub int %tmp.3, -123456789      ; <int> [#uses=1]
             ret int %tmp.4
      }
      
      Did Emit:
      
             .machine ppc970
      
      
             .text
             .align  2
             .globl  _test1
      _test1:
      .LBB_test1_0:   ; entry
             addi r2, r3, -13035
             addis r2, r2, 1884
             ori r2, r2, 52501
             oris r2, r2, 1883
             xori r2, r2, 52501
             xoris r2, r2, 1883
             addi r2, r2, 52501
             addis r3, r2, 1883
             blr
      
      
      Now Emits:
      
             .machine ppc970
      
      
             .text
             .align  2
             .globl  _test1
      _test1:
      .LBB_test1_0:   ; entry
             lis r2, 1883
             ori r2, r2, 52501
             add r3, r3, r2
             or r3, r3, r2
             xor r3, r3, r2
             add r3, r3, r2
             blr
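
      As a quick arithmetic check (not part of the patch): the lis/ori pair
      rebuilds the constant as 1883 << 16 = 123404288 and
      123404288 | 52501 = 123456789, so one materialization feeds all four
      operations.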
      
      Patch by Jim Laskey!
      
      llvm-svn: 22749
      67d07537
    • Duraid Madina's avatar
      sorry!! this is temporary; for some reason the nasty constmul code seems to · 1c2f9fdf
      Duraid Madina authored
      go into an infinite loop when built with g++-4.0.1*, which kills the ia64 nightly
      tester. A proper fix shall be forthcoming!!! thanks for not killing me. :)
      
      llvm-svn: 22748
      1c2f9fdf
    • Chris Lattner's avatar
      Fix a bug compiling: select (i32 < i32), f32, f32 · 5f56d71c
      Chris Lattner authored
      llvm-svn: 22747
      5f56d71c
    • Chris Lattner's avatar
      Make loop-simplify produce better loops by turning PHI nodes like X = phi [X, Y] · f83ce5fa
      Chris Lattner authored
      into just Y.  This often occurs when it separates loops that have collapsed loop
      headers.  This implements LoopSimplify/phi-node-simplify.ll
      
      llvm-svn: 22746
      f83ce5fa
    • Chris Lattner's avatar
      Allow indvar simplify to canonicalize ANY affine IV, not just affine IVs with · 677d8578
      Chris Lattner authored
      constant stride.  This implements Transforms/IndVarsSimplify/variable-stride-ivs.ll
      
      llvm-svn: 22744
      677d8578
    • Chris Lattner's avatar
      Fix an obvious oops · 35c0e2ee
      Chris Lattner authored
      llvm-svn: 22742
      35c0e2ee
    • Chris Lattner's avatar
      Teach LSR to strength reduce IVs that have a loop-invariant but non-constant stride. · edff91a4
      Chris Lattner authored
      For code like this:
      
      void foo(float *a, float *b, int n, int stride_a, int stride_b) {
        int i;
        for (i=0; i<n; i++)
            a[i*stride_a] = b[i*stride_b];
      }
      
      we now emit:
      
      .LBB_foo2_2:    ; no_exit
              lfs f0, 0(r4)
              stfs f0, 0(r3)
              addi r7, r7, 1
              add r4, r2, r4
              add r3, r6, r3
              cmpw cr0, r7, r5
              blt .LBB_foo2_2 ; no_exit
      
      instead of:
      
      .LBB_foo_2:     ; no_exit
              mullw r8, r2, r7     ;; multiply!
              slwi r8, r8, 2
              lfsx f0, r4, r8
              mullw r8, r2, r6     ;; multiply!
              slwi r8, r8, 2
              stfsx f0, r3, r8
              addi r2, r2, 1
              cmpw cr0, r2, r5
              blt .LBB_foo_2  ; no_exit
      
      Loops with variable strides occur pretty often.  For example, in SPECFP2K
      there are 317 variable strides in 177.mesa, 3 in 179.art, 14 in 188.ammp,
      56 in 168.wupwise, 36 in 172.mgrid.
      
      Now we can allow indvars to turn functions written like this:
      
      void foo2(float *a, float *b, int n, int stride_a, int stride_b) {
        int i, ai = 0, bi = 0;
        for (i=0; i<n; i++)
          {
            a[ai] = b[bi];
            ai += stride_a;
            bi += stride_b;
          }
      }
      
      into code like the above for better analysis.  With this patch, they generate
      identical code.
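
      A hedged source-level picture of the reduced loop (foo_reduced is
      illustrative, not part of the patch): the multiplies are gone and each
      access just advances by the loop-invariant stride, matching the adds in
      the good loop above:

      void foo_reduced(float *a, float *b, int n, int stride_a, int stride_b) {
        int i;
        float *ap = a, *bp = b;
        for (i = 0; i < n; i++) {
          *ap = *bp;        /* same work as a[i*stride_a] = b[i*stride_b] */
          ap += stride_a;   /* pointer bumped by the invariant stride     */
          bp += stride_b;
        }
      }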
      
      llvm-svn: 22740
      edff91a4
    • Chris Lattner's avatar
      Fix Regression/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll · dde7dc52
      Chris Lattner authored
      by being more careful about updating PHI nodes
      
      llvm-svn: 22739
      dde7dc52
    • Chris Lattner's avatar
      Fix some 80 column violations. · c6c4d99a
      Chris Lattner authored
      Once we compute the evolution for a GEP, tell SE about it.  This lets
      users of the GEP see its evolution even when they are not its direct
      users.  This allows us to compile
      this testcase:
      
      void fbSolidFillmmx(int w, unsigned char *d) {
          while (w >= 64) {
              *(unsigned long long *) (d +  0) = 0;
              *(unsigned long long *) (d +  8) = 0;
              *(unsigned long long *) (d + 16) = 0;
              *(unsigned long long *) (d + 24) = 0;
              *(unsigned long long *) (d + 32) = 0;
              *(unsigned long long *) (d + 40) = 0;
              *(unsigned long long *) (d + 48) = 0;
              *(unsigned long long *) (d + 56) = 0;
              w -= 64;
              d += 64;
          }
      }
      
      into:
      
      .LBB_fbSolidFillmmx_2:  ; no_exit
              li r2, 0
              stw r2, 0(r4)
              stw r2, 4(r4)
              stw r2, 8(r4)
              stw r2, 12(r4)
              stw r2, 16(r4)
              stw r2, 20(r4)
              stw r2, 24(r4)
              stw r2, 28(r4)
              stw r2, 32(r4)
              stw r2, 36(r4)
              stw r2, 40(r4)
              stw r2, 44(r4)
              stw r2, 48(r4)
              stw r2, 52(r4)
              stw r2, 56(r4)
              stw r2, 60(r4)
              addi r4, r4, 64
              addi r3, r3, -64
              cmpwi cr0, r3, 63
              bgt .LBB_fbSolidFillmmx_2       ; no_exit
      
      instead of:
      
      .LBB_fbSolidFillmmx_2:  ; no_exit
              li r11, 0
              stw r11, 0(r4)
              stw r11, 4(r4)
              stwx r11, r10, r4
              add r12, r10, r4
              stw r11, 4(r12)
              stwx r11, r9, r4
              add r12, r9, r4
              stw r11, 4(r12)
              stwx r11, r8, r4
              add r12, r8, r4
              stw r11, 4(r12)
              stwx r11, r7, r4
              add r12, r7, r4
              stw r11, 4(r12)
              stwx r11, r6, r4
              add r12, r6, r4
              stw r11, 4(r12)
              stwx r11, r5, r4
              add r12, r5, r4
              stw r11, 4(r12)
              stwx r11, r2, r4
              add r12, r2, r4
              stw r11, 4(r12)
              addi r4, r4, 64
              addi r3, r3, -64
              cmpwi cr0, r3, 63
              bgt .LBB_fbSolidFillmmx_2       ; no_exit
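
      In SCEV terms (an illustrative note, not from the commit message), the
      pointer d advances by 64 bytes per iteration, i.e. its evolution is the
      affine recurrence

        {d,+,64}

      and once SE is told this for the GEPs, each store folds to a constant
      offset from the single induction pointer, as in the first loop.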
      
      llvm-svn: 22737
      c6c4d99a
    • Chris Lattner's avatar
      implement two helper methods · b310ac4a
      Chris Lattner authored
      llvm-svn: 22736
      b310ac4a
    • Chris Lattner's avatar
      Fix spelling; fix some canonicalizations broken by my last patch · 679f5b0b
      Chris Lattner authored
      llvm-svn: 22734
      679f5b0b
    • Chris Lattner's avatar
      add an optimization note · 54ee86ac
      Chris Lattner authored
      llvm-svn: 22732
      54ee86ac
  5. Aug 09, 2005