- Sep 19, 2005
-
-
Chris Lattner authored
when possible, avoiding the load (and avoiding the copy if the value is already
in the right register). This patch came about when I noticed code like the
following being generated:

        store R17 -> [SS1]
        ...blah...
        R4 = load [SS1]

This was causing an LSU reject on the G5. This problem was due to the register
allocator folding spill code into a reg-reg copy (producing the load), which
prevented the spiller from being able to rewrite the load into a copy, despite
the fact that the value was already available in a register. In the case above,
we now rip out the R4 load and replace it with an R4 = R17 copy.

This speeds up several programs on X86 (which spills a lot :) ), e.g. smg2k
from 22.39->20.60s, povray from 12.93->12.66s, 168.wupwise from 68.54->53.83s
(!), 197.parser from 7.33->6.62s (!), etc.

This may have a larger impact in some cases on the G5 (by avoiding LSU
rejects), though it probably won't trigger as often (less spilling in general).

Targets that implement folding of loads/stores into copies should implement
the isLoadFromStackSlot hook to get this.

llvm-svn: 23388
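
To see concretely what the rewrite does, here is a minimal standalone sketch.
This is not LLVM's actual spiller; the instruction representation and all names
are invented for illustration. It just remembers which register last stored to
each stack slot and turns a later reload from that slot into a copy. In LLVM,
recognizing that an instruction is such a reload is what the isLoadFromStackSlot
hook mentioned above is for; the sketch hard-codes that knowledge.

    // Standalone sketch of the reload-to-copy rewrite (hypothetical names).
    #include <cstdio>
    #include <map>
    #include <vector>

    enum Kind { Store, Load, Copy, Other };

    struct Inst {
        Kind kind;
        int reg;   // register stored (Store) or written (Load/Copy)
        int src;   // stack slot (Store/Load) or source register (Copy)
    };

    int main() {
        // store R17 -> [SS1] ; ...blah... ; R4 = load [SS1]
        std::vector<Inst> code = { {Store, 17, 1}, {Other, 0, 0}, {Load, 4, 1} };

        // For each stack slot, remember which register still holds its value.
        std::map<int, int> availableIn;

        for (Inst &I : code) {
            if (I.kind == Store) {
                // After the spill store, the slot's value also lives in I.reg.
                availableIn[I.src] = I.reg;
            } else if (I.kind == Load) {
                auto it = availableIn.find(I.src);
                if (it != availableIn.end()) {
                    // Value still live in a register: R4 = load [SS1] ==> R4 = R17.
                    I.kind = Copy;
                    I.src = it->second;
                }
            } else {
                // A real spiller would also invalidate availableIn entries whose
                // register is clobbered here; omitted in this sketch.
            }
        }

        for (const Inst &I : code) {
            if (I.kind == Store)      std::printf("store R%d -> [SS%d]\n", I.reg, I.src);
            else if (I.kind == Load)  std::printf("R%d = load [SS%d]\n", I.reg, I.src);
            else if (I.kind == Copy)  std::printf("R%d = R%d\n", I.reg, I.src);
            else                      std::printf("...blah...\n");
        }
        return 0;
    }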
-
Chris Lattner authored
llvm-svn: 23387
-
- Sep 18, 2005
-
-
Chris Lattner authored
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus2 (unsigned int x) { b.j += x; }

To:

_plus2:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        slwi r3, r3, 6
        add r3, r4, r3
        rlwimi r3, r4, 0, 26, 14
        stw r3, 0(r2)
        blr

instead of:

_plus2:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        rlwinm r5, r4, 26, 21, 31
        add r3, r5, r3
        rlwimi r4, r3, 6, 15, 25
        stw r4, 0(r2)
        blr

by eliminating an 'and'.

I'm pretty sure this is as small as we can go :)

llvm-svn: 23386
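
A quick way to convince yourself the dropped masking is safe: j occupies bits
6..16 of the word (mask 131008 = 0x7FF << 6), so the insert-under-mask at the
end keeps only bits 6..16 of the sum, and those bits of old + (x << 6) cannot
be affected by the bits below or above the field. The following is a standalone
check of that identity, not compiler code; the loop strides are arbitrary.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t M = 0x7FFu << 6;   // field mask for j (bits 6..16)

        for (uint64_t o = 0; o <= 0xFFFFFFFFull; o += 0x000FADB5ull) {
            uint32_t old = (uint32_t)o;
            for (uint32_t x = 0; x < (1u << 13); x += 131) {
                // Old sequence: extract j, add, shift back, mask, merge.
                uint32_t j = (old >> 6) & 0x7FFu;
                uint32_t before = (old & ~M) | (((j + x) << 6) & M);

                // New sequence: add x<<6 to the whole word and let the
                // insert-under-mask (rlwimi) keep only bits 6..16 of the sum.
                uint32_t sum = old + (x << 6);
                uint32_t after = (old & ~M) | (sum & M);

                assert(before == after);
            }
        }
        std::puts("pre-masking before the insert is redundant");
        return 0;
    }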
-
Chris Lattner authored
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus2 (unsigned int x) { b.j += x; }

to:

plus2:
        mov %EAX, DWORD PTR [b]
        mov %ECX, %EAX
        and %ECX, 131008
        mov %EDX, DWORD PTR [%ESP + 4]
        shl %EDX, 6
        add %EDX, %ECX
        and %EDX, 131008
        and %EAX, -131009
        or %EDX, %EAX
        mov DWORD PTR [b], %EDX
        ret

instead of:

plus2:
        mov %EAX, DWORD PTR [b]
        mov %ECX, %EAX
        shr %ECX, 6
        and %ECX, 2047
        add %ECX, DWORD PTR [%ESP + 4]
        shl %ECX, 6
        and %ECX, 131008
        and %EAX, -131009
        or %ECX, %EAX
        mov DWORD PTR [b], %ECX
        ret

llvm-svn: 23385
-
Chris Lattner authored
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus3 (unsigned int x) { b.k += x; }

To:

plus3:
        mov %EAX, DWORD PTR [%ESP + 4]
        shl %EAX, 17
        add DWORD PTR [b], %EAX
        ret

instead of:

plus3:
        mov %EAX, DWORD PTR [%ESP + 4]
        shl %EAX, 17
        mov %ECX, DWORD PTR [b]
        add %EAX, %ECX
        and %EAX, -131072
        and %ECX, 131071
        or %ECX, %EAX
        mov DWORD PTR [b], %ECX
        ret

llvm-svn: 23384
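
Why no masking is needed at all here: k is the topmost field (bits 17..31), so
adding x << 17 to the word cannot disturb bits 0..16 (the addend's low bits are
zero and carries only propagate upward), and any carry out of bit 31 is simply
discarded, exactly as the bitfield's wraparound requires. Below is a standalone
check of that identity, not compiler code; the loop strides are arbitrary.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t M = 0xFFFE0000u;   // field mask for k (bits 17..31)

        for (uint64_t o = 0; o <= 0xFFFFFFFFull; o += 0x000FADB5ull) {
            uint32_t old = (uint32_t)o;
            for (uint32_t x = 0; x < (1u << 17); x += 1021) {
                // Old sequence: add, then reassemble field and non-field bits.
                uint32_t before = ((old + (x << 17)) & M) | (old & ~M);

                // New sequence: one add to the word in memory
                // (add DWORD PTR [b], %EAX).
                uint32_t after = old + (x << 17);

                assert(before == after);
            }
        }
        std::puts("the single memory add is exact");
        return 0;
    }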
-
Chris Lattner authored
llvm-svn: 23383
-
Chris Lattner authored
llvm-svn: 23382
-
Chris Lattner authored
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus3 (unsigned int x) { b.k += x; }

to:

_plus3:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r3, 0(r2)
        rlwinm r4, r3, 0, 0, 14
        add r4, r4, r3
        rlwimi r4, r3, 0, 15, 31
        stw r4, 0(r2)
        blr

instead of:

_plus3:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        srwi r5, r4, 17
        add r3, r5, r3
        slwi r3, r3, 17
        rlwimi r3, r4, 0, 15, 31
        stw r3, 0(r2)
        blr

llvm-svn: 23381
-
Chris Lattner authored
llvm-svn: 23380
-
Chris Lattner authored
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus1 (unsigned int x) { b.i += x; }

as:

_plus1:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        add r3, r4, r3
        rlwimi r3, r4, 0, 0, 25
        stw r3, 0(r2)
        blr

instead of:

_plus1:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        rlwinm r5, r4, 0, 26, 31
        add r3, r5, r3
        rlwimi r3, r4, 0, 0, 25
        stw r3, 0(r2)
        blr

llvm-svn: 23379
-
Chris Lattner authored
llvm-svn: 23378
-
Chris Lattner authored
llvm-svn: 23377
-
Chris Lattner authored
struct { unsigned int bit0:1; unsigned int ubyte:31; } sdata;
void foo() { sdata.ubyte++; }

into this:

foo:
        add DWORD PTR [sdata], 2
        ret

instead of this:

foo:
        mov %EAX, DWORD PTR [sdata]
        mov %ECX, %EAX
        add %ECX, 2
        and %ECX, -2
        and %EAX, 1
        or %EAX, %ECX
        mov DWORD PTR [sdata], %EAX
        ret

llvm-svn: 23376
-
Chris Lattner authored
llvm-svn: 23375
-
- Sep 17, 2005
-
-
Chris Lattner authored
llvm-svn: 23374
-
Chris Lattner authored
llvm-svn: 23373
-
- Sep 16, 2005
-
-
Nate Begeman authored
llvm-svn: 23371
-
Chris Lattner authored
llvm-svn: 23370
-
Chris Lattner authored
xforms. Run type inference on result patterns, so we always have fully typed
results (and to catch errors in .td files).

llvm-svn: 23369
-
- Sep 15, 2005
-
-
Chris Lattner authored
llvm-svn: 23368
-
Chris Lattner authored
returned.

llvm-svn: 23367
-
Chris Lattner authored
llvm-svn: 23366
-
Chris Lattner authored
llvm-svn: 23365
-
Chris Lattner authored
llvm-svn: 23364
-
Chris Lattner authored
llvm-svn: 23363
-
Chris Lattner authored
predicates since they will have already matched at this point.

llvm-svn: 23362
-
Chris Lattner authored
functions to preserve the Record for the xform instead of making it into a
function name.

llvm-svn: 23361
-
Chris Lattner authored
llvm-svn: 23360
-
- Sep 14, 2005
-
-
Chris Lattner authored
llvm-svn: 23359
-
Chris Lattner authored
llvm-svn: 23358
-
Chris Lattner authored
llvm-svn: 23357
-
Chris Lattner authored
llvm-svn: 23356
-
Chris Lattner authored
lists: only don't parse them if they are entirely missing (sparcv9).

llvm-svn: 23355
-
Chris Lattner authored
llvm-svn: 23354
-
Chris Lattner authored
verifying information about the operands.

llvm-svn: 23353
-
Chris Lattner authored
specified. The various *imm operands defined by PPC are really all i32, even
though the actual immediate is restricted to a smaller value in it.

llvm-svn: 23352
-
Chris Lattner authored
llvm-svn: 23351
-
Chris Lattner authored
llvm-svn: 23350
-
Chris Lattner authored
llvm-svn: 23349
-
Chris Lattner authored
llvm-svn: 23348
-