Commit ccc75d4f authored by Chris Lattner

This patch makes use of the infrastructure implemented before to safely and
aggressively coalesce live ranges even if they overlap.  Consider this LLVM
code for example:

int %test(int %X) {
        %Y = mul int %X, 1      ;; Codegens to Y = X
        %Z = add int %X, %Y
        ret int %Z
}

The mul is just there to get a copy into the code stream.  This produces
this machine code:

 (0x869e5a8, LLVM BB @0x869b9a0):
        %reg1024 = mov <fi#-2>, 1, %NOREG, 0    ;; "X"
        %reg1025 = mov %reg1024                 ;; "Y"  (subsumed by X)
        %reg1026 = add %reg1024, %reg1025
        %EAX = mov %reg1026
        ret

Note that the lifetimes of reg1024 and reg1025 overlap even though they
contain the same value (a sketch of the value-aware overlap check follows the
listing below).  This results in this machine code:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        add %EAX, %ECX
        ret
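
To make the idea concrete, here is a minimal sketch of the kind of check that
makes such a join safe.  The Segment, LiveInterval, and joinable names below
are invented for illustration and are not the actual LLVM data structures:
two intervals that overlap may still be coalesced if every point of overlap
provably carries the same value on both sides, which is exactly the fact a
reg-reg copy establishes between its two operands.

#include <cstdio>
#include <vector>

struct Segment {
  unsigned Start, End; // half-open range of instruction numbers [Start, End)
  unsigned ValNo;      // which definition produced the value in this segment
};

using LiveInterval = std::vector<Segment>;

// Returns true when A and B only overlap at points where SameValue says the
// two values are identical (e.g. one was defined by a copy of the other).
template <typename Pred>
bool joinable(const LiveInterval &A, const LiveInterval &B, Pred SameValue) {
  for (const Segment &SA : A)
    for (const Segment &SB : B) {
      bool Overlap = SA.Start < SB.End && SB.Start < SA.End;
      if (Overlap && !SameValue(SA.ValNo, SB.ValNo))
        return false; // overlapping ranges hold different values: unsafe
    }
  return true; // every overlap carries the same value: safe to join
}

int main() {
  // Mirrors the example above: reg1024 ("X") is live across the whole block,
  // reg1025 ("Y") is defined by the copy and overlaps it, but Y's only value
  // is a copy of X's value, so the join is legal.
  LiveInterval X = {{0, 4, /*ValNo=*/0}};
  LiveInterval Y = {{1, 3, /*ValNo=*/1}};
  auto CopyRelated = [](unsigned A, unsigned B) {
    return A == B || (A == 0 && B == 1) || (A == 1 && B == 0);
  };
  std::printf("joinable: %s\n", joinable(X, Y, CopyRelated) ? "yes" : "no");
}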

Another, worse case involves loops and PHI nodes.  Consider this trivial loop
testcase:

int %test2(int %X) {
entry:
        br label %Loop
Loop:
        %Y = phi int [%X, %entry], [%Z, %Loop]
        %Z = add int %Y, 1
        %cond = seteq int %Z, 100
        br bool %cond, label %Out, label %Loop
Out:
        ret int %Z
}

Because of interactions between the PHI elimination pass and the register
allocator, this got compiled to the following code (a sketch of where the
copies marked '***' come from follows the two listings):

test2:
        mov %ECX, DWORD PTR [%ESP + 4]
.LBBtest2_1:
***     mov %EAX, %ECX
        inc %EAX
        cmp %EAX, 100
***     mov %ECX, %EAX
        jne .LBBtest2_1

        ret

Or on PowerPC, this code:

_test2:
        mflr r0
        stw r0, 8(r1)
        stwu r1, -60(r1)
.LBB_test2_1:
        addi r2, r3, 1
        cmpwi cr0, r2, 100
***     or r3, r2, r2
        bne cr0, .LBB_test2_1

***     or r3, r2, r2
        lwz r0, 68(r1)
        mtlr r0
        addi r1, r1, 60
        blr 0
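
For context, here is a hedged sketch of what PHI elimination conceptually
does; the Block and PHI types and the lowerPHI helper are invented for
illustration and are not the real pass.  Each incoming value of the PHI
becomes a copy placed at the end of the corresponding predecessor, which is
why both the entry block and the loop block above end with a copy into the
PHI's register, and why that register's live range overlaps %Z's around the
back edge until the coalescer can merge them.

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct Block {
  std::string Name;
  std::vector<std::string> Body; // instructions, terminator (branch) excluded
};

struct PHI {
  std::string Dest;                                      // e.g. "Y"
  std::vector<std::pair<Block *, std::string>> Incoming; // (pred, value)
};

// Lower one PHI node: append "Dest = copy Value" to the end of each
// predecessor block (i.e. just before its branch) and drop the PHI itself.
void lowerPHI(const PHI &P) {
  for (const auto &In : P.Incoming)
    In.first->Body.push_back(P.Dest + " = copy " + In.second);
}

int main() {
  Block Entry{"entry", {}};
  Block Loop{"Loop", {"Z = add Y, 1", "cond = seteq Z, 100"}};
  PHI Y{"Y", {{&Entry, "X"}, {&Loop, "Z"}}};
  lowerPHI(Y);
  // The trailing "Y = copy Z" in Loop is the copy the coalescer must now
  // prove safe to fold away, even though Y and Z overlap on the back edge.
  for (const Block *B : {&Entry, &Loop}) {
    std::printf("%s:\n", B->Name.c_str());
    for (const std::string &I : B->Body)
      std::printf("        %s\n", I.c_str());
  }
}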



With this improvement in place, we now generate this code for these two
testcases, which is what we want:


test:
        mov %EAX, DWORD PTR [%ESP + 4]
        add %EAX, %EAX
        ret

test2:
        mov %EAX, DWORD PTR [%ESP + 4]
.LBBtest2_1:
        inc %EAX
        cmp %EAX, 100
        jne .LBBtest2_1 # Loop
        ret

Or on PPC:

_test2:
        mflr r0
        stw r0, 8(r1)
        stwu r1, -60(r1)
.LBB_test2_1:
        addi r3, r3, 1
        cmpwi cr0, r3, 100
        bne cr0, .LBB_test2_1

        lwz r0, 68(r1)
        mtlr r0
        addi r1, r1, 60
        blr 0


Static numbers for spill code loads/stores/reg-reg copies (smaller is better):

em3d:       before: 47/25/26         after: 44/22/24
164.gzip:   before: 433/245/310      after: 403/231/278
175.vpr:    before: 3721/2189/1581   after: 4144/2081/1423
176.gcc:    before: 26195/8866/9235  after: 25942/8082/8275
186.crafty: before: 4295/2587/3079   after: 4119/2519/2916
252.eon:    before: 12754/7585/5803  after: 12508/7425/5643
256.bzip2:  before: 463/226/315      after: 482/241/309


Sampled runtime performance numbers on X86:

gzip:  before: 41.09s  after: 39.86s
bzip2: before: 56.71s  after: 57.07s
gcc:   before:  6.16s  after:  6.12s
eon:   before:  2.03s  after:  2.00s
llvm-svn: 15194