Chris Lattner authored · ccc75d4f

This patch makes use of the infrastructure implemented before to safely and
aggressively coalesce live ranges even if they overlap.  Consider this LLVM
code for example:
    
    int %test(int %X) {
            %Y = mul int %X, 1      ;; Codegens to Y = X
            %Z = add int %X, %Y
            ret int %Z
    }
    
    The mul is just there to get a copy into the code stream.  This produces
    this machine code:
    
     (0x869e5a8, LLVM BB @0x869b9a0):
            %reg1024 = mov <fi#-2>, 1, %NOREG, 0    ;; "X"
            %reg1025 = mov %reg1024                 ;; "Y"  (subsumed by X)
            %reg1026 = add %reg1024, %reg1025
            %EAX = mov %reg1026
            ret
    
Note that the lifetimes of reg1024 and reg1025 overlap, even though they
contain the same value.  This results in this machine code:
    
    test:
            mov %EAX, DWORD PTR [%ESP + 4]
            mov %ECX, %EAX
            add %EAX, %ECX
            ret
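
The extra copy survives because a coalescer that refuses to join any
overlapping intervals has to keep reg1024 and reg1025 apart, even though the
overlap is only between two copies of the same value.  A minimal sketch of
the relaxed joinability test, using made-up data structures rather than the
real LiveIntervals code, looks roughly like this:

// Illustrative sketch only; the names and fields are invented, not the
// actual LLVM classes.  The point: an overlap no longer blocks joining
// when the copy's destination only ever holds the source's value.
#include <vector>

struct Range { unsigned Start, End; };        // half-open [Start, End)

struct LiveInterval {
  unsigned Reg;                               // virtual register number
  std::vector<Range> Ranges;                  // sorted, disjoint ranges
  unsigned CopiedFrom = 0;                    // register this value was copied from, 0 = none
};

static bool overlaps(const LiveInterval &A, const LiveInterval &B) {
  for (const Range &RA : A.Ranges)
    for (const Range &RB : B.Ranges)
      if (RA.Start < RB.End && RB.Start < RA.End)
        return true;
  return false;
}

// Non-overlapping intervals are always joinable; overlapping ones are now
// joinable too when the "interference" is between identical values, i.e.
// the destination (reg1025 above) is nothing but a copy of the source.
static bool canCoalesceCopy(const LiveInterval &Src, const LiveInterval &Dst) {
  if (!overlaps(Src, Dst))
    return true;
  return Dst.CopiedFrom == Src.Reg;
}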
    
Another, worse case involves loops and PHI nodes.  Consider this trivial loop
testcase:
    
    int %test2(int %X) {
    entry:
            br label %Loop
    Loop:
            %Y = phi int [%X, %entry], [%Z, %Loop]
            %Z = add int %Y, 1
            %cond = seteq int %Z, 100
            br bool %cond, label %Out, label %Loop
    Out:
            ret int %Z
    }
    
    Because of interactions between the PHI elimination pass and the register
    allocator, this got compiled to this code:
    
    test2:
            mov %ECX, DWORD PTR [%ESP + 4]
    .LBBtest2_1:
    ***     mov %EAX, %ECX
            inc %EAX
            cmp %EAX, 100
    ***     mov %ECX, %EAX
            jne .LBBtest2_1
    
            ret
    
Or on PowerPC, this code:
    
    _test2:
            mflr r0
            stw r0, 8(r1)
            stwu r1, -60(r1)
    .LBB_test2_1:
            addi r2, r3, 1
            cmpwi cr0, r2, 100
    ***     or r3, r2, r2
            bne cr0, .LBB_test2_1
    
    ***     or r3, r2, r2
            lwz r0, 68(r1)
            mtlr r0
            addi r1, r1, 60
            blr 0
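
The starred instructions are the PHI copies: PHI elimination lowers the phi
into a plain copy at the end of each predecessor block, and because the live
ranges of Y and Z then overlap inside the loop, the allocator kept them in
separate registers.  A rough C-level rendering of test2 after PHI elimination,
with those copies written out explicitly (names are illustrative, not what
the passes actually emit):

// test2 with the PHI rewritten as explicit copies (sketch).  Y and Z
// overlap inside the loop, but only through the copy Y = Z, so X, Y and
// Z can all be folded into a single register.
int test2(int X) {
  int Y = X;            // copy inserted in %entry for the PHI
  int Z;
  do {
    Z = Y + 1;
    Y = Z;              // copy inserted at the end of %Loop for the PHI
  } while (Z != 100);
  return Z;
}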
    
    
    
    With this improvement in place, we now generate this code for these two
    testcases, which is what we want:
    
    
    test:
            mov %EAX, DWORD PTR [%ESP + 4]
            add %EAX, %EAX
            ret
    
    test2:
            mov %EAX, DWORD PTR [%ESP + 4]
    .LBBtest2_1:
            inc %EAX
            cmp %EAX, 100
            jne .LBBtest2_1 # Loop
            ret
    
    Or on PPC:
    
    _test2:
            mflr r0
            stw r0, 8(r1)
            stwu r1, -60(r1)
    .LBB_test2_1:
            addi r3, r3, 1
            cmpwi cr0, r3, 100
            bne cr0, .LBB_test2_1
    
            lwz r0, 68(r1)
            mtlr r0
            addi r1, r1, 60
            blr 0
    
    
    Static numbers for spill code loads/stores/reg-reg copies (smaller is better):
    
    em3d:       before: 47/25/26         after: 44/22/24
    164.gzip:   before: 433/245/310      after: 403/231/278
    175.vpr:    before: 3721/2189/1581   after: 4144/2081/1423
    176.gcc:    before: 26195/8866/9235  after: 25942/8082/8275
    186.crafty: before: 4295/2587/3079   after: 4119/2519/2916
    252.eon:    before: 12754/7585/5803  after: 12508/7425/5643
256.bzip2:  before: 463/226/315      after: 482/241/309
    
    
Runtime performance samples on X86 (seconds):

gzip:  before: 41.09   after: 39.86
bzip2: before: 56.71   after: 57.07
gcc:   before: 6.16    after: 6.12
eon:   before: 2.03    after: 2.00
    llvm-svn: 15194