Skip to content
  1. Sep 10, 2006
  2. Sep 07, 2006
    • Chris Lattner's avatar
      Throttle back tail duplication to avoid creating really ugly sequences of code. · c465046e
      Chris Lattner authored
      For Transforms/TailDup/if-tail-dup.ll, for example, it produces:
      
      _foo:
              movl 8(%esp), %eax
              movl 4(%esp), %ecx
              testl $1, %ecx
              je LBB1_2       #cond_next
      LBB1_1: #cond_true
              movl $1, (%eax)
      LBB1_2: #cond_next
              testl $2, %ecx
              je LBB1_4       #cond_next10
      LBB1_3: #cond_true6
              movl $1, 4(%eax)
      LBB1_4: #cond_next10
              testl $4, %ecx
              je LBB1_6       #cond_next18
      LBB1_5: #cond_true14
              movl $1, 8(%eax)
      LBB1_6: #cond_next18
              testl $8, %ecx
              je LBB1_8       #return
      LBB1_7: #cond_true22
              movl $1, 12(%eax)
              ret
      LBB1_8: #return
              ret
      
      instead of:
      
      _foo:
              movl 4(%esp), %eax
              testl $2, %eax
              sete %cl
              movl 8(%esp), %edx
              testl $1, %eax
              je LBB1_2       #cond_next
      LBB1_1: #cond_true
              movl $1, (%edx)
              testb %cl, %cl
              jne LBB1_4      #cond_next10
              jmp LBB1_3      #cond_true6
      LBB1_2: #cond_next
              testb %cl, %cl
              jne LBB1_4      #cond_next10
      LBB1_3: #cond_true6
              movl $1, 4(%edx)
              testl $4, %eax
              je LBB1_6       #cond_next18
              jmp LBB1_5      #cond_true14
      LBB1_4: #cond_next10
              testl $4, %eax
              je LBB1_6       #cond_next18
      LBB1_5: #cond_true14
              movl $1, 8(%edx)
              testl $8, %eax
              je LBB1_8       #return
              jmp LBB1_7      #cond_true22
      LBB1_6: #cond_next18
              testl $8, %eax
              je LBB1_8       #return
      LBB1_7: #cond_true22
              movl $1, 12(%edx)
              ret
      LBB1_8: #return
              ret
      
      llvm-svn: 30158
      c465046e
  3. Aug 28, 2006
  4. Jan 23, 2006
  5. Apr 22, 2005
  6. Nov 22, 2004
  7. Nov 01, 2004
    • Chris Lattner's avatar
      Speed up the tail duplication pass on the testcase below from 68.2s to 1.23s: · 8af74249
      Chris Lattner authored
      #define CL0(a) case a: f(); goto c;
       #define CL1(a) CL0(a##0) CL0(a##1) CL0(a##2) CL0(a##3) CL0(a##4) CL0(a##5) \
       CL0(a##6) CL0(a##7) CL0(a##8) CL0(a##9)
       #define CL2(a) CL1(a##0) CL1(a##1) CL1(a##2) CL1(a##3) CL1(a##4) CL1(a##5) \
       CL1(a##6) CL1(a##7) CL1(a##8) CL1(a##9)
       #define CL3(a) CL2(a##0) CL2(a##1) CL2(a##2) CL2(a##3) CL2(a##4) CL2(a##5) \
       CL2(a##6) CL2(a##7) CL2(a##8) CL2(a##9)
       #define CL4(a) CL3(a##0) CL3(a##1) CL3(a##2) CL3(a##3) CL3(a##4) CL3(a##5) \
       CL3(a##6) CL3(a##7) CL3(a##8) CL3(a##9)
      
       void f();
      
       void a() {
           int b;
        c: switch (b) {
               CL4(1)
           }
       }
      
      This comes from GCC PR 15524
      
      llvm-svn: 17390
      8af74249
  8. Oct 06, 2004
  9. Sep 20, 2004
  10. Sep 15, 2004
  11. Sep 02, 2004
    • Reid Spencer's avatar
      Changes For Bug 352 · 7c16caa3
      Reid Spencer authored
      Move include/Config and include/Support into include/llvm/Config,
      include/llvm/ADT and include/llvm/Support. From here on out, all LLVM
      public header files must be under include/llvm/.
      
      llvm-svn: 16137
      7c16caa3
  12. Jul 29, 2004
  13. May 25, 2004
  14. Apr 18, 2004
  15. Mar 17, 2004
    • Chris Lattner's avatar
      Fix bug in previous checkin · a3783a57
      Chris Lattner authored
      llvm-svn: 12458
      a3783a57
    • Chris Lattner's avatar
      Okay, so there is no reasonable way for tail duplication to update SSA form, · 95057f6a
      Chris Lattner authored
      as it is making effectively arbitrary modifications to the CFG and we don't
      have domset/domfrontier implementations that can handle dynamic updates.
      Instead of having a bunch of code that doesn't actually work in practice,
      just demote any potentially tricky values to the stack (causing the problem
      to go away entirely).  Later invocations of mem2reg will rebuild SSA for us.
      
      This fixes all of the major performance regressions with tail duplication
      from LLVM 1.1.  For example, this loop:
      
      ---
      int popcount(int x) {
        int result = 0;
        while (x != 0) {
          result = result + (x & 0x1);
          x = x >> 1;
        }
        return result;
      }
      ---
      Used to be compiled into:
      
      int %popcount(int %X) {
      entry:
      	br label %loopentry
      
      loopentry:		; preds = %entry, %no_exit
      	%x.0 = phi int [ %X, %entry ], [ %tmp.9, %no_exit ]		; <int> [#uses=3]
      	%result.1.0 = phi int [ 0, %entry ], [ %tmp.6, %no_exit ]		; <int> [#uses=2]
      	%tmp.1 = seteq int %x.0, 0		; <bool> [#uses=1]
      	br bool %tmp.1, label %loopexit, label %no_exit
      
      no_exit:		; preds = %loopentry
      	%tmp.4 = and int %x.0, 1		; <int> [#uses=1]
      	%tmp.6 = add int %tmp.4, %result.1.0		; <int> [#uses=1]
      	%tmp.9 = shr int %x.0, ubyte 1		; <int> [#uses=1]
      	br label %loopentry
      
      loopexit:		; preds = %loopentry
      	ret int %result.1.0
      }
      
      And is now compiled into:
      
      int %popcount(int %X) {
      entry:
              br label %no_exit
      
      no_exit:                ; preds = %entry, %no_exit
              %x.0.0 = phi int [ %X, %entry ], [ %tmp.9, %no_exit ]          ; <int> [#uses=2]
              %result.1.0.0 = phi int [ 0, %entry ], [ %tmp.6, %no_exit ]             ; <int> [#uses=1]
              %tmp.4 = and int %x.0.0, 1              ; <int> [#uses=1]
              %tmp.6 = add int %tmp.4, %result.1.0.0          ; <int> [#uses=2]
              %tmp.9 = shr int %x.0.0, ubyte 1                ; <int> [#uses=2]
              %tmp.1 = seteq int %tmp.9, 0            ; <bool> [#uses=1]
              br bool %tmp.1, label %loopexit, label %no_exit
      
      loopexit:               ; preds = %no_exit
              ret int %tmp.6
      }
      
      llvm-svn: 12457
      95057f6a
  16. Mar 16, 2004
  17. Mar 01, 2004
  18. Feb 29, 2004
  19. Feb 22, 2004
  20. Feb 01, 2004
  21. Jan 09, 2004
  22. Nov 11, 2003
  23. Oct 20, 2003
  24. Aug 31, 2003
  25. Aug 23, 2003
  26. Aug 02, 2003
  27. Jul 23, 2003
  28. Jun 24, 2003
  29. Jun 22, 2003
Loading