Skip to content
  1. Oct 06, 2004
    • Chris Lattner's avatar
      Remove debugging code, fix encoding problem. This fixes the problems · 93867e51
      Chris Lattner authored
      the JIT had last night.
      
      llvm-svn: 16766
      93867e51
    • Nate Begeman's avatar
      Turning on fsel code gen now that we can do so would be good. · 9a1fbaf1
      Nate Begeman authored
      llvm-svn: 16765
      9a1fbaf1
    • Nate Begeman's avatar
      Implement floating point select for lt, gt, le, ge using the powerpc fsel · fac8529d
      Nate Begeman authored
      instruction.
      
      Now, rather than emitting the following loop out of bisect:
      .LBB_main_19:	; no_exit.0.i
      	rlwinm r3, r2, 3, 0, 28
      	lfdx f1, r3, r27
      	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
      	lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
      	fsub f2, f2, f1
      	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
      	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
      	fcmpu cr0, f1, f4
      	bge .LBB_main_64	; no_exit.0.i
      .LBB_main_63:	; no_exit.0.i
      	b .LBB_main_65	; no_exit.0.i
      .LBB_main_64:	; no_exit.0.i
      	fmr f2, f1
      .LBB_main_65:	; no_exit.0.i
      	addi r3, r2, 1
      	rlwinm r3, r3, 3, 0, 28
      	lfdx f1, r3, r27
      	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
      	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
      	fsub f4, f4, f1
      	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
      	lfd f5, lo16(.CPI_main_1-"L00000$pb")(r3)
      	fcmpu cr0, f1, f5
      	bge .LBB_main_67	; no_exit.0.i
      .LBB_main_66:	; no_exit.0.i
      	b .LBB_main_68	; no_exit.0.i
      .LBB_main_67:	; no_exit.0.i
      	fmr f4, f1
      .LBB_main_68:	; no_exit.0.i
      	fadd f1, f2, f4
      	addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
      	lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
      	fmul f1, f1, f2
      	rlwinm r3, r2, 3, 0, 28
      	lfdx f2, r3, r28
      	fadd f4, f2, f1
      	fcmpu cr0, f4, f0
      	bgt .LBB_main_70	; no_exit.0.i
      .LBB_main_69:	; no_exit.0.i
      	b .LBB_main_71	; no_exit.0.i
      .LBB_main_70:	; no_exit.0.i
      	fmr f0, f4
      .LBB_main_71:	; no_exit.0.i
      	fsub f1, f2, f1
      	addi r2, r2, -1
      	fcmpu cr0, f1, f3
      	blt .LBB_main_73	; no_exit.0.i
      .LBB_main_72:	; no_exit.0.i
      	b .LBB_main_74	; no_exit.0.i
      .LBB_main_73:	; no_exit.0.i
      	fmr f3, f1
      .LBB_main_74:	; no_exit.0.i
      	cmpwi cr0, r2, -1
      	fmr f16, f0
      	fmr f17, f3
      	bgt .LBB_main_19	; no_exit.0.i
      
      We emit this instead:
      .LBB_main_19:	; no_exit.0.i
      	rlwinm r3, r2, 3, 0, 28
      	lfdx f1, r3, r27
      	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
      	lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
      	fsub f2, f2, f1
      	fsel f1, f1, f1, f2
      	addi r3, r2, 1
      	rlwinm r3, r3, 3, 0, 28
      	lfdx f2, r3, r27
      	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
      	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
      	fsub f4, f4, f2
      	fsel f2, f2, f2, f4
      	fadd f1, f1, f2
      	addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
      	lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
      	fmul f1, f1, f2
      	rlwinm r3, r2, 3, 0, 28
      	lfdx f2, r3, r28
      	fadd f4, f2, f1
      	fsub f5, f0, f4
      	fsel f0, f5, f0, f4
      	fsub f1, f2, f1
      	addi r2, r2, -1
      	fsub f2, f1, f3
      	fsel f3, f2, f3, f1
      	cmpwi cr0, r2, -1
      	fmr f16, f0
      	fmr f17, f3
      	bgt .LBB_main_19	; no_exit.0.i
      
      llvm-svn: 16764
      fac8529d
    • Chris Lattner's avatar
      Codegen signed mod by 2 or -2 more efficiently. Instead of generating: · 6835dedb
      Chris Lattner authored
      t:
              mov %EDX, DWORD PTR [%ESP + 4]
              mov %ECX, 2
              mov %EAX, %EDX
              sar %EDX, 31
              idiv %ECX
              mov %EAX, %EDX
              ret
      
      Generate:
      t:
              mov %ECX, DWORD PTR [%ESP + 4]
      ***     mov %EAX, %ECX
              cdq
              and %ECX, 1
              xor %ECX, %EDX
              sub %ECX, %EDX
      ***     mov %EAX, %ECX
              ret
      
      Note that the two marked moves are redundant, and should be eliminated by the
      register allocator, but aren't.
      
      Compare this to GCC, which generates:
      
      t:
              mov     %eax, DWORD PTR [%esp+4]
              mov     %edx, %eax
              shr     %edx, 31
              lea     %ecx, [%edx+%eax]
              and     %ecx, -2
              sub     %eax, %ecx
              ret
      
      or ICC 8.0, which generates:
      
      t:
              movl      4(%esp), %ecx                                 #3.5
              movl      $-2147483647, %eax                            #3.25
              imull     %ecx                                          #3.25
              movl      %ecx, %eax                                    #3.25
              sarl      $31, %eax                                     #3.25
              addl      %ecx, %edx                                    #3.25
              subl      %edx, %eax                                    #3.25
              addl      %eax, %eax                                    #3.25
              negl      %eax                                          #3.25
              subl      %eax, %ecx                                    #3.25
              movl      %ecx, %eax                                    #3.25
              ret                                                     #3.25
      
      We would be in great shape if not for the moves.
      
      llvm-svn: 16763
      6835dedb
    • Chris Lattner's avatar
      Really fix FreeBSD, which apparently doesn't tolerate the extern. · e4c60eb7
      Chris Lattner authored
      Thanks to Jeff Cohen for pointing out my goof.
      
      llvm-svn: 16762
      e4c60eb7
    • Chris Lattner's avatar
      Fix a scary bug with signed division by a power of two. We used to generate: · 7bd8f133
      Chris Lattner authored
      s:   ;; X / 4
              mov %EAX, DWORD PTR [%ESP + 4]
              mov %ECX, %EAX
              sar %ECX, 1
              shr %ECX, 30
              mov %EDX, %EAX
              add %EDX, %ECX
              sar %EAX, 2
              ret
      
      When we really meant:
      
      s:
              mov %EAX, DWORD PTR [%ESP + 4]
              mov %ECX, %EAX
              sar %ECX, 1
              shr %ECX, 30
              add %EAX, %ECX
              sar %EAX, 2
              ret
      
      Hey, this also reduces register pressure :)
      
      llvm-svn: 16761
      7bd8f133
    • Chris Lattner's avatar
      Codegen signed divides by 2 and -2 more efficiently. In particular · 147edd2f
      Chris Lattner authored
      instead of:
      
      s:   ;; X / 2
              movl 4(%esp), %eax
              movl %eax, %ecx
              shrl $31, %ecx
              movl %eax, %edx
              addl %ecx, %edx
              sarl $1, %eax
              ret
      
      t:   ;; X / -2
              movl 4(%esp), %eax
              movl %eax, %ecx
              shrl $31, %ecx
              movl %eax, %edx
              addl %ecx, %edx
              sarl $1, %eax
              negl %eax
              ret
      
      Emit:
      
      s:
              movl 4(%esp), %eax
              cmpl $-2147483648, %eax
              sbbl $-1, %eax
              sarl $1, %eax
              ret
      
      t:
              movl 4(%esp), %eax
              cmpl $-2147483648, %eax
              sbbl $-1, %eax
              sarl $1, %eax
              negl %eax
              ret
      
      llvm-svn: 16760
      147edd2f
    • Chris Lattner's avatar
      Add some new instructions. Fix the asm string for sbb32rr · e9bfa5a2
      Chris Lattner authored
      llvm-svn: 16759
      e9bfa5a2
    • Chris Lattner's avatar
      Reduce code growth implied by the tail duplication pass by not duplicating · 2ce32df8
      Chris Lattner authored
      an instruction if it can be hoisted to a common dominator of the block.
      This implements: test/Regression/Transforms/TailDup/MergeTest.ll
      
      llvm-svn: 16758
      2ce32df8
    • Chris Lattner's avatar
      FreeBSD uses GCC. Patch contributed by Jeff Cohen! · 32ed828f
      Chris Lattner authored
      llvm-svn: 16756
      32ed828f
  2. Oct 05, 2004
  3. Oct 04, 2004
  4. Oct 03, 2004
  5. Oct 02, 2004
  6. Oct 01, 2004
    • Chris Lattner's avatar
      Add a simple little improvement to the local spiller to keep track of stores · 04f52079
      Chris Lattner authored
      and delete them if they turn out to be dead.  This is a useful little hack
      that even speeds up some programs.  For example, it speeds up PtrDist/ks
      from 17.53s to 15.59s, and 188.ammp from 149s to 146s.
      
      This also speeds up llc :)
      
      llvm-svn: 16630
      04f52079
    • Chris Lattner's avatar
      Substantially revamp the local spiller, causing it to actually improve the · d3b1f6c7
      Chris Lattner authored
      generated code over the simple spiller.  The new local spiller generates
      substantially better code than the simple one in some cases, by reusing
      values that are loaded out of stack slots and kept available in registers.
      
      This primarily helps programs that are spilling a lot, and there is still
      stuff that can be done to improve it.  This patch makes the local spiller
      the default, as it's only a tiny bit slower than the simple spiller (it
      increases the runtime of llc by < 1%).
      
      Here are some numbers with speedups.
      
      Program    #reuse  old(s)    new(s)  Speedup
      
      Povray:     3452,  16.87 ->  15.93   (5.5%)
      177.mesa:   2176,   2.77 ->   2.76   (0%)
      179.art:      35,  28.43 ->  28.01   (1.5%)
      183.equake:   55,  61.44 ->  61.41   (0%)
      188.ammp:    869, 174    -> 149      (15%)
      
      164.gzip:     43,  40.73 ->  40.71   (0%)
      175.vpr:     351,  18.54 ->  17.34   (6.5%)
      176.gcc:    2471,   5.01 ->   4.92   (1.8%)
      181.mcf:      42,  79.30 ->  75.20   (5.2%)
      186.crafty:  484,  29.73 ->  30.04   (-1%)
      197.parser:  251,  10.47 ->  10.67   (-1%)
      252.eon:    1501,   1.98 ->   1.75   (12%)
      253.perlbm: 1183,  14.83 ->  14.42   (2.8%)
      254.gap:     825,   7.46 ->   7.29   (2.3%)
      255.vortex:  285,  10.51 ->  10.27   (2.3%)
      256.bzip2:    63,  55.70 ->  55.20   (0.9%)
      300.twolf:   830,  21.63 ->  22.00   (-1%)
      
      PtrDist/ks:   14,  32.75 -> 17.53    (46.5%)
      Olden/tsp:    46,   8.71 ->  8.24    (5.4%)
      Free/distray: 70,   1.09 ->  0.99    (9.2%)
      
      llvm-svn: 16629
      d3b1f6c7
    • Chris Lattner's avatar
      Pretty print a bit nicer :) · f06f4a7c
      Chris Lattner authored
      llvm-svn: 16628
      f06f4a7c
    • Alkis Evlogimenos's avatar
      Document this class a bit :-) · cc37da1b
      Alkis Evlogimenos authored
      llvm-svn: 16626
      cc37da1b
  7. Sep 30, 2004
Loading