Commits · 93867e516a104ba2e6ab1f73cde7d6600e3ec4ea · Roger Ferrer / llvm-epi-0.8

Oct 06, 2004

Remove debugging code, fix encoding problem. This fixes the problems · 93867e51
Chris Lattner authored Oct 06, 2004
```
the JIT had last night.

llvm-svn: 16766
```
93867e51
Turning on fsel code gen now that we can do so would be good. · 9a1fbaf1
Nate Begeman authored Oct 06, 2004
```
llvm-svn: 16765
```
9a1fbaf1

Implement floating point select for lt, gt, le, ge using the powerpc fsel · fac8529d

Nate Begeman authored Oct 06, 2004

instruction.

Now, rather than emitting the following loop out of bisect:
.LBB_main_19:	; no_exit.0.i
	rlwinm r3, r2, 3, 0, 28
	lfdx f1, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f2, f2, f1
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
	fcmpu cr0, f1, f4
	bge .LBB_main_64	; no_exit.0.i
.LBB_main_63:	; no_exit.0.i
	b .LBB_main_65	; no_exit.0.i
.LBB_main_64:	; no_exit.0.i
	fmr f2, f1
.LBB_main_65:	; no_exit.0.i
	addi r3, r2, 1
	rlwinm r3, r3, 3, 0, 28
	lfdx f1, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f4, f4, f1
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f5, lo16(.CPI_main_1-"L00000$pb")(r3)
	fcmpu cr0, f1, f5
	bge .LBB_main_67	; no_exit.0.i
.LBB_main_66:	; no_exit.0.i
	b .LBB_main_68	; no_exit.0.i
.LBB_main_67:	; no_exit.0.i
	fmr f4, f1
.LBB_main_68:	; no_exit.0.i
	fadd f1, f2, f4
	addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
	lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
	fmul f1, f1, f2
	rlwinm r3, r2, 3, 0, 28
	lfdx f2, r3, r28
	fadd f4, f2, f1
	fcmpu cr0, f4, f0
	bgt .LBB_main_70	; no_exit.0.i
.LBB_main_69:	; no_exit.0.i
	b .LBB_main_71	; no_exit.0.i
.LBB_main_70:	; no_exit.0.i
	fmr f0, f4
.LBB_main_71:	; no_exit.0.i
	fsub f1, f2, f1
	addi r2, r2, -1
	fcmpu cr0, f1, f3
	blt .LBB_main_73	; no_exit.0.i
.LBB_main_72:	; no_exit.0.i
	b .LBB_main_74	; no_exit.0.i
.LBB_main_73:	; no_exit.0.i
	fmr f3, f1
.LBB_main_74:	; no_exit.0.i
	cmpwi cr0, r2, -1
	fmr f16, f0
	fmr f17, f3
	bgt .LBB_main_19	; no_exit.0.i

We emit this instead:
.LBB_main_19:	; no_exit.0.i
	rlwinm r3, r2, 3, 0, 28
	lfdx f1, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f2, f2, f1
	fsel f1, f1, f1, f2
	addi r3, r2, 1
	rlwinm r3, r3, 3, 0, 28
	lfdx f2, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f4, f4, f2
	fsel f2, f2, f2, f4
	fadd f1, f1, f2
	addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
	lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
	fmul f1, f1, f2
	rlwinm r3, r2, 3, 0, 28
	lfdx f2, r3, r28
	fadd f4, f2, f1
	fsub f5, f0, f4
	fsel f0, f5, f0, f4
	fsub f1, f2, f1
	addi r2, r2, -1
	fsub f2, f1, f3
	fsel f3, f2, f3, f1
	cmpwi cr0, r2, -1
	fmr f16, f0
	fmr f17, f3
	bgt .LBB_main_19	; no_exit.0.i

llvm-svn: 16764

fac8529d

Codegen signed mod by 2 or -2 more efficiently. Instead of generating: · 6835dedb

Chris Lattner authored Oct 06, 2004

t:
        mov %EDX, DWORD PTR [%ESP + 4]
        mov %ECX, 2
        mov %EAX, %EDX
        sar %EDX, 31
        idiv %ECX
        mov %EAX, %EDX
        ret

Generate:
t:
        mov %ECX, DWORD PTR [%ESP + 4]
***     mov %EAX, %ECX
        cdq
        and %ECX, 1
        xor %ECX, %EDX
        sub %ECX, %EDX
***     mov %EAX, %ECX
        ret

Note that the two marked moves are redundant, and should be eliminated by the
register allocator, but aren't.

Compare this to GCC, which generates:

t:
        mov     %eax, DWORD PTR [%esp+4]
        mov     %edx, %eax
        shr     %edx, 31
        lea     %ecx, [%edx+%eax]
        and     %ecx, -2
        sub     %eax, %ecx
        ret

or ICC 8.0, which generates:

t:
        movl      4(%esp), %ecx                                 #3.5
        movl      $-2147483647, %eax                            #3.25
        imull     %ecx                                          #3.25
        movl      %ecx, %eax                                    #3.25
        sarl      $31, %eax                                     #3.25
        addl      %ecx, %edx                                    #3.25
        subl      %edx, %eax                                    #3.25
        addl      %eax, %eax                                    #3.25
        negl      %eax                                          #3.25
        subl      %eax, %ecx                                    #3.25
        movl      %ecx, %eax                                    #3.25
        ret                                                     #3.25

We would be in great shape if not for the moves.

llvm-svn: 16763

6835dedb

Really fix FreeBSD, which apparently doesn't tolerate the extern. · e4c60eb7
Chris Lattner authored Oct 06, 2004
```
Thanks to Jeff Cohen for pointing out my goof.

llvm-svn: 16762
```
e4c60eb7

Fix a scary bug with signed division by a power of two. We used to generate: · 7bd8f133

Chris Lattner authored Oct 06, 2004

s:   ;; X / 4
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        sar %ECX, 1
        shr %ECX, 30
        mov %EDX, %EAX
        add %EDX, %ECX
        sar %EAX, 2
        ret

When we really meant:

s:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        sar %ECX, 1
        shr %ECX, 30
        add %EAX, %ECX
        sar %EAX, 2
        ret

Hey, this also reduces register pressure :)

llvm-svn: 16761

7bd8f133

Codegen signed divides by 2 and -2 more efficiently. In particular · 147edd2f

Chris Lattner authored Oct 06, 2004

instead of:

s:   ;; X / 2
        movl 4(%esp), %eax
        movl %eax, %ecx
        shrl $31, %ecx
        movl %eax, %edx
        addl %ecx, %edx
        sarl $1, %eax
        ret

t:   ;; X / -2
        movl 4(%esp), %eax
        movl %eax, %ecx
        shrl $31, %ecx
        movl %eax, %edx
        addl %ecx, %edx
        sarl $1, %eax
        negl %eax
        ret

Emit:

s:
        movl 4(%esp), %eax
        cmpl $-2147483648, %eax
        sbbl $-1, %eax
        sarl $1, %eax
        ret

t:
        movl 4(%esp), %eax
        cmpl $-2147483648, %eax
        sbbl $-1, %eax
        sarl $1, %eax
        negl %eax
        ret

llvm-svn: 16760

147edd2f

Add some new instructions. Fix the asm string for sbb32rr · e9bfa5a2
Chris Lattner authored Oct 06, 2004
```
llvm-svn: 16759
```
e9bfa5a2

Reduce code growth implied by the tail duplication pass by not duplicating · 2ce32df8

Chris Lattner authored Oct 06, 2004

an instruction if it can be hoisted to a common dominator of the block.
This implements: test/Regression/Transforms/TailDup/MergeTest.ll

llvm-svn: 16758

2ce32df8

FreeBSD uses GCC. Patch contributed by Jeff Cohen! · 32ed828f
Chris Lattner authored Oct 06, 2004
```
llvm-svn: 16756
```
32ed828f

Oct 05, 2004
- Must include sys/stat.h before declaring a 'struct stat' · c5a630bd
  Brian Gaeke authored Oct 05, 2004
```
llvm-svn: 16728
```
  c5a630bd
- Make sure the const bit gets inherited correctly when linking declarations · 9b38ead8
  Chris Lattner authored Oct 05, 2004
```
of disagreeing constness.  This fixes
test/Regression/Linker/ConstantGlobals[123].ll

llvm-svn: 16692
```
  9b38ead8
- Adjust sys/stat.h inclusion so it's only for SunOS. · abb04cfc
  Reid Spencer authored Oct 05, 2004
```
llvm-svn: 16686
```
  abb04cfc
- Added a couple of includes to get this to compile on Sparc. · c3ef3cc7
  Tanya Lattner authored Oct 05, 2004
```
llvm-svn: 16685
```
  c3ef3cc7
- Solaris doesn't have MAP_FILE. · 98959376
  Chris Lattner authored Oct 05, 2004
```
llvm-svn: 16682
```
  98959376
Oct 04, 2004
- Excise the ill-advised RLCOMP compression algorithm and simply leave the · 04f1e906
  Reid Spencer authored Oct 04, 2004
```
previously temporary NULLCOMP implementation that merely copies the data
verbatim without compression. Also, don't warn if there's no compression
library as that is taken care of during configuration time.

llvm-svn: 16654
```
  04f1e906
- Add a context for the callback so different compression scenarios can be · 2e3cc54e
  Reid Spencer authored Oct 04, 2004
```
distinguished. Tidy up documentation.  Thanks, Chris.

llvm-svn: 16652
```
  2e3cc54e
- Fix build if not HAVE_BZIP2 · b4abe320
  Chris Lattner authored Oct 04, 2004
```
llvm-svn: 16650
```
  b4abe320
- First version of the MappedFile abstraction for operating system independent · 161a459d
  Reid Spencer authored Oct 04, 2004
```
mapping of files. This first version uses mmap where it's available. The
class needs to implement an alternate mechanism based on malloc'd memory
and file reading/writing for platforms without virtual memory.

llvm-svn: 16649
```
  161a459d
- First version of a support utility to provide generalized compression in · c8950375
  Reid Spencer authored Oct 04, 2004
```
LLVM that handles availability and unavailability of bzip2 and zlib.

llvm-svn: 16648
```
  c8950375
- * Prune #includes · d1ab378b
  Chris Lattner authored Oct 04, 2004
```
* Update comments
* Rearrange code a bit
* Finally ELIMINATE the GAS workaround emitter for Intel mode.  woot!

llvm-svn: 16647
```
  d1ab378b
- Add support for emitting AT&T style .s files, and make it the default. Users · 68ab0beb
  Chris Lattner authored Oct 04, 2004
```
may now choose their output format with the -x86-asm-syntax={intel|att} flag.

llvm-svn: 16646
```
  68ab0beb
- Convert some missed patterns to support AT&T style · 8bbde2fb
  Chris Lattner authored Oct 04, 2004
```
llvm-svn: 16645
```
  8bbde2fb
- Apparently the GNU assembler has a HUGE hack to be compatible with really · 2e99778a
  Chris Lattner authored Oct 04, 2004
```
old and broken AT&T syntax assemblers.  The problem with this hack is that
*SOME* forms of the fdiv and fsub instructions have the 'r' bit inverted.
This was a real pain to figure out, but is trivially easy to support: thus
we are now bug compatible with gas and gcc.

llvm-svn: 16644
```
  2e99778a
- Fix incorrect suffix · af695033
  Chris Lattner authored Oct 04, 2004
```
llvm-svn: 16642
```
  af695033
- Fix some more missed suffixes and swapped operands · e1a2826d
  Chris Lattner authored Oct 04, 2004
```
llvm-svn: 16641
```
  e1a2826d
- Add missing suffixes to FP instructions for AT&T mode · a488f04f
  Chris Lattner authored Oct 04, 2004
```
llvm-svn: 16640
```
  a488f04f
Oct 03, 2004

Add support for the -x86-asm-syntax flag, which can be used to choose between · 56832601

Chris Lattner authored Oct 03, 2004

Intel and AT&T style assembly language.  The ultimate goal of this is to
eliminate the GasBugWorkaroundEmitter class, but for now AT&T style emission
is not fully operational.

llvm-svn: 16639

56832601

Add support to the instruction patterns for AT&T style output, which will · 4e59a149

Chris Lattner authored Oct 03, 2004

hopefully lead to the death of the 'GasBugWorkaroundEmitter'.  This also
includes changes to wrap the whole file to 80 columns! Woot! :)

Note that the AT&T style output has not been tested at all.

llvm-svn: 16638

4e59a149

Add initial support for variants · 42c43b2c
Chris Lattner authored Oct 03, 2004
```
llvm-svn: 16635
```
42c43b2c

Oct 02, 2004
- Do not repeat the map lookup · 00db230c
  Chris Lattner authored Oct 01, 2004
```
llvm-svn: 16633
```
  00db230c
- When a virtual register is folded into an instruction, keep track of whether · 1905ae69
  Chris Lattner authored Oct 01, 2004
```
it was a use, def, or both.  This allows us to be less pessimistic in our
analysis of them.  In practice, this doesn't make a big difference, but it
doesn't hurt either.

llvm-svn: 16632
```
  1905ae69
Oct 01, 2004

Add a simple little improvement to the local spiller to keep track of stores · 04f52079

Chris Lattner authored Oct 01, 2004

and delete them if they turn out to be dead.  This is a useful little hack
that even speeds up some programs.  For example, it speeds up Ptrdist/ks
from 17.53s to 15.59s, and 188.ammp from 149s to 146s.

This also speeds up llc :)

llvm-svn: 16630

04f52079

Substantially revamp the local spiller, causing it to actually improve the · d3b1f6c7

Chris Lattner authored Oct 01, 2004

generated code over the simple spiller.  The new local spiller generates
substantially better code than the simple one in some cases, by reusing
values that are loaded out of stack slots and kept available in registers.

This primarily helps programs that are spilling a lot, and there is still
stuff that can be done to improve it.  This patch makes the local spiller
the default, as it's only a tiny bit slower than the simple spiller (it
increases the runtime of llc by < 1%).

Here are some numbers with speedups.

Program    #reuse  old(s)    new(s)  Speedup

Povray:     3452,  16.87 ->  15.93   (5.5%)
177.mesa:   2176,   2.77 ->   2.76   (0%)
179.art:      35,  28.43 ->  28.01   (1.5%)
183.equake:   55,  61.44 ->  61.41   (0%)
188.ammp:    869, 174    -> 149      (15%)

164.gzip:     43,  40.73 ->  40.71   (0%)
175.vpr:     351,  18.54 ->  17.34   (6.5%)
176.gcc:    2471,   5.01 ->   4.92   (1.8%)
181.mcf:      42,  79.30 ->  75.20   (5.2%)
186.crafty:  484,  29.73 ->  30.04   (-1%)
197.parser:  251,  10.47 ->  10.67   (-1%)
252.eon:    1501,   1.98 ->   1.75   (12%)
253.perlbm: 1183,  14.83 ->  14.42   (2.8%)
254.gap:     825,   7.46 ->   7.29   (2.3%)
255.vortex:  285,  10.51 ->  10.27   (2.3%)
256.bzip2:    63,  55.70 ->  55.20   (0.9%)
300.twolf:   830,  21.63 ->  22.00   (-1%)

PtrDist/ks:   14,  32.75 ->  17.53   (46.5%)
Olden/tsp:    46,   8.71 ->   8.24   (5.4%)
Free/distray: 70,   1.09 ->   0.99   (9.2%)

llvm-svn: 16629

d3b1f6c7

Pretty print a bit nicer :) · f06f4a7c
Chris Lattner authored Oct 01, 2004
```
llvm-svn: 16628
```
f06f4a7c
Document this class a bit :-) · cc37da1b
Alkis Evlogimenos authored Oct 01, 2004
```
llvm-svn: 16626
```
cc37da1b

Sep 30, 2004
- Make EmitMappingInfo into an "external location" option, so that it can be set · dd9bb500
  Brian Gaeke authored Sep 30, 2004
```
or cleared externally.

llvm-svn: 16623
```
  dd9bb500
- Add accessor function. · 33e834eb
  Brian Gaeke authored Sep 30, 2004
```
llvm-svn: 16622
```
  33e834eb
- Correct type of accessor functions. · 5a89bde5
  Brian Gaeke authored Sep 30, 2004
```
llvm-svn: 16621
```
  5a89bde5
- Namespacify. Add accessor function. · e80d4cd6
  Brian Gaeke authored Sep 30, 2004
```
llvm-svn: 16620
```
  e80d4cd6