- Jun 27, 2010
-
Chris Lattner authored
struct DeclGroup { unsigned NumDecls; };
int foo(DeclGroup D) { return D.NumDecls; }

to:

%struct.DeclGroup = type { i32 }

define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
  %D = alloca %struct.DeclGroup, align 4          ; <%struct.DeclGroup*> [#uses=2]
  %tmp = alloca i64                               ; <i64*> [#uses=2]
  store i64 %0, i64* %tmp
  %1 = bitcast i64* %tmp to %struct.DeclGroup*    ; <%struct.DeclGroup*> [#uses=1]
  %2 = load %struct.DeclGroup* %1, align 1        ; <%struct.DeclGroup> [#uses=1]
  store %struct.DeclGroup %2, %struct.DeclGroup* %D
  %tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
  %tmp2 = load i32* %tmp1                         ; <i32> [#uses=1]
  ret i32 %tmp2
}

which caused fast-isel bailouts due to the FCA load/store of %2. Now we generate just this blissful code:

%struct.DeclGroup = type { i32 }

define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
  %D = alloca %struct.DeclGroup, align 4          ; <%struct.DeclGroup*> [#uses=2]
  %tmp = alloca i64                               ; <i64*> [#uses=2]
  %coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
  store i64 %0, i64* %tmp
  %1 = bitcast i64* %tmp to i32*                  ; <i32*> [#uses=1]
  %2 = load i32* %1, align 1                      ; <i32> [#uses=1]
  store i32 %2, i32* %coerce.dive
  %tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
  %tmp2 = load i32* %tmp1                         ; <i32> [#uses=1]
  ret i32 %tmp2
}

This avoids fast-isel bailing out and is groundwork for a future patch. This reduces bailouts on CGStmt.ll from 935 to 911.

llvm-svn: 106974
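For context, a minimal sketch of the coercion being handled here; the Pair struct and sum function below are hypothetical, not taken from this commit. On x86-64, a small aggregate that fits in one integer eightbyte is passed in a register, so Clang lowers the by-value parameter to an integer type (the i64 above) and then has to get that coerced value back into the local struct; the coerce.dive GEP lets it store straight into DeclGroup's single field instead of going through a first-class-aggregate load/store.

// Hypothetical example (not from the commit): a small struct passed by value
// is coerced to a single integer register on x86-64, just as DeclGroup above
// is lowered to an i64 parameter.
struct Pair { int a; int b; };       // 8 bytes: one integer eightbyte

int sum(Pair P) {                    // IR signature is roughly: define i32 @...(i64)
  return P.a + P.b;
}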
-
Chris Lattner authored
IR when handling X86-64 by-value struct stuff. For example, we used to compile this:

struct DeclGroup { unsigned NumDecls; };
int foo(DeclGroup D);
void bar(DeclGroup *D) { foo(*D); }

into:

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64                              ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %0 = bitcast i64* %tmp3 to %struct.DeclGroup*   ; <%struct.DeclGroup*> [#uses=1]
  %1 = load %struct.DeclGroup* %agg.tmp           ; <%struct.DeclGroup> [#uses=1]
  store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
  %2 = load i64* %tmp3                            ; <i64> [#uses=1]
  call void @_Z3foo9DeclGroup(i64 %2)
  ret void
}

which would cause fast-isel to bail out due to the first-class aggregate load %1. With this patch we now compile it into the (still awful):

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64                              ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
  %0 = bitcast i64* %tmp3 to i32*                 ; <i32*> [#uses=1]
  %1 = load i32* %coerce.dive                     ; <i32> [#uses=1]
  store i32 %1, i32* %0, align 1
  %2 = load i64* %tmp3                            ; <i64> [#uses=1]
  %call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
  ret void
}

which doesn't bail out. On CGStmt.ll, this reduces fast-isel bail-outs from 958 to 935, and is the precursor of better things to come.

llvm-svn: 106973
-
Jordy Rose authored
Implicitly compare symbolic expressions to zero when they're being used as constraints. Part of PR7491. llvm-svn: 106972
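A hedged illustration of the analyzer behavior described above; the function names are made up and not part of the commit. When a bare symbolic expression appears where a constraint is expected, such as a branch condition, it is now treated as that expression compared against zero.

// Hypothetical example (not from the commit): the condition is the symbolic
// expression (a - b); the analyzer now records (a - b) != 0 on the true
// branch and (a - b) == 0 on the false branch instead of leaving it
// unconstrained.
void use(int);

void test(int a, int b) {
  if (a - b)
    use(1);   // here: a - b != 0
  else
    use(0);   // here: a - b == 0
}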
-
Chris Lattner authored
llvm-svn: 106971
-
Chris Lattner authored
load/store nonsense in the epilog. For example, for:

int foo(int X) { int A[100]; return A[X]; }

we used to generate:

  %arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=1]
  store i32 %tmp1, i32* %retval
  %0 = load i32* %retval                          ; <i32> [#uses=1]
  ret i32 %0
}

which codegen'd to this code:

_foo:                                   ## @foo
## BB#0:                                ## %entry
        subq    $408, %rsp              ## imm = 0x198
        movl    %edi, 400(%rsp)
        movl    400(%rsp), %edi
        movslq  %edi, %rax
        movl    (%rsp,%rax,4), %edi
        movl    %edi, 404(%rsp)
        movl    404(%rsp), %eax
        addq    $408, %rsp              ## imm = 0x198
        ret

Now we generate:

  %arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=1]
  ret i32 %tmp1
}

and:

_foo:                                   ## @foo
## BB#0:                                ## %entry
        subq    $408, %rsp              ## imm = 0x198
        movl    %edi, 404(%rsp)
        movl    404(%rsp), %edi
        movslq  %edi, %rax
        movl    (%rsp,%rax,4), %eax
        addq    $408, %rsp              ## imm = 0x198
        ret

This actually does matter, cutting out 2000 lines of IR from CGStmt.ll for example. Another interesting effect is that altivec.h functions which are dead now get dce'd by the inliner. Hence all the changes to builtins-ppc-altivec.c to ensure the calls aren't dead.

llvm-svn: 106970
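As a hedged aside (this example is not from the commit): the store/load round trip that gets removed is the one for the simple, single-return case shown above; when a function has several return statements, -O0 CodeGen still routes the values through a common exit block, so a %retval temporary remains the natural lowering there.

// Hypothetical example (not from the commit): two return statements, so both
// values are presumably still funneled through a shared return block and a
// %retval slot at -O0.
int pick(int X) {
  if (X < 0)
    return -1;
  return X;
}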
-
Chris Lattner authored
llvm-svn: 106969
-
Chris Lattner authored
llvm-svn: 106968
-
Chris Lattner authored
llvm-svn: 106967
-
Chris Lattner authored
rdar://7530813: This avoids generating two gep's for common array operations. Before we would generate something like:

  %tmp = load i32* %X.addr                        ; <i32> [#uses=1]
  %arraydecay = getelementptr inbounds [100 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1]
  %arrayidx = getelementptr inbounds i32* %arraydecay, i32 %tmp ; <i32*> [#uses=1]
  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=1]

Now we generate:

  %tmp = load i32* %X.addr                        ; <i32> [#uses=1]
  %arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i32 %tmp ; <i32*> [#uses=1]
  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=1]

Less IR is better at -O0.

llvm-svn: 106966
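The C source isn't shown in the message; a plausible shape that produces the IR above (the function name is an assumption) is a local array indexed by a parameter, where the array-to-pointer decay and the index used to become two separate GEPs.

// Assumed source shape (not shown in the commit): array decay plus indexing
// now folds into a single getelementptr at -O0.
int at(int X) {
  int A[100] = {0};
  return A[X];     // one GEP: getelementptr [100 x i32]* %A, i32 0, i32 %X
}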
-
Ted Kremenek authored
llvm-svn: 106964
-
Chris Lattner authored
we're done diddling around with the index stuff. Use a cheaper type comparison. llvm-svn: 106963
-
Chris Lattner authored
llvm-svn: 106962
-
Chris Lattner authored
of being in CGF. No functionality change. llvm-svn: 106961
-
Chris Lattner authored
it for now. llvm-svn: 106960
-
Benjamin Kramer authored
llvm-svn: 106959
-
- Jun 26, 2010
-
Chris Lattner authored
llvm-svn: 106958
-
Chris Lattner authored
code so we can use it from VisitUnaryMinus. llvm-svn: 106957
-
Chris Lattner authored
rdar://7221421: As part of this, pull together trapv handling into the same enum. This also adds support for NSW multiplies. This also makes PCH disagreement on overflow behavior silent, since it really doesn't matter except for warnings and codegen (no macros get defined, etc.). llvm-svn: 106956
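A hedged illustration of the behaviors that the unified overflow handling chooses between; the function is made up, and the flag descriptions reflect general Clang behavior rather than anything specific to this commit.

// Hypothetical example (not from the commit): one signed multiply whose
// lowering depends on the signed-overflow mode.
int scale(int x, int y) {
  return x * y;
  // default:  'mul nsw i32' (signed overflow is undefined behavior)
  // -fwrapv:  plain 'mul i32' (wrapping two's-complement semantics)
  // -ftrapv:  an overflow-checked multiply that traps at run time
}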
-
Chris Lattner authored
rdar://7432000: While I'm in there, adjust pointer-to-member adjustments as well. llvm-svn: 106955
-
Benjamin Kramer authored
llvm-svn: 106954
-
Kenneth Uildriks authored
Partial specialization test should not depend on the order of specialization operations or the names assigned to the specialized functions. llvm-svn: 106953
-
Rafael Espindola authored
This produces terrible but correct code. llvm-svn: 106952
-
Bob Wilson authored
regressions.

--- Reverse-merging r106939 into '.':
U    test/CodeGen/Thumb2/thumb2-ifcvt3.ll
U    lib/CodeGen/IfConversion.cpp

llvm-svn: 106951
-
Chris Lattner authored
llvm-svn: 106950
-
Anders Carlsson authored
llvm-svn: 106949
-
Anders Carlsson authored
llvm-svn: 106948
-
Duncan Sands authored
the returned value after the tail call if it differs from other return values. The optimal thing to do would be to introduce a phi node for the return value, but for the moment just fix the miscompile. llvm-svn: 106947
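A heavily hedged sketch of the kind of shape involved; this function is an assumption for illustration, not the reported test case. The last return is a tail-call candidate, but the other path returns a different value, so the call's own result has to be reused after the call rather than conflated with that other return value.

// Hypothetical example (not from the commit): a tail call whose returned value
// differs from the value produced on the other return path.
int helper(int);

int dispatch(int x) {
  if (x == 0)
    return 0;          // one return value
  return helper(x);    // tail-call candidate returning a different value
}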
-
Gabor Greif authored
llvm-svn: 106946
-
Gabor Greif authored
llvm-svn: 106945
-
Gabor Greif authored
llvm-svn: 106944
-
Benjamin Kramer authored
llvm-svn: 106943
-
Gabor Greif authored
llvm-svn: 106942
-
Eli Friedman authored
llvm-svn: 106941
-
Eli Friedman authored
llvm-svn: 106940
-
Bob Wilson authored
if-conversion. The RemoveExtraEdges function doesn't work for blocks that end with unanalyzable branches, so in those cases, the "extra" edges must be explicitly removed. The CopyAndPredicateBlock and MergeBlocks methods can also avoid copying successor edges due to branches that have already been removed. The latter case is especially helpful when MergeBlocks is called for handling "diamond" if-conversions, where otherwise you can end up with some weird intermediate states in the CFG. Unfortunately I've been unable to find cases where this cleanup actually makes a significant difference in the code. There is one test where we manage to remove an empty block at the end of a function. Radar 6911268. llvm-svn: 106939
-
Bob Wilson authored
llvm-svn: 106938
-
Charles Davis authored
Also, fix mangling of throw specs. Turns out MSVC totally ignores throw specs when mangling names. llvm-svn: 106937
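A hedged illustration of the mangling point; these declarations and mangled names are for illustration only and are not from the commit. Under the MSVC scheme the throw specification is simply not encoded, so g mangles exactly as if it had no throw spec.

// Hypothetical example (not from the commit):
void f(int);              // MSVC mangling: roughly ?f@@YAXH@Z
void g(int) throw(int);   // throw spec ignored, so roughly ?g@@YAXH@Z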
-
Evan Cheng authored
llvm-svn: 106935
-
Jakob Stoklund Olesen authored
CopyFromReg nodes for aliasing registers (AX and AL). This confuses the fast register allocator. Instead of CopyFromReg(AL), use ExtractSubReg(CopyFromReg(AX), sub_8bit). This fixes PR7312. llvm-svn: 106934
-
Daniel Dunbar authored
llvm-svn: 106932
-