improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we used to compile this:

struct DeclGroup {
  unsigned NumDecls;
};

int foo(DeclGroup D);
void bar(DeclGroup *D) {
  foo(*D);
}

into:

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64 ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
  %1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
  store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
  %2 = load i64* %tmp3 ; <i64> [#uses=1]
  call void @_Z3foo9DeclGroup(i64 %2)
  ret void
}

which would cause fastisel to bail out due to the first class aggregate load %1.
With this patch we now compile it into the (still awful):

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64 ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
  %0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
  %1 = load i32* %coerce.dive ; <i32> [#uses=1]
  store i32 %1, i32* %0, align 1
  %2 = load i64* %tmp3 ; <i64> [#uses=1]
  %call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
  ret void
}

which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935, and is the precursor of better things to come.

llvm-svn: 106973
Loading
Please register or sign in to comment