Change CGCall to handle the "coerce" case where the coerce-to type
is an FCA to pass each of the elements as individual scalars.  This
produces code that fast isel is less likely to reject and is easier on
the optimizers.

For example, before we would compile:

struct DeclGroup { long NumDecls; char * Y; };

char * foo(DeclGroup D) {
  return D.NumDecls+D.Y;
}

to:

%struct.DeclGroup = type { i64, i64 }

define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8          ; <%struct.DeclGroup*> [#uses=3]
  store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp                          ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
  %tmp3 = load i64* %tmp2                         ; <i64> [#uses=1]
  %add = add nsw i64 %tmp1, %tmp3                 ; <i64> [#uses=1]
  ret i64 %add
}

Now we get:

%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }

define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8          ; <%struct.DeclGroup*> [#uses=3]
  %2 = insertvalue %0 undef, i64 %0, 0            ; <%0> [#uses=1]
  %3 = insertvalue %0 %2, i64 %1, 1               ; <%0> [#uses=1]
  %4 = bitcast %struct.DeclGroup* %D to %0*       ; <%0*> [#uses=1]
  store %0 %3, %0* %4, align 1
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp                          ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
  %tmp3 = load i8** %tmp2                         ; <i8*> [#uses=1]
  %add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
  ret i8* %add.ptr
}

Elimination of the FCA inside the function is still-to-come.

llvm-svn: 107099
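
For readers who want to see the expansion mechanically, the standalone sketch below shows the general idea of decomposing an FCA argument into scalars with extractvalue and forwarding the pieces as separate arguments. It is only an illustration, not the CGCall.cpp change itself: the function names (callee_scalars, caller) and the driver framing are invented for the example, and it uses the present-day IRBuilder API rather than the API from this revision.

// Illustrative sketch only, not Clang's CGCall implementation: decompose a
// first-class aggregate (FCA) coerce type into its elements and pass them
// as separate scalar arguments.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("fca-expand-demo", Ctx);
  IRBuilder<> B(Ctx);

  Type *I64 = B.getInt64Ty();
  // The FCA coerce type from the example above: { i64, i64 }.
  StructType *Coerce = StructType::get(Ctx, {I64, I64});

  // Hypothetical callee that takes the elements as two scalars rather than
  // one aggregate; this is the argument-list shape the commit prefers.
  Function *Callee = Function::Create(
      FunctionType::get(I64, {I64, I64}, /*isVarArg=*/false),
      Function::ExternalLinkage, "callee_scalars", M);

  // A caller that still receives the whole FCA and must expand it.
  Function *Caller = Function::Create(
      FunctionType::get(I64, {Coerce}, /*isVarArg=*/false),
      Function::ExternalLinkage, "caller", M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Caller);
  B.SetInsertPoint(Entry);

  // Pull each element out of the aggregate with extractvalue and pass the
  // pieces individually instead of passing the FCA as a single argument.
  Value *Agg = Caller->getArg(0);
  Value *Elt0 = B.CreateExtractValue(Agg, 0);
  Value *Elt1 = B.CreateExtractValue(Agg, 1);
  Value *Ret = B.CreateCall(Callee, {Elt0, Elt1});
  B.CreateRet(Ret);

  M.print(outs(), nullptr);
  return 0;
}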