Skip to content
  1. Aug 15, 2013
  2. Jun 28, 2010
    • Chris Lattner's avatar
      X86-64: · a7d81ab7
      Chris Lattner authored
      pass/return structs of float/int as float/i32 instead of double/i64
      to make the code generated for ABI cleaner.  Passing in the low part
      of a double is the same as passing in a float.
      
      For example, we now compile:
      
      struct DeclGroup { float NumDecls; };
      float foo(DeclGroup D);
      void bar(DeclGroup *D) {
       foo(*D);
      }
      
      into:
      
      %struct.DeclGroup = type { float }
      
      define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
      entry:
        %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
        %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
        store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
        %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
        %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
        %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
        %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
        %0 = load float* %coerce.dive, align 1          ; <float> [#uses=1]
        %call = call float @_Z3foo9DeclGroup(float %0)  ; <float> [#uses=0]
        ret void
      }
      
      instead of:
      
      %struct.DeclGroup = type { float }
      
      define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
      entry:
        %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
        %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
        %tmp3 = alloca double                           ; <double*> [#uses=2]
        store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
        %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
        %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
        %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
        %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
        %0 = bitcast double* %tmp3 to float*            ; <float*> [#uses=1]
        %1 = load float* %coerce.dive                   ; <float> [#uses=1]
        store float %1, float* %0, align 1
        %2 = load double* %tmp3                         ; <double> [#uses=1]
        %call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
        ret void
      }
      
      At -O0, the new IR compiles to this machine code:
      
      __Z3barP9DeclGroup:
      	subq	$24, %rsp
      	movq	%rdi, 16(%rsp)
      	movq	16(%rsp), %rdi
      	leaq	8(%rsp), %rax
      	movl	(%rdi), %ecx
      	movl	%ecx, (%rax)
      	movss	8(%rsp), %xmm0
      	callq	__Z3foo9DeclGroup
      	addq	$24, %rsp
      	ret
      
      vs this before the change:
      
      __Z3barP9DeclGroup:
      	subq	$24, %rsp
      	movq	%rdi, 16(%rsp)
      	movq	16(%rsp), %rdi
      	leaq	8(%rsp), %rax
      	movl	(%rdi), %ecx
      	movl	%ecx, (%rax)
      	movss	8(%rsp), %xmm0
      	movss	%xmm0, (%rsp)
      	movsd	(%rsp), %xmm0
      	callq	__Z3foo9DeclGroup
      	addq	$24, %rsp
      	ret
      
      At -O3, it is the difference between this now:
      
      __Z3barP9DeclGroup:
      	movss	(%rdi), %xmm0
      	jmp	__Z3foo9DeclGroup  # TAILCALL
      
      vs this before:
      
      __Z3barP9DeclGroup:
      	movl	(%rdi), %eax
      	movd	%rax, %xmm0
      	jmp	__Z3foo9DeclGroup  # TAILCALL
      
      llvm-svn: 107048
      a7d81ab7
  3. Feb 16, 2010
  4. Feb 09, 2010
    • Daniel Dunbar's avatar
      IRgen: Add CreateMemTemp, for creating a temporary memory object for a... · a7566f16
      Daniel Dunbar authored
      IRgen: Add CreateMemTemp, for creating a temporary memory object for a particular type, and flood fill. - CreateMemTemp sets the alignment on the alloca correctly, which fixes a great many places in IRgen where we were doing the wrong thing.
      
      - This fixes many many more places than the test case, but my feeling is we need to audit alignment systematically so I'm not inclined to try hard to test the individual fixes in this patch. If this bothers you, patches welcome!
      
      PR6240.
      
      llvm-svn: 95648
      a7566f16
Loading