Skip to content
README.txt 66.5 KiB
Newer Older
Chris Lattner's avatar
Chris Lattner committed
it could be:

define i32 @test__(i32 %a, i32 %b) nounwind readnone ssp {
entry:
  %0 = icmp sle i32 %a, %b
  %retval = zext i1 %0 to i32
  ret i32 %retval
}

//===---------------------------------------------------------------------===//
This code can be seen in viterbi:

  %64 = call noalias i8* @malloc(i64 %62) nounwind
...
  %67 = call i64 @llvm.objectsize.i64(i8* %64, i1 false) nounwind
  %68 = call i8* @__memset_chk(i8* %64, i32 0, i64 %62, i64 %67) nounwind

llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to
fold to %62.  This is a security win (overflows of malloc will get caught)
and also a performance win by exposing more memsets to the optimizer.

This occurs several times in viterbi.

Note that this would change the semantics of @llvm.objectsize which by its
current definition always folds to a constant. We also should make sure that
we remove checking in code like

  char *p = malloc(strlen(s)+1);
  __strcpy_chk(p, s, __builtin_objectsize(p, 0));

//===---------------------------------------------------------------------===//

This code (from Benchmarks/Dhrystone/dry.c):

define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
entry:
  %sext = shl i32 %0, 24
  %conv = ashr i32 %sext, 24
  %sext6 = shl i32 %1, 24
  %conv4 = ashr i32 %sext6, 24
  %cmp = icmp eq i32 %conv, %conv4
  %. = select i1 %cmp, i32 10000, i32 0
  ret i32 %.
}

Should be simplified into something like:

define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
entry:
  %sext = shl i32 %0, 24
  %conv = and i32 %sext, 0xFF000000
  %sext6 = shl i32 %1, 24
  %conv4 = and i32 %sext6, 0xFF000000
  %cmp = icmp eq i32 %conv, %conv4
  %. = select i1 %cmp, i32 10000, i32 0
  ret i32 %.
}

and then to:

define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
entry:
  %conv = and i32 %0, 0xFF
  %conv4 = and i32 %1, 0xFF
  %cmp = icmp eq i32 %conv, %conv4
  %. = select i1 %cmp, i32 10000, i32 0
  ret i32 %.
}
//===---------------------------------------------------------------------===//
clang -O3 currently compiles this code

int g(unsigned int a) {
  unsigned int c[100];
  c[10] = a;
  c[11] = a;
  unsigned int b = c[10] + c[11];
  if(b > a*2) a = 4;
  else a = 8;
  return a + 7;
}

into

define i32 @g(i32 a) nounwind readnone {
  %add = shl i32 %a, 1
  %mul = shl i32 %a, 1
  %cmp = icmp ugt i32 %add, %mul
  %a.addr.0 = select i1 %cmp, i32 11, i32 15
  ret i32 %a.addr.0
}

The icmp should fold to false. This CSE opportunity is only available
after GVN and InstCombine have run.

//===---------------------------------------------------------------------===//

memcpyopt should turn this:

define i8* @test10(i32 %x) {
  %alloc = call noalias i8* @malloc(i32 %x) nounwind
  call void @llvm.memset.p0i8.i32(i8* %alloc, i8 0, i32 %x, i32 1, i1 false)
  ret i8* %alloc
}

into a call to calloc.  We should make sure that we analyze calloc as
aggressively as malloc though.

//===---------------------------------------------------------------------===//

clang -03 currently compiles this code

void f1(int* begin, int* end) {
  std::fill(begin, end, 0);
}

into

define void @_Z2f1PiS_(i32* %begin, i32* %end) nounwind {
entry:
  %cmp7.i.i = icmp eq i32* %begin, %end
  br i1 %cmp7.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i

for.body.i.i:                                     ; preds = %entry, %for.body.i.i
  %indvar.i.i = phi i64 [ %tmp, %for.body.i.i ], [ 0, %entry ]
  %tmp = add i64 %indvar.i.i, 1
  %ptrincdec.i.i = getelementptr i32* %begin, i64 %tmp
  %__first.addr.08.i.i = getelementptr i32* %begin, i64 %indvar.i.i
  store i32 0, i32* %__first.addr.08.i.i, align 4, !tbaa !0
  %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
  br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i

_ZSt4fillIPiiEvT_S1_RKT0_.exit:                   ; preds = %for.body.i.i, %entry
  ret void
}

It should compile it to a memset.

//===---------------------------------------------------------------------===//

clang -O3 -fno-exceptions currently compiles this code:

void f(int N) {
  std::vector<int> v(N);
  g(v);
}

into

define void @_Z1fi(i32 %N) nounwind {
entry:
  %v2 = alloca [3 x i32*], align 8
  %v2.sub = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 0
  %tmpcast = bitcast [3 x i32*]* %v2 to %"class.std::vector"*
  %conv = sext i32 %N to i64
  store i32* null, i32** %v2.sub, align 8, !tbaa !0
  %tmp3.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 1
  store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
  %tmp4.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 2
  store i32* null, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
  %cmp.i.i.i.i = icmp eq i32 %N, 0
  br i1 %cmp.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i, label %cond.true.i.i.i.i

_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i: ; preds = %entry
  store i32* null, i32** %v2.sub, align 8, !tbaa !0
  store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
  %add.ptr.i5.i.i = getelementptr inbounds i32* null, i64 %conv
  store i32* %add.ptr.i5.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
  br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit

cond.true.i.i.i.i:                                ; preds = %entry
  %cmp.i.i.i.i.i = icmp slt i32 %N, 0
  br i1 %cmp.i.i.i.i.i, label %if.then.i.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i

if.then.i.i.i.i.i:                                ; preds = %cond.true.i.i.i.i
  call void @_ZSt17__throw_bad_allocv() noreturn nounwind
  unreachable

_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i:    ; preds = %cond.true.i.i.i.i
  %mul.i.i.i.i.i = shl i64 %conv, 2
  %call3.i.i.i.i.i = call noalias i8* @_Znwm(i64 %mul.i.i.i.i.i) nounwind
  %0 = bitcast i8* %call3.i.i.i.i.i to i32*
  store i32* %0, i32** %v2.sub, align 8, !tbaa !0
  store i32* %0, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
  %add.ptr.i.i.i = getelementptr inbounds i32* %0, i64 %conv
  store i32* %add.ptr.i.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
  call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %mul.i.i.i.i.i, i32 4, i1 false)
  br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit

This is just the handling the construction of the vector. Most surprising here
is the fact that all three null stores in %entry are dead, but not eliminated.
Also surprising is that %conv isn't simplified to 0 in %....exit.thread.i.i.

//===---------------------------------------------------------------------===//