README.txt

it could be:

define i32 @test__(i32 %a, i32 %b) nounwind readnone ssp {
entry:
  %0 = icmp sle i32 %a, %b
  %retval = zext i1 %0 to i32
  ret i32 %retval
}

//===---------------------------------------------------------------------===//

This code can be seen in viterbi:

  %64 = call noalias i8* @malloc(i64 %62) nounwind
...
  %67 = call i64 @llvm.objectsize.i64(i8* %64, i1 false) nounwind
  %68 = call i8* @__memset_chk(i8* %64, i32 0, i64 %62, i64 %67) nounwind

llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to
fold to %62.  This is a security win (overflows of malloc'd buffers will get
caught) and also a performance win by exposing more memsets to the optimizer.

This occurs several times in viterbi.

Note that this would change the semantics of @llvm.objectsize, which by its
current definition always folds to a constant. We should also make sure that
we remove the checking in code like:

  char *p = malloc(strlen(s)+1);
  __strcpy_chk(p, s, __builtin_objectsize(p, 0));

//===---------------------------------------------------------------------===//

This code (from Benchmarks/Dhrystone/dry.c):

define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
entry:
  %sext = shl i32 %0, 24
  %conv = ashr i32 %sext, 24
  %sext6 = shl i32 %1, 24
  %conv4 = ashr i32 %sext6, 24
  %cmp = icmp eq i32 %conv, %conv4
  %. = select i1 %cmp, i32 10000, i32 0
  ret i32 %.
}

Should be simplified into something like:

define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
entry:
  %sext = shl i32 %0, 24
  %conv = and i32 %sext, 0xFF000000
  %sext6 = shl i32 %1, 24
  %conv4 = and i32 %sext6, 0xFF000000
  %cmp = icmp eq i32 %conv, %conv4
  %. = select i1 %cmp, i32 10000, i32 0
  ret i32 %.
}

and then to:

define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
entry:
  %conv = and i32 %0, 0xFF
  %conv4 = and i32 %1, 0xFF
  %cmp = icmp eq i32 %conv, %conv4
  %. = select i1 %cmp, i32 10000, i32 0
  ret i32 %.
}
//===---------------------------------------------------------------------===//

clang -O3 currently compiles this code

int g(unsigned int a) {
  unsigned int c[100];
  c[10] = a;
  c[11] = a;
  unsigned int b = c[10] + c[11];
  if(b > a*2) a = 4;
  else a = 8;
  return a + 7;
}

into

define i32 @g(i32 %a) nounwind readnone {
  %add = shl i32 %a, 1
  %mul = shl i32 %a, 1
  %cmp = icmp ugt i32 %add, %mul
  %a.addr.0 = select i1 %cmp, i32 11, i32 15
  ret i32 %a.addr.0
}

The icmp should fold to false. This CSE opportunity is only available
after GVN and InstCombine have run.

//===---------------------------------------------------------------------===//

memcpyopt should turn this:

define i8* @test10(i32 %x) {
  %alloc = call noalias i8* @malloc(i32 %x) nounwind
  call void @llvm.memset.p0i8.i32(i8* %alloc, i8 0, i32 %x, i32 1, i1 false)
  ret i8* %alloc
}

into a call to calloc.  We should make sure that we analyze calloc as
aggressively as malloc though.

//===---------------------------------------------------------------------===//

clang -O3 currently compiles this code

void f1(int* begin, int* end) {
  std::fill(begin, end, 0);
}

into

define void @_Z2f1PiS_(i32* %begin, i32* %end) nounwind {
entry:
  %cmp7.i.i = icmp eq i32* %begin, %end
  br i1 %cmp7.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i

for.body.i.i:                                     ; preds = %entry, %for.body.i.i
  %indvar.i.i = phi i64 [ %tmp, %for.body.i.i ], [ 0, %entry ]
  %tmp = add i64 %indvar.i.i, 1
  %ptrincdec.i.i = getelementptr i32* %begin, i64 %tmp
  %__first.addr.08.i.i = getelementptr i32* %begin, i64 %indvar.i.i
  store i32 0, i32* %__first.addr.08.i.i, align 4, !tbaa !0
  %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
  br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i

_ZSt4fillIPiiEvT_S1_RKT0_.exit:                   ; preds = %for.body.i.i, %entry
  ret void
}

It should be compiled to a memset.

//===---------------------------------------------------------------------===//