Skip to content
  • Chris Lattner's avatar
    If an alloca is only ever accessed as a unit, and is accessed with load/store instructions, · 6fab2e94
    Chris Lattner authored
    then don't try to split it into its individual elements.  Doing so just makes a mess of the
    IR and is pointless if none of the elements are individually accessed.  This was generating
    really terrible code for std::bitset (PR8980) because clang happens to lower it
    as an {[8 x i8]} structure instead of {i64}.
    
    The testcase is now optimized to:
    
    define i64 @test2(i64 %X) {
      br label %L2
    
    L2:                                               ; preds = %0
      ret i64 %X
    }
    
    Before this change, we generated:
    
    define i64 @test2(i64 %X) {
      %sroa.store.elt = lshr i64 %X, 56
      %1 = trunc i64 %sroa.store.elt to i8
      %sroa.store.elt8 = lshr i64 %X, 48
      %2 = trunc i64 %sroa.store.elt8 to i8
      %sroa.store.elt9 = lshr i64 %X, 40
      %3 = trunc i64 %sroa.store.elt9 to i8
      %sroa.store.elt10 = lshr i64 %X, 32
      %4 = trunc i64 %sroa.store.elt10 to i8
      %sroa.store.elt11 = lshr i64 %X, 24
      %5 = trunc i64 %sroa.store.elt11 to i8
      %sroa.store.elt12 = lshr i64 %X, 16
      %6 = trunc i64 %sroa.store.elt12 to i8
      %sroa.store.elt13 = lshr i64 %X, 8
      %7 = trunc i64 %sroa.store.elt13 to i8
      %8 = trunc i64 %X to i8
      br label %L2
    
    L2:                                               ; preds = %0
      %9 = zext i8 %1 to i64
      %10 = shl i64 %9, 56
      %11 = zext i8 %2 to i64
      %12 = shl i64 %11, 48
      %13 = or i64 %12, %10
      %14 = zext i8 %3 to i64
      %15 = shl i64 %14, 40
      %16 = or i64 %15, %13
      %17 = zext i8 %4 to i64
      %18 = shl i64 %17, 32
      %19 = or i64 %18, %16
      %20 = zext i8 %5 to i64
      %21 = shl i64 %20, 24
      %22 = or i64 %21, %19
      %23 = zext i8 %6 to i64
      %24 = shl i64 %23, 16
      %25 = or i64 %24, %22
      %26 = zext i8 %7 to i64
      %27 = shl i64 %26, 8
      %28 = or i64 %27, %25
      %29 = zext i8 %8 to i64
      %30 = or i64 %29, %28
      ret i64 %30
    }
    
    In this case, instcombine was able to eliminate the redundant shift/trunc/zext/or chain,
    but in PR8980 enough PHIs are in play that instcombine backs off.  It's better not to
    generate this code in the first place.
    
    llvm-svn: 123571
    6fab2e94
Loading