Skip to content
Snippets Groups Projects
Commit 9aa91b1f authored by Benjamin Kramer's avatar Benjamin Kramer
Browse files

InstCombine: Turn (zext A) udiv (zext B) into (zext (A udiv B)). Same for urem or constant B.

This obviously helps a lot if the division would be turned into a libcall
(think i64 udiv on i386), but div is also one of the few remaining instructions
on modern CPUs that become more expensive when the bitwidth gets bigger.

This also helps register pressure on i386 when dividing chars: divb needs
two 8-bit parts of a 16-bit register as input, whereas divl uses two registers.

int foo(unsigned char a) { return a/10; }
int bar(unsigned char a, unsigned char b) { return a/b; }

compiles into (x86_64)
_foo:
  imull $205, %edi, %eax
  shrl  $11, %eax
  ret
_bar:
  movzbl        %dil, %eax
  divb  %sil, %al
  movzbl        %al, %eax
  ret

llvm-svn: 130615
parent 57b3df59
No related branches found
No related tags found
No related merge requests found
...@@ -336,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { ...@@ -336,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return 0; return 0;
} }
/// dyn_castZExtVal - Try to view V as a value of the narrower type Ty.
/// Two shapes of V are accepted:
///   * a zext instruction whose source type is exactly Ty, in which case the
///     un-extended operand is returned;
///   * a constant integer whose active bits fit within Ty's bit width, in
///     which case the constant truncated to Ty is returned.
/// In every other case the result is null.
static Value *dyn_castZExtVal(Value *V, const Type *Ty) {
  if (ZExtInst *Ext = dyn_cast<ZExtInst>(V)) {
    // A zext from any type other than Ty does not qualify.
    if (Ext->getSrcTy() != Ty)
      return 0;
    return Ext->getOperand(0);
  }

  ConstantInt *CI = dyn_cast<ConstantInt>(V);
  if (CI == 0)
    return 0;

  // The constant may only be narrowed when no set bit would be dropped.
  const unsigned NarrowBits = cast<IntegerType>(Ty)->getBitWidth();
  if (CI->getValue().getActiveBits() > NarrowBits)
    return 0;
  return ConstantExpr::getTrunc(CI, Ty);
}
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
...@@ -394,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { ...@@ -394,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return SelectInst::Create(Cond, TSI, FSI); return SelectInst::Create(Cond, TSI, FSI);
} }
} }
// (zext A) udiv (zext B) --> zext (A udiv B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
I.isExact()),
I.getType());
return 0; return 0;
} }
...@@ -568,7 +589,13 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { ...@@ -568,7 +589,13 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
return SelectInst::Create(Cond, TrueAnd, FalseAnd); return SelectInst::Create(Cond, TrueAnd, FalseAnd);
} }
} }
// (zext A) urem (zext B) --> zext (A urem B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
I.getType());
return 0; return 0;
} }
......
; RUN: opt < %s -instcombine -S | not grep zext ; RUN: opt < %s -instcombine -S | FileCheck %s
; PR4548
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; PR4548
define i8 @udiv_i8(i8 %a, i8 %b) nounwind { define i8 @udiv_i8(i8 %a, i8 %b) nounwind {
%conv = zext i8 %a to i32 %conv = zext i8 %a to i32
%conv2 = zext i8 %b to i32 %conv2 = zext i8 %b to i32
%div = udiv i32 %conv, %conv2 %div = udiv i32 %conv, %conv2
%conv3 = trunc i32 %div to i8 %conv3 = trunc i32 %div to i8
ret i8 %conv3 ret i8 %conv3
; CHECK: @udiv_i8
; CHECK: udiv i8 %a, %b
} }
define i8 @urem_i8(i8 %a, i8 %b) nounwind { define i8 @urem_i8(i8 %a, i8 %b) nounwind {
...@@ -17,5 +19,44 @@ define i8 @urem_i8(i8 %a, i8 %b) nounwind { ...@@ -17,5 +19,44 @@ define i8 @urem_i8(i8 %a, i8 %b) nounwind {
%div = urem i32 %conv, %conv2 %div = urem i32 %conv, %conv2
%conv3 = trunc i32 %div to i8 %conv3 = trunc i32 %div to i8
ret i8 %conv3 ret i8 %conv3
; CHECK: @urem_i8
; CHECK: urem i8 %a, %b
} }
; Both operands are zero-extended from i8 to i32 and the i32 quotient is
; returned directly. The checks below expect the division to be carried out
; on the original i8 values, with a zero-extension afterwards.
define i32 @udiv_i32(i8 %a, i8 %b) nounwind {
%conv = zext i8 %a to i32
%conv2 = zext i8 %b to i32
%div = udiv i32 %conv, %conv2
ret i32 %div
; CHECK: @udiv_i32
; CHECK: udiv i8 %a, %b
; CHECK: zext
}
; Both operands are zero-extended from i8 to i32 and the i32 remainder is
; returned directly. The checks below expect the remainder to be computed
; on the original i8 values, with a zero-extension afterwards.
define i32 @urem_i32(i8 %a, i8 %b) nounwind {
%conv = zext i8 %a to i32
%conv2 = zext i8 %b to i32
%div = urem i32 %conv, %conv2
ret i32 %div
; CHECK: @urem_i32
; CHECK: urem i8 %a, %b
; CHECK: zext
}
; The dividend is zero-extended from i8 to i32 and divided by the constant 10.
; The checks below expect the division to be carried out in i8 against an i8
; constant 10, with a zero-extension afterwards.
define i32 @udiv_i32_c(i8 %a) nounwind {
%conv = zext i8 %a to i32
%div = udiv i32 %conv, 10
ret i32 %div
; CHECK: @udiv_i32_c
; CHECK: udiv i8 %a, 10
; CHECK: zext
}
; The dividend is zero-extended from i8 to i32 and reduced modulo the
; constant 10. The checks below expect the remainder to be computed in i8
; against an i8 constant 10, with a zero-extension afterwards.
define i32 @urem_i32_c(i8 %a) nounwind {
%conv = zext i8 %a to i32
%div = urem i32 %conv, 10
ret i32 %div
; CHECK: @urem_i32_c
; CHECK: urem i8 %a, 10
; CHECK: zext
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment