- Oct 06, 2006
  - Evan Cheng authored (llvm-svn: 30759)
  - Evan Cheng authored (llvm-svn: 30758)
- Oct 05, 2006
  - Chris Lattner authored (llvm-svn: 30757)
  - Rafael Espindola authored (llvm-svn: 30756)
  - Rafael Espindola authored: implement FMDRR; add support for f64 function
    arguments (llvm-svn: 30754)
  - Jim Laskey authored (llvm-svn: 30753)
  - Chris Lattner authored: SimplifyDemandedBits. The idea is that some
    operations can be simplified if not all of the computed elements are
    needed. Some targets (like x86) have a large number of intrinsics that
    operate on a single element but pass the other elements through
    unmodified. If those other elements are not needed, the intrinsics can
    be simplified to scalar operations, and insertelement ops can be
    removed. This turns (f.e.):

        ushort %Convert_sse(float %f) {
            %tmp = insertelement <4 x float> undef, float %f, uint 0
            %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, uint 1
            %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, uint 2
            %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, uint 3
            %tmp28 = tail call <4 x float> %llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
            %tmp37 = tail call <4 x float> %llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
            %tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
            %tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
            %tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
            %tmp69 = cast int %tmp to ushort
            ret ushort %tmp69
        }

    into:

        ushort %Convert_sse(float %f) {
        entry:
            %tmp28 = sub float %f, 1.000000e+00
            %tmp37 = mul float %tmp28, 5.000000e-01
            %tmp375 = insertelement <4 x float> undef, float %tmp37, uint 0
            %tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )
            %tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )
            %tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
            %tmp69 = cast int %tmp to ushort
            ret ushort %tmp69
        }

    which improves codegen from:

        _Convert_sse:
            movss LCPI1_0, %xmm0
            movss 4(%esp), %xmm1
            subss %xmm0, %xmm1
            movss LCPI1_1, %xmm0
            mulss %xmm0, %xmm1
            movss LCPI1_2, %xmm0
            minss %xmm0, %xmm1
            xorps %xmm0, %xmm0
            maxss %xmm0, %xmm1
            cvttss2si %xmm1, %eax
            andl $65535, %eax
            ret

    to:

        _Convert_sse:
            movss 4(%esp), %xmm0
            subss LCPI1_0, %xmm0
            mulss LCPI1_1, %xmm0
            movss LCPI1_2, %xmm1
            minss %xmm1, %xmm0
            xorps %xmm1, %xmm1
            maxss %xmm1, %xmm0
            cvttss2si %xmm0, %eax
            andl $65535, %eax
            ret

    This is just a first step; it can be extended in many ways. Testcase:
    Transforms/InstCombine/vec_demanded_elts.ll. A toy sketch of the
    demanded-elements idea follows this day's entries. (llvm-svn: 30752)
  - Chris Lattner authored (llvm-svn: 30750)
  - Chris Lattner authored (llvm-svn: 30748)
  - Chris Lattner authored (llvm-svn: 30745)
  - Chris Lattner authored: allowing label differences to work. This fixes
    CodeGen/X86/pic_jumptable.ll. (llvm-svn: 30744)
  - Chris Lattner authored (llvm-svn: 30742)
  - Chris Lattner authored (llvm-svn: 30741)
  - Chris Lattner authored (llvm-svn: 30740)
  - Chris Lattner authored (llvm-svn: 30739)
  - Chris Lattner authored (llvm-svn: 30738)
  - Chris Lattner authored (llvm-svn: 30737)
  - Chris Lattner authored (llvm-svn: 30736)
  - Chris Lattner authored (llvm-svn: 30735)
  - Chris Lattner authored: function bodies (llvm-svn: 30733)
  - Chris Lattner authored (llvm-svn: 30732)
  - Chris Lattner authored (llvm-svn: 30731)
  - Chris Lattner authored: as the function body. (llvm-svn: 30730)
  - Chris Lattner authored: can happen on bugpoint-reduced testcases, f.e.
    (llvm-svn: 30729)
  - Evan Cheng authored (llvm-svn: 30727)
  - Chris Lattner authored (llvm-svn: 30726)
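
  The demanded-elements transformation in the llvm-svn: 30752 entry above can
  be pictured without LLVM's internals. The following is a minimal,
  hypothetical C++ sketch with toy types (nothing here is InstCombine's real
  interface): starting from a use that reads only lane 0, insertelement ops
  that write lanes nobody reads are dropped, which is exactly the effect seen
  in the transformed IR above.

      #include <cstdio>

      // A toy IR: a value is either 'undef' or insertelement(base, scalar, lane).
      struct Value {
          enum Kind { Undef, Insert } kind;
          Value* base;    // vector being inserted into (Insert only)
          float scalar;   // element written by the insert
          unsigned lane;  // lane written by the insert
      };

      // Return a value equivalent to V on the demanded lanes (one bit per
      // lane). Inserts into lanes the caller never reads are skipped.
      Value* simplifyDemanded(Value* V, unsigned demanded) {
          if (V->kind == Value::Insert) {
              if (!(demanded & (1u << V->lane)))   // lane never read: drop it
                  return simplifyDemanded(V->base, demanded);
              V->base = simplifyDemanded(V->base, demanded & ~(1u << V->lane));
          }
          return V;
      }

      int main() {
          // Mirror %tmp..%tmp12 above: put a value in lane 0, zero lanes 1..3.
          Value undef = {Value::Undef, nullptr, 0.0f, 0};
          Value t0 = {Value::Insert, &undef, 3.25f, 0};
          Value t1 = {Value::Insert, &t0, 0.0f, 1};
          Value t2 = {Value::Insert, &t1, 0.0f, 2};
          Value t3 = {Value::Insert, &t2, 0.0f, 3};

          // A scalar use such as cvttss2si demands only lane 0 (mask 0x1),
          // so the three zero-filling inserts disappear.
          Value* S = simplifyDemanded(&t3, 0x1);
          printf("kept: insert of %.2f into lane %u\n", S->scalar, S->lane);
          return 0;
      }

  In the actual patch the demanded mask is also pushed through the
  single-element SSE intrinsics themselves, which is what lets the sub and
  mul in the example collapse into scalar operations.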
- Oct 04, 2006
  - Chris Lattner authored (llvm-svn: 30725)
  - Chris Lattner authored (llvm-svn: 30724)
  - Evan Cheng authored (llvm-svn: 30723)
  - Evan Cheng authored (llvm-svn: 30722)
  - Jim Laskey authored (llvm-svn: 30721)
  - Jim Laskey authored (llvm-svn: 30720)
  - Chris Lattner authored: CodeGen/X86/scalar_sse_minmax.ll (llvm-svn: 30719)
  - Chris Lattner authored (llvm-svn: 30717)
  - Chris Lattner authored: ever is, we want to get an assert instead of
    silent bad codegen. (llvm-svn: 30716)
  - Jim Laskey authored (llvm-svn: 30715)
  - Evan Cheng authored: extra operand to LOADX to specify the exact value
    extension type; see the sketch after this list. (llvm-svn: 30714)
  - Chris Lattner authored: is needed to support targets where {|} aren't
    special symbols. (llvm-svn: 30712)
  - Evan Cheng authored (llvm-svn: 30711)
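
  To illustrate the LOADX entry above (llvm-svn: 30714): an extending load
  can carry its extension kind as an explicit operand instead of baking it
  into separate opcodes. Below is a hedged, self-contained C++ sketch of that
  idea only; it is not the actual SelectionDAG API, and the ExtType names are
  made up here to echo the usual sign-/zero-/any-extension distinction.

      #include <cstdio>

      // Extension applied when a narrow in-memory value is widened into a
      // wider register value (toy model).
      enum class ExtType { SExt, ZExt, AnyExt };

      // Toy "extended load": read an 8-bit memory value as a 32-bit result,
      // with the extension kind passed as data rather than as the opcode.
      int loadExtended(const unsigned char* mem, ExtType ext) {
          unsigned char raw = *mem;
          switch (ext) {
          case ExtType::SExt:   return (int)(signed char)raw; // replicate sign bit
          case ExtType::ZExt:   return (int)raw;              // upper bits zeroed
          case ExtType::AnyExt: return (int)raw;              // upper bits unspecified; zero here
          }
          return 0; // unreachable
      }

      int main() {
          unsigned char byte = 0x80; // -128 signed, 128 unsigned
          printf("sext: %d  zext: %d\n",
                 loadExtended(&byte, ExtType::SExt),
                 loadExtended(&byte, ExtType::ZExt));
          return 0;
      }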
- Oct 03, 2006
  - Chris Lattner authored (llvm-svn: 30709)