diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7d697f1102252f6e7ee38c73ac312cfebf7cbfba..af4ee8398118543db4c5de63171b78b401fdd867 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -263,6 +263,7 @@ namespace { SDOperand visitLOAD(SDNode *N); SDOperand visitSTORE(SDNode *N); SDOperand visitINSERT_VECTOR_ELT(SDNode *N); + SDOperand visitEXTRACT_VECTOR_ELT(SDNode *N); SDOperand visitBUILD_VECTOR(SDNode *N); SDOperand visitCONCAT_VECTORS(SDNode *N); SDOperand visitVECTOR_SHUFFLE(SDNode *N); @@ -682,6 +683,7 @@ SDOperand DAGCombiner::visit(SDNode *N) { case ISD::LOAD: return visitLOAD(N); case ISD::STORE: return visitSTORE(N); case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); @@ -2907,9 +2909,8 @@ SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::BIT_CONVERT, VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) - // If the resultant load doesn't need a higher alignment than the original! - if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && - ISD::isUNINDEXEDLoad(N0.Val) && + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.Val) && N0.hasOneUse() && TLI.isOperationLegal(ISD::LOAD, VT)) { LoadSDNode *LN0 = cast(N0); unsigned Align = TLI.getTargetMachine().getTargetData()-> @@ -3901,6 +3902,54 @@ SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return SDOperand(); } +SDOperand DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + SDOperand InVec = N->getOperand(0); + SDOperand EltNo = N->getOperand(1); + + // (vextract (v4f32 s2v (f32 load $addr)), 0) -> (f32 load $addr) + // (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr) + if (isa(EltNo)) { + unsigned Elt = cast(EltNo)->getValue(); + bool NewLoad = false; + if (Elt == 0) { + MVT::ValueType VT = InVec.getValueType(); + MVT::ValueType EVT = MVT::getVectorElementType(VT); + MVT::ValueType LVT = EVT; + unsigned NumElts = MVT::getVectorNumElements(VT); + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT::ValueType BCVT = InVec.getOperand(0).getValueType(); + if (NumElts != MVT::getVectorNumElements(BCVT)) + return SDOperand(); + InVec = InVec.getOperand(0); + EVT = MVT::getVectorElementType(BCVT); + NewLoad = true; + } + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).Val) && + InVec.getOperand(0).hasOneUse()) { + LoadSDNode *LN0 = cast(InVec.getOperand(0)); + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(LVT)); + if (!TLI.isOperationLegal(ISD::LOAD, LVT) || NewAlign > Align) + return SDOperand(); + Align = NewAlign; + } + + return DAG.getLoad(LVT, LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + } + } + } + return SDOperand(); +} + + SDOperand DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); MVT::ValueType VT = N->getValueType(0); diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 1570350c3805aff30f0420466cc729af21123b7d..f2795fff743e720a1a13da3fee4ac48877730313 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2942,11 +2942,3 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; - -// (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr) -def : Pat<(vector_extract - (bc_v4i32 (v4f32 (scalar_to_vector (loadf32 addr:$src)))), (iPTR 0)), - (MOV32rm addr:$src)>, Requires<[HasSSE2]>; -def : Pat<(vector_extract - (bc_v2i64 (v2f64 (scalar_to_vector (loadf64 addr:$src)))), (iPTR 0)), - (MOV64rm addr:$src)>, Requires<[HasSSE2, In64BitMode]>;