Newer
Older
Evan Cheng
committed
12001
12002
12003
12004
12005
12006
12007
12008
12009
12010
12011
12012
12013
12014
12015
12016
12017
12018
12019
12020
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
if (N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
((N00.getOpcode() == ISD::ANY_EXTEND ||
N00.getOpcode() == ISD::ZERO_EXTEND) &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
APInt ShAmt = N1C->getAPIntValue();
Mask = Mask.shl(ShAmt);
if (Mask != 0)
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
N00, DAG.getConstant(Mask, VT));
}
}
return SDValue();
}
Evan Cheng
committed
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
Evan Cheng
committed
EVT VT = N->getValueType(0);
if (!VT.isVector() && VT.isInteger() &&
N->getOpcode() == ISD::SHL)
return PerformSHLCombine(N, DAG);
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because the a constant vector is typically transformed to a constant pool
// so we have no knowledge of the shift amount.
if (!Subtarget->hasSSE2())
return SDValue();
Owen Anderson
committed
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
SDValue ShAmtOp = N->getOperand(1);
Owen Anderson
committed
EVT EltVT = VT.getVectorElementType();
DebugLoc DL = N->getDebugLoc();
if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElts = VT.getVectorNumElements();
unsigned i = 0;
for (; i != NumElts; ++i) {
SDValue Arg = ShAmtOp.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
BaseShAmt = Arg;
break;
for (; i != NumElts; ++i) {
SDValue Arg = ShAmtOp.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
if (Arg != BaseShAmt) {
return SDValue();
}
}
} else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
SDValue InVec = ShAmtOp.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElts = InVec.getValueType().getVectorNumElements();
unsigned i = 0;
for (; i != NumElts; ++i) {
SDValue Arg = InVec.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
BaseShAmt = Arg;
break;
}
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
Evan Cheng
committed
unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
}
if (BaseShAmt.getNode() == 0)
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
DAG.getIntPtrConstant(0));
} else
return SDValue();
Owen Anderson
committed
if (EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
// The shift amount is identical so we can do a vector shift.
SDValue ValOp = N->getOperand(0);
switch (N->getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
Owen Anderson
committed
if (VT == MVT::v2i64)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v4i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v8i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v4i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v8i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v2i64)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v4i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
Owen Anderson
committed
if (VT == MVT::v8i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
Owen Anderson
committed
DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
}
return SDValue();
}
12141
12142
12143
12144
12145
12146
12147
12148
12149
12150
12151
12152
12153
12154
12155
12156
12157
12158
12159
12160
12161
12162
12163
12164
12165
12166
12167
12168
12169
12170
12171
12172
12173
12174
12175
12176
12177
12178
12179
12180
12181
12182
12183
12184
12185
12186
12187
12188
12189
12190
12191
12192
12193
12194
12195
12196
12197
12198
12199
12200
12201
12202
12203
12204
12205
12206
12207
12208
12209
12210
12211
12212
12213
12214
12215
12216
12217
12218
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
// and friends. Likewise for OR -> CMPNEQSS.
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
unsigned opcode;
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
SDValue CMP1 = N1->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// The SETCCs should both refer to the same CMP.
if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
for (SDNode::use_iterator UI = N->use_begin(),
UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
case ISD::SELECT:
ExpectingFlags = true;
break;
case ISD::CopyToReg:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
}
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
cc0 = cc1;
cc1 = tmp;
}
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
X86ISD::NodeType NTOperator = is64BitFP ?
X86ISD::FSETCCsd : X86ISD::FSETCCss;
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
DAG.getConstant(x86cc, MVT::i8));
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
DAG.getConstant(1, MVT::i32));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
}
}
}
return SDValue();
}
12219
12220
12221
12222
12223
12224
12225
12226
12227
12228
12229
12230
12231
12232
12233
12234
12235
12236
12237
12238
12239
12240
12241
12242
12243
12244
12245
12246
12247
/// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector
/// so it can be folded inside ANDNP.
static bool CanFoldXORWithAllOnes(const SDNode *N) {
EVT VT = N->getValueType(0);
// Match direct AllOnes for 128 and 256-bit vectors
if (ISD::isBuildVectorAllOnes(N))
return true;
// Look through a bit convert.
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
// Sometimes the operand may come from a insert_subvector building a 256-bit
// allones vector
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
if (VT.getSizeInBits() == 256 &&
N->getOpcode() == ISD::INSERT_SUBVECTOR &&
V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
V1.getOperand(0).getOpcode() == ISD::UNDEF &&
ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
ISD::isBuildVectorAllOnes(V2.getNode()))
return true;
return false;
}
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
// Want to form ANDNP nodes:
// 1) In the hopes of then easily combining them with OR and AND nodes
// to form PBLEND/PSIGN.
// 2) To match ANDN packed intrinsics
EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// Check LHS for vnot
//ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
Bruno Cardoso Lopes
committed
return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
//ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
Bruno Cardoso Lopes
committed
return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
if (Subtarget->hasSSSE3()) {
if (VT == MVT::v2i64) {
// Canonicalize pandn to RHS
Bruno Cardoso Lopes
committed
if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1);
// or (and (m, x), (pandn m, y))
Bruno Cardoso Lopes
committed
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
if (N0.getOperand(0) == Mask)
Y = N0.getOperand(1);
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
Bruno Cardoso Lopes
committed
// Check to see if the mask appeared in both the AND and ANDNP and
if (!Y.getNode())
return SDValue();
// Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
if (Mask.getOpcode() != ISD::BITCAST ||
X.getOpcode() != ISD::BITCAST ||
Y.getOpcode() != ISD::BITCAST)
return SDValue();
// Look through mask bitcast.
Mask = Mask.getOperand(0);
EVT MaskVT = Mask.getValueType();
// Validate that the Mask operand is a vector sra node. The sra node
// will be an intrinsic.
if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
return SDValue();
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
break;
default: return SDValue();
}
// Check that the SRA is all signbits.
SDValue SraC = Mask.getOperand(2);
unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
if ((SraAmt + 1) != EltBits)
return SDValue();
12353
12354
12355
12356
12357
12358
12359
12360
12361
12362
12363
12364
12365
12366
12367
12368
12369
12370
12371
12372
12373
12374
12375
12376
12377
DebugLoc DL = N->getDebugLoc();
// Now we know we at least have a plendvb with the mask val. See if
// we can form a psignb/w/d.
// psign = x.type == y.type == mask.type && y = sub(0, x);
X = X.getOperand(0);
Y = Y.getOperand(0);
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
unsigned Opc = 0;
switch (EltBits) {
case 8: Opc = X86ISD::PSIGNB; break;
case 16: Opc = X86ISD::PSIGNW; break;
case 32: Opc = X86ISD::PSIGND; break;
default: break;
}
if (Opc) {
SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
}
}
// PBLENDVB only available on SSE 4.1
if (!Subtarget->hasSSE41())
return SDValue();
X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
Nate Begeman
committed
Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
}
}
}
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
12395
12396
12397
12398
12399
12400
12401
12402
12403
12404
12405
12406
12407
12408
12409
12410
12411
12412
12413
12414
12415
12416
SDValue ShAmt0 = N0.getOperand(1);
if (ShAmt0.getValueType() != MVT::i8)
return SDValue();
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
ShAmt1 = ShAmt1.getOperand(0);
DebugLoc DL = N->getDebugLoc();
unsigned Opc = X86ISD::SHLD;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
if (ShAmt0.getOpcode() == ISD::SUB) {
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
}
unsigned Bits = VT.getSizeInBits();
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C &&
ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
return SDValue();
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
Evan Cheng
committed
StoreSDNode *St = cast<StoreSDNode>(N);
Owen Anderson
committed
EVT VT = St->getValue().getValueType();
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
if ((VT.isVector() ||
Owen Anderson
committed
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
Gabor Greif
committed
SDNode* LdVal = St->getValue().getNode();
LoadSDNode *Ld = 0;
int TokenFactorIndex = -1;
Gabor Greif
committed
SDNode* ChainVal = St->getChain().getNode();
// Must be a store of a load. We currently handle two cases: the load
// is a direct child, and it's under an intervening TokenFactor. It is
// possible to dig deeper under nested TokenFactors.
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
Gabor Greif
committed
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
} else
Ops.push_back(ChainVal->getOperand(i));
}
}
if (!Ld || !ISD::isNormalLoad(Ld))
return SDValue();
// If this is not the MMX case, i.e. we are just turning i64 load/store
// into f64 load/store, avoid the transformation if there are multiple
// uses of the loaded value.
if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
return SDValue();
DebugLoc LdDL = Ld->getDebugLoc();
DebugLoc StDL = N->getDebugLoc();
// If we are a 64-bit capable x86, lower to a single movq load/store pair.
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
Owen Anderson
committed
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
Owen Anderson
committed
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
Ops.size());
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getPointerInfo(),
// Otherwise, lower to two pairs of 32-bit loads / stores.
SDValue LoAddr = Ld->getBasePtr();
Owen Anderson
committed
SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
DAG.getConstant(4, MVT::i32));
Owen Anderson
committed
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(),
Owen Anderson
committed
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
MinAlign(Ld->getAlignment(), 4));
SDValue NewChain = LoLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(LoLd);
Ops.push_back(HiLd);
Owen Anderson
committed
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
Ops.size());
}
LoAddr = St->getBasePtr();
Owen Anderson
committed
HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
DAG.getConstant(4, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
St->getPointerInfo().getWithOffset(4),
St->isVolatile(),
MinAlign(St->getAlignment(), 4));
Owen Anderson
committed
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
// F[X]OR(x, 0.0) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
}
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
// FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
}
static SDValue PerformBTCombine(SDNode *N,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
SDValue Op1 = N->getOperand(1);
if (Op1.hasOneUse()) {
unsigned BitWidth = Op1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
DCI.CommitTargetLoweringOpt(TLO);
}
return SDValue();
}
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
Wesley Peck
committed
if (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
Owen Anderson
committed
EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
Wesley Peck
committed
return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
return SDValue();
}
12618
12619
12620
12621
12622
12623
12624
12625
12626
12627
12628
12629
12630
12631
12632
12633
12634
12635
12636
12637
12638
12639
12640
12641
12642
12643
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
// ISD::SETCC is always legalized to i8.
DebugLoc dl = N->getDebugLoc();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() != X86ISD::SETCC_CARRY)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 1)
return SDValue();
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, VT));
}
return SDValue();
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
unsigned X86CC = N->getConstantOperandVal(0);
SDValue EFLAG = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
if (X86CC == X86::COND_B)
return DAG.getNode(ISD::AND, DL, MVT::i8,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), EFLAG),
DAG.getConstant(1, MVT::i8));
return SDValue();
}
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT VT = Ld->getValueType(0);
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
}
return SDValue();
}
12683
12684
12685
12686
12687
12688
12689
12690
12691
12692
12693
12694
12695
12696
12697
12698
12699
12700
12701
12702
12703
12704
12705
12706
12707
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
if (X86::isZeroNode(N->getOperand(0)) &&
X86::isZeroNode(N->getOperand(1)) &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
DebugLoc DL = N->getDebugLoc();
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B,MVT::i8),
N->getOperand(2)),
DAG.getConstant(1, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
return SDValue();
}
// fold (add Y, (sete X, 0)) -> adc 0, Y
// (add Y, (setne X, 0)) -> sbb -1, Y
// (sub (sete X, 0), Y) -> sbb 0, Y
// (sub (setne X, 0), Y) -> adc -1, Y
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DebugLoc DL = N->getDebugLoc();
// Look through ZExts.
SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
return SDValue();
SDValue SetCC = Ext.getOperand(0);
if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
return SDValue();
X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
SDValue Cmp = SetCC.getOperand(1);
if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
Chris Lattner
committed
!X86::isZeroNode(Cmp.getOperand(1)) ||
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
SDValue CmpOp0 = Cmp.getOperand(0);
SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
DAG.getConstant(1, CmpOp0.getValueType()));
SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
if (CC == X86::COND_NE)
return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
DL, OtherVal.getValueType(), OtherVal,
DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
DL, OtherVal.getValueType(), OtherVal,
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
12748
12749
12750
12751
12752
12753
12754
12755
12756
12757
12758
12759
12760
12761
12762
12763
12764
12765
12766
12767
12768
12769
12770
12771
12772
12773
12774
static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// X86 can't encode an immediate LHS of a sub. See if we can push the
// negation into a preceding instruction.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
uint64_t Op0C = C->getSExtValue();
// If the RHS of the sub is a XOR with one use and a constant, invert the
// immediate. Then add one to the LHS of the sub so we can turn
// X-Y -> X+~Y+1, saving one register.
if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
isa<ConstantSDNode>(Op1.getOperand(1))) {
uint64_t XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
Op1.getOperand(0),
DAG.getConstant(~XorC, VT));
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
DAG.getConstant(Op0C+1, VT));
}
}
return OptimizeConditionalInDecrement(N, DAG);
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
case ISD::SUB: return PerformSubCombine(N, DAG);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
Evan Cheng
committed
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
Evan Cheng
committed
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
Bruno Cardoso Lopes
committed
case X86ISD::PALIGN:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
Bruno Cardoso Lopes
committed
case X86ISD::VUNPCKHPSY:
case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::VPERMILPS:
case X86ISD::VPERMILPSY:
case X86ISD::VPERMILPD:
case X86ISD::VPERMILPDY:
Mon P Wang
committed
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
}
}
/// isTypeDesirableForOp - Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
return true;
switch (Opc) {
default:
return true;
case ISD::LOAD:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return false;
}
}
/// IsDesirableToPromoteOp - This method query the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
if (VT != MVT::i16)
return false;
bool Promote = false;
bool Commute = false;
switch (Op.getOpcode()) {
default: break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
// If the non-extending load has a single use and it's not live out, then it
// might be folded.
if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
Op.hasOneUse()*/) {
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI) {
// The only case where we'd want to promote LOAD (rather then it being
// promoted as an operand is when it's only use is liveout.
if (UI->getOpcode() != ISD::CopyToReg)
return false;
}
}
Promote = true;
break;
}
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Promote = true;
break;
case ISD::SHL:
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
Evan Cheng
committed
if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
return false;
Promote = true;
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
Commute = true;
// fallthrough
case ISD::SUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
Evan Cheng
committed
if (!Commute && MayFoldLoad(N1))
return false;
// Avoid disabling potential load folding opportunities.
Evan Cheng
committed
if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
return false;
Evan Cheng
committed
if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
return false;
Promote = true;
}
}
PVT = MVT::i32;
return Promote;
}
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
AsmStr = AsmPieces[0];
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
// FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical ops
// instead of emitting the bswap asm. For now, we don't support 486 or lower
// so don't worry about this.
// bswap $0
if (AsmPieces.size() == 2 &&
(AsmPieces[0] == "bswap" ||
AsmPieces[0] == "bswapq" ||
AsmPieces[0] == "bswapl") &&
(AsmPieces[1] == "$0" ||
AsmPieces[1] == "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
AsmPieces.size() == 3 &&
(AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
AsmPieces[1] == "$$8," &&
AsmPieces[2] == "${0:w}" &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
std::sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
SmallVector<StringRef, 4> Words;
SplitString(AsmPieces[0], Words, " \t,");