"git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "4fca71eb44725be4428567be6b5a46659e6d8685"
Newer
Older
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
assert(!"TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
    // The defs of TCRETURNxx64 include Win64's callee-saved registers as a
    // subset. On non-Win64 AMD64 targets, the additional defs must be added
    // before register allocation.
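    // (RSI, RDI and XMM6-XMM15 are callee-saved under the Win64 ABI but
    //  volatile under the System V AMD64 ABI, so a tail call may clobber
    //  them there; marking them as defs models that clobber.)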
if (!Subtarget->isTargetWin64()) {
MI->addRegisterDefined(X86::RSI);
MI->addRegisterDefined(X86::RDI);
MI->addRegisterDefined(X86::XMM6);
MI->addRegisterDefined(X86::XMM7);
MI->addRegisterDefined(X86::XMM8);
MI->addRegisterDefined(X86::XMM9);
MI->addRegisterDefined(X86::XMM10);
MI->addRegisterDefined(X86::XMM11);
MI->addRegisterDefined(X86::XMM12);
MI->addRegisterDefined(X86::XMM13);
MI->addRegisterDefined(X86::XMM14);
MI->addRegisterDefined(X86::XMM15);
}
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
return EmitLoweredSelect(MI, BB);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM:
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
                      CWFrameIdx);
// Set the high part to be round to zero...
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
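    // (0xC7F sets the rounding-control bits [11:10] of the x87 control word
    //  to 11b = round toward zero, matching C's truncating float-to-int
    //  conversion, while keeping the exception-mask bits set.)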
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI->getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
}
X86AddressMode AM;
MachineOperand &Op = MI->getOperand(0);
if (Op.isReg()) {
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImm())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImm())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobal()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
      .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
    // Reload the original control word now.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), CWFrameIdx);
    MI->eraseFromParent();   // The pseudo instruction is gone now.
    return BB;
  }
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
    return EmitPCMP(MI, BB, 5, false /* in-mem */);
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
    return EmitPCMP(MI, BB, 5, true /* in-mem */);
// Thread synchronization.
case X86::MONITOR:
case X86::MWAIT:
return EmitMwait(MI, BB);
// Atomic Lowering.
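  // Each Emit*WithCustomInserter below expands its pseudo into a
  // compare-and-swap loop: load the old value, apply the operation (the
  // trailing 'true' on the NAND forms adds an extra NOT), then LCMPXCHG and
  // branch back if another thread changed the location in the meantime.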
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
X86::OR32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMXOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
X86::XOR32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMNAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass, true);
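  // The ATOMMIN/MAX pseudos below use the same compare-exchange loop, with
  // the given CMOVcc (signed L/G, unsigned B/A) selecting between the old
  // and the proposed value.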
case X86::ATOMMIN32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
case X86::ATOMMAX32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
case X86::ATOMUMIN32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
case X86::ATOMUMAX32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
case X86::ATOMAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
X86::OR16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMXOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
X86::XOR16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMNAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass, true);
case X86::ATOMMIN16:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
case X86::ATOMMAX16:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
case X86::ATOMUMIN16:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
case X86::ATOMUMAX16:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
case X86::ATOMAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
X86::OR8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMXOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
X86::XOR8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMNAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
// This group is for 64-bit host.
case X86::ATOMAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
X86::OR64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMXOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
X86::XOR64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMNAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass, true);
case X86::ATOMMIN64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
case X86::ATOMMAX64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
case X86::ATOMUMIN64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
case X86::ATOMUMAX64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
// This group does 64-bit operations on a 32-bit host.
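  // These expand to a loop around LCMPXCHG8B; each operation supplies a
  // low-half and a high-half opcode (e.g. ADD then ADC) so the carry or
  // borrow propagates across the two 32-bit halves.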
case X86::ATOMAND6432:
return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::AND32rr, X86::AND32rr,
X86::AND32ri, X86::AND32ri,
false);
case X86::ATOMOR6432:
return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::OR32rr, X86::OR32rr,
X86::OR32ri, X86::OR32ri,
false);
case X86::ATOMXOR6432:
return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::XOR32rr, X86::XOR32rr,
X86::XOR32ri, X86::XOR32ri,
false);
case X86::ATOMNAND6432:
return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::AND32rr, X86::AND32rr,
X86::AND32ri, X86::AND32ri,
true);
case X86::ATOMADD6432:
return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::ADD32rr, X86::ADC32rr,
X86::ADD32ri, X86::ADC32ri,
false);
case X86::ATOMSUB6432:
return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::SUB32rr, X86::SBB32rr,
X86::SUB32ri, X86::SBB32ri,
false);
  case X86::ATOMSWAP6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
X86::MOV32rr, X86::MOV32rr,
X86::MOV32ri, X86::MOV32ri,
false);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.

  switch (Opc) {
  default: break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
// Fallthrough
case X86ISD::SETCC:
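    // SETCC materializes only 0 or 1, so every bit above bit 0 is known zero.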
KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
Mask.getBitWidth() - 1);
break;
  }
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
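  // Every bit of such a value equals the sign bit, so all bits count as sign
  // bits.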
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
return Op.getValueType().getScalarType().getSizeInBits();
// Fallback case.
return 1;
}
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
const GlobalValue* &GA,
int64_t &Offset) const {
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
return true;
}
}
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc dl = N->getDebugLoc();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
EVT VT = SVOp->getValueType(0);
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
V2.getOpcode() == ISD::CONCAT_VECTORS) {
    //
    //                   0,0,0,...
    //                      |
    //    V      UNDEF    BUILD_VECTOR    UNDEF
    //     \      /           \           /
    //     CONCAT_VECTOR      CONCAT_VECTOR
    //         \                  /
    //          \                /
    //          RESULT: V + zero extended
    //
if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
V2.getOperand(1).getOpcode() != ISD::UNDEF ||
V1.getOperand(1).getOpcode() != ISD::UNDEF)
return SDValue();
if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
return SDValue();
// To match the shuffle mask, the first half of the mask should
// be exactly the first vector, and all the rest a splat with the
// first element of the second one.
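    // (For example, for a v8i32 shuffle the expected mask is
    //  <0,1,2,3, 8,8,8,8>: an identity first half, then a splat of element 0
    //  of the all-zeros second concat input.)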
int NumElems = VT.getVectorNumElements();
for (int i = 0; i < NumElems/2; ++i)
if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
return SDValue();
// Emit a zeroed vector and insert the desired subvector on its
// first half.
SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
DAG.getConstant(0, MVT::i32), DAG, dl);
return DCI.CombineTo(N, InsV);
}
return SDValue();
}
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
// Only handle pure VECTOR_SHUFFLE nodes.
if (VT.getSizeInBits() == 256 && N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI);
  // Only handle 128-bit wide vectors from here on.
if (VT.getSizeInBits() != 128)
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
  return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue InputVector = N->getOperand(0);
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (InputVector.getValueType() != MVT::v4i32)
return SDValue();
// Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
// single use which is a sign-extend or zero-extend, and all elements are
// used.
SmallVector<SDNode *, 4> Uses;
unsigned ExtractedElements = 0;
for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
if (UI.getUse().getResNo() != InputVector.getResNo())
return SDValue();
SDNode *Extract = *UI;
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
if (Extract->getValueType(0) != MVT::i32)
return SDValue();
if (!Extract->hasOneUse())
return SDValue();
if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
if (!isa<ConstantSDNode>(Extract->getOperand(1)))
return SDValue();
// Record which element was extracted.
ExtractedElements |=
1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
Uses.push_back(Extract);
}
// If not all the elements were used, this may not be worthwhile.
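  // (Each extract above set one bit, so a mask of 15 == 0b1111 means all four
  //  lanes of the v4i32 input were extracted.)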
if (ExtractedElements != 15)
return SDValue();
// Ok, we've now decided to do the transformation.
DebugLoc dl = InputVector.getDebugLoc();
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
MachinePointerInfo(), false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
SDValue Idx = Extract->getOperand(1);
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
    SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
                                     StackPtr, OffsetVal);
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
ScalarAddr, MachinePointerInfo(),
false, false, 0);
    // Replace the extract with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
}
// The replacement was made in place; don't return anything.
return SDValue();
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
  SDValue Cond = N->getOperand(0);
  // Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
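  // (SSE MINPS/MAXPS return the second source operand when the comparison is
  //  false or unordered, so operand order determines the NaN and -0.0
  //  behavior; the case analysis below picks or swaps operands accordingly.)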
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
if (!UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!UnsafeFPMath &&
            !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMAX;
        break;
      }
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
        Opcode = X86ISD::FMIN;
        break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
if (!UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
        Opcode = X86ISD::FMAX;
        break;
case ISD::SETOLE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
      }
    }
if (Opcode)
      return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
  }
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
// Don't do this for crazy integer types.
if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
// If this is efficiently invertible, canonicalize the LHSC/RHSC values
// so that TrueC (the true value) is larger than FalseC.
bool NeedsCondInvert = false;
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
// Efficiently invertible.
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
(Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
isa<ConstantSDNode>(Cond.getOperand(1))))) {
NeedsCondInvert = true;
        std::swap(TrueC, FalseC);
      }
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
if (FalseC->getAPIntValue() == 0 &&
TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
                           DAG.getConstant(ShAmt, MVT::i8));
      }
      // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                           SDValue(FalseC, 0));
      }
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
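      // (LEA computes base + index*scale with scale in {1,2,4,8}; using the
      //  condition as both base and index also gives multipliers 3, 5 and 9.)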
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
}
      }
  }
return SDValue();
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc DL = N->getDebugLoc();
// If the flag operand isn't dead, don't touch this CMOV.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
SDValue FalseOp = N->getOperand(0);
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
case X86ISD::BSR:
case X86ISD::BSF:
      // If the operand of BSR / BSF is proven never zero, ZF cannot be set.
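      // (BSF/BSR set ZF exactly when their source operand is zero, so a
      //  known-nonzero source pins ZF to 0, deciding COND_E/COND_NE.)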
if (DAG.isKnownNeverZero(Cond.getOperand(0)))
return (CC == X86::COND_E) ? FalseOp : TrueOp;
}
}
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
      // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
}
}
  }

  return SDValue();
}
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
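/// For example, x*45 can be lowered as t = x*9 (one LEA) followed by t*5
/// (a second LEA), avoiding the higher-latency IMUL.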
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
uint64_t MulAmt = C->getZExtValue();
if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
return SDValue();
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
if ((MulAmt % 9) == 0) {
MulAmt1 = 9;
MulAmt2 = MulAmt / 9;
} else if ((MulAmt % 5) == 0) {
MulAmt1 = 5;
MulAmt2 = MulAmt / 5;
} else if ((MulAmt % 3) == 0) {
MulAmt1 = 3;
MulAmt2 = MulAmt / 3;
}
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
DebugLoc DL = N->getDebugLoc();
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
      // If the second multiplier is pow2, issue it first. We want the multiply
// 3, 5, or 9 to be folded into the addressing mode unless the lone use
// is an add.
std::swap(MulAmt1, MulAmt2);
    SDValue NewMul;
    if (isPowerOf2_64(MulAmt1))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                           DAG.getConstant(MulAmt1, VT));

    if (isPowerOf2_64(MulAmt2))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
                           DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                           DAG.getConstant(MulAmt2, VT));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
}
return SDValue();
}
static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();