Newer
Older
int lastAddrIndx = 3; // [0,3]
int valArgIndx = 4;
unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
&& "invalid operand");
if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, TII->get(regOpc), t2);
else
MIB = BuildMI(newMBB, TII->get(immOpc), t2);
MIB.addReg(t1);
(*MIB).addOperand(*argOpers[valArgIndx]);
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
MIB.addReg(t1);
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t2);
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
MIB.addReg(X86::EAX);
// insert branch
BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
delete bInstr; // The pseudo instruction is gone now.
return nextMBB;
}
// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *MBB,
unsigned cmovOpc) {
// For the atomic min/max operator, we generate
// thisMBB:
// newMBB:
// mov t2 = [min/max.val]
// cmp t1, t2
// cmov[cond] t2 = t1
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
// lcs dest = [bitinstr.addr], t2 [EAX is implicit]
// bz newMBB
// fallthrough -->nextMBB
//
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
ilist<MachineBasicBlock>::iterator MBBIter = MBB;
++MBBIter;
/// First build the CFG
MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
F->getBasicBlockList().insert(MBBIter, newMBB);
F->getBasicBlockList().insert(MBBIter, nextMBB);
// Move all successors to thisMBB to nextMBB
nextMBB->transferSuccessors(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
// newMBB jumps to newMBB and fall through to nextMBB
newMBB->addSuccessor(nextMBB);
newMBB->addSuccessor(newMBB);
// Insert instructions into newMBB based on incoming instruction
assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
MachineOperand& destOper = mInstr->getOperand(0);
MachineOperand* argOpers[6];
int numArgs = mInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &mInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
int lastAddrIndx = 3; // [0,3]
int valArgIndx = 4;
unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
// We only support register and immediate values
assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
&& "invalid operand");
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
else
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
(*MIB).addOperand(*argOpers[valArgIndx]);
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
MIB.addReg(t1);
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
MIB.addReg(t1);
MIB.addReg(t2);
// Generate movc
unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
MIB = BuildMI(newMBB, TII->get(cmovOpc),t3);
MIB.addReg(t2);
MIB.addReg(t1);
// Cmp and exchange if none has modified the memory location
MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t3);
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
MIB.addReg(X86::EAX);
// insert branch
BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
delete mInstr; // The pseudo instruction is gone now.
return nextMBB;
}
Evan Cheng
committed
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
ilist<MachineBasicBlock>::iterator It = BB;
++It;
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
unsigned Opc =
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
MachineFunction *F = BB->getParent();
F->getBasicBlockList().insert(It, copy0MBB);
F->getBasicBlockList().insert(It, sinkMBB);
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
sinkMBB->transferSuccessors(BB);
// Add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
delete MI; // The pseudo instruction is gone now.
return BB;
}
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM:
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
// Set the high part to be round to zero...
addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI->getOpcode()) {
default: assert(0 && "illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
}
X86AddressMode AM;
MachineOperand &Op = MI->getOperand(0);
if (Op.isRegister()) {
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
if (Op.isImmediate())
AM.Scale = Op.getImm();
Op = MI->getOperand(2);
if (Op.isImmediate())
AM.IndexReg = Op.getImm();
Op = MI->getOperand(3);
if (Op.isGlobalAddress()) {
AM.GV = Op.getGlobal();
} else {
AM.Disp = Op.getImm();
addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
.addReg(MI->getOperand(4).getReg());
// Reload the original control word now.
addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
delete MI; // The pseudo instruction is gone now.
return BB;
}
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri);
case X86::ATOMOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
X86::OR32ri);
case X86::ATOMXOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
X86::XOR32ri);
case X86::ATOMMIN32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
case X86::ATOMMAX32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
case X86::ATOMUMIN32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
case X86::ATOMUMAX32:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
case X86ISD::SETCC:
KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
Mask.getBitWidth() - 1);
break;
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
Evan Cheng
committed
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
GlobalValue* &GA, int64_t &Offset) const{
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
return true;
}
}
Evan Cheng
committed
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
Evan Cheng
committed
static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
const TargetLowering &TLI) {
GlobalValue *GV;
int64_t Offset = 0;
Evan Cheng
committed
if (TLI.isGAPlusOffset(Base, GV, Offset))
Evan Cheng
committed
return (GV->getAlignment() >= N && (Offset % N) == 0);
// DAG combine handles the stack object case.
return false;
}
Evan Cheng
committed
static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
unsigned NumElems, MVT::ValueType EVT,
Evan Cheng
committed
SDNode *&Base,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
Evan Cheng
committed
Base = NULL;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Idx = PermMask.getOperand(i);
if (Idx.getOpcode() == ISD::UNDEF) {
if (!Base)
return false;
continue;
}
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
SDOperand Elt = DAG.getShuffleScalarElt(N, Index);
Evan Cheng
committed
if (!Elt.Val ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
return false;
if (!Base) {
Base = Elt.Val;
Evan Cheng
committed
if (Base->getOpcode() == ISD::UNDEF)
return false;
Evan Cheng
committed
continue;
}
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Evan Cheng
committed
if (!TLI.isConsecutiveLoad(Elt.Val, Base,
MVT::getSizeInBits(EVT)/8, i, MFI))
Evan Cheng
committed
return false;
}
return true;
}
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
Evan Cheng
committed
const TargetLowering &TLI) {
Evan Cheng
committed
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MVT::ValueType VT = N->getValueType(0);
MVT::ValueType EVT = MVT::getVectorElementType(VT);
SDOperand PermMask = N->getOperand(2);
unsigned NumElems = PermMask.getNumOperands();
SDNode *Base = NULL;
Evan Cheng
committed
if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
DAG, MFI, TLI))
Evan Cheng
committed
return SDOperand();
LoadSDNode *LD = cast<LoadSDNode>(Base);
Evan Cheng
committed
if (isBaseAlignmentOfN(16, Base->getOperand(1).Val, TLI))
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
LD->getSrcValueOffset(), LD->isVolatile());
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
LD->getSrcValueOffset(), LD->isVolatile(),
LD->getAlignment());
}
/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
Evan Cheng
committed
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
unsigned NumOps = N->getNumOperands();
// Ignore single operand BUILD_VECTOR.
if (NumOps == 1)
return SDOperand();
MVT::ValueType VT = N->getValueType(0);
MVT::ValueType EVT = MVT::getVectorElementType(VT);
if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
// We are looking for load i64 and zero extend. We want to transform
// it before legalizer has a chance to expand it. Also look for i64
// BUILD_PAIR bit casted to f64.
return SDOperand();
// This must be an insertion into a zero vector.
SDOperand HighElt = N->getOperand(1);
if (!isZeroNode(HighElt))
return SDOperand();
// Value must be a load.
SDNode *Base = N->getOperand(0).Val;
if (!isa<LoadSDNode>(Base)) {
if (Base->getOpcode() != ISD::BIT_CONVERT)
return SDOperand();
Base = Base->getOperand(0).Val;
if (!isa<LoadSDNode>(Base))
return SDOperand();
}
// Transform it into VZEXT_LOAD addr.
LoadSDNode *LD = cast<LoadSDNode>(Base);
Nate Begeman
committed
// Load must not be an extload.
if (LD->getExtensionType() != ISD::NON_EXTLOAD)
return SDOperand();
return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDOperand Cond = N->getOperand(0);
// If we have SSE[12] support, try to form min/max nodes.
if (Subtarget->hasSSE2() &&
(N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
if (Cond.getOpcode() == ISD::SETCC) {
// Get the LHS/RHS of the select.
SDOperand LHS = N->getOperand(1);
SDOperand RHS = N->getOperand(2);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
switch (CC) {
default: break;
case ISD::SETOLE: // (X <= Y) ? X : Y -> min
case ISD::SETULE:
case ISD::SETLE:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
case ISD::SETLT:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGT: // (X > Y) ? X : Y -> max
case ISD::SETUGT:
case ISD::SETGT:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
switch (CC) {
default: break;
case ISD::SETOGT: // (X > Y) ? Y : X -> min
case ISD::SETUGT:
case ISD::SETGT:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE: // (X <= Y) ? Y : X -> max
case ISD::SETULE:
case ISD::SETLE:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
case ISD::SETLT:
Opcode = X86ISD::FMAX;
break;
}
if (Opcode)
return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
}
return SDOperand();
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
Evan Cheng
committed
static SDOperand PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
Evan Cheng
committed
StoreSDNode *St = cast<StoreSDNode>(N);
if (MVT::isVector(St->getValue().getValueType()) &&
MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
SDNode* LdVal = St->getValue().Val;
LoadSDNode *Ld = 0;
int TokenFactorIndex = -1;
SmallVector<SDOperand, 8> Ops;
SDNode* ChainVal = St->getChain().Val;
// Must be a store of a load. We currently handle two cases: the load
// is a direct child, and it's under an intervening TokenFactor. It is
// possible to dig deeper under nested TokenFactors.
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).Val == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
} else
Ops.push_back(ChainVal->getOperand(i));
}
}
if (Ld) {
// If we are a 64-bit capable x86, lower to a single movq load/store pair.
if (Subtarget->is64Bit()) {
SDOperand NewLd = DAG.getLoad(MVT::i64, Ld->getChain(),
Ld->getBasePtr(), Ld->getSrcValue(),
Ld->getSrcValueOffset(), Ld->isVolatile(),
Ld->getAlignment());
SDOperand NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
6561
6562
6563
6564
6565
6566
6567
6568
6569
6570
6571
6572
6573
6574
6575
6576
6577
6578
6579
6580
6581
6582
6583
6584
6585
6586
6587
6588
6589
6590
6591
6592
6593
6594
NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
Ops.size());
}
return DAG.getStore(NewChain, NewLd, St->getBasePtr(),
St->getSrcValue(), St->getSrcValueOffset(),
St->isVolatile(), St->getAlignment());
}
// Otherwise, lower to two 32-bit copies.
SDOperand LoAddr = Ld->getBasePtr();
SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
DAG.getConstant(MVT::i32, 4));
SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
Ld->getSrcValue(), Ld->getSrcValueOffset(),
Ld->isVolatile(), Ld->getAlignment());
SDOperand HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
Ld->isVolatile(),
MinAlign(Ld->getAlignment(), 4));
SDOperand NewChain = LoLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(LoLd);
Ops.push_back(HiLd);
NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
Ops.size());
}
LoAddr = St->getBasePtr();
HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
DAG.getConstant(MVT::i32, 4));
SDOperand LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
St->getSrcValue(), St->getSrcValueOffset(),
St->isVolatile(), St->getAlignment());
SDOperand HiSt = DAG.getStore(NewChain, HiLd, HiAddr,
St->getSrcValue(), St->getSrcValueOffset()+4,
St->isVolatile(),
MinAlign(St->getAlignment(), 4));
return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
}
}
return SDOperand();
}
/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
// F[X]OR(x, 0.0) -> x
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622
6623
6624
6625
6626
6627
6628
6629
6630
6631
6632
6633
6634
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
return SDOperand();
}
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDOperand PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
// FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
return SDOperand();
}
SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
Evan Cheng
committed
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
case ISD::BUILD_VECTOR:
return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
Evan Cheng
committed
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
}
return SDOperand();
}
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'A':
case 'r':
case 'R':
case 'l':
case 'q':
case 'Q':
case 'x':
case 'Y':
return C_RegisterClass;
default:
break;
}
return TargetLowering::getConstraintType(Constraint);
}
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(MVT::ValueType ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (MVT::isFloatingPoint(ConstraintVT)) {
if (Subtarget->hasSSE2())
return "Y";
if (Subtarget->hasSSE1())
return "x";
}
return TargetLowering::LowerXConstraint(ConstraintVT);
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
char Constraint,
std::vector<SDOperand>&Ops,
SDOperand Result(0, 0);
switch (Constraint) {
default: break;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getValue() <= 31) {
Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
break;
}
return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getValue() <= 255) {
Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
break;
}
return;
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
break;
}
6732
6733
6734
6735
6736
6737
6738
6739
6740
6741
6742
6743
6744
6745
6746
6747
6748
6749
6750
6751
6752
6753
6754
6755
6756
6757
6758
6759
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
int64_t Offset = 0;
// Match either (GA) or (GA+C)
if (GA) {
Offset = GA->getOffset();
} else if (Op.getOpcode() == ISD::ADD) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
if (C && GA) {
Offset = GA->getOffset()+C->getValue();
} else {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
if (C && GA)
Offset = GA->getOffset()+C->getValue();
else
C = 0, GA = 0;
}
}
if (GA) {
// If addressing this global requires a load (e.g. in PIC mode), we can't
// match.
if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
false))
return;
Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
Offset);
Result = Op;
break;
}
// Otherwise, not valid for this mode.
return;
}
if (Result.Val) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
getRegClassForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const {
if (Constraint.size() == 1) {
// FIXME: not handling fp-stack yet!
switch (Constraint[0]) { // GCC X86 Constraint Letters
default: break; // Unknown constraint letter
case 'A': // EAX/EDX
if (VT == MVT::i32 || VT == MVT::i64)
return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
break;
case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
case 'Q': // Q_REGS
if (VT == MVT::i32)
return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
else if (VT == MVT::i16)
return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
else if (VT == MVT::i8)
return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
else if (VT == MVT::i64)
return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
break;
return std::vector<unsigned>();
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
case 'r': // GENERAL_REGS
case 'R': // LEGACY_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i64 && Subtarget->is64Bit())
return std::make_pair(0U, X86::GR64RegisterClass);
if (VT == MVT::i32)
return std::make_pair(0U, X86::GR32RegisterClass);
else if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
else if (VT == MVT::i8)
return std::make_pair(0U, X86::GR8RegisterClass);
break;
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, X86::RFP32RegisterClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, X86::RFP64RegisterClass);
return std::make_pair(0U, X86::RFP80RegisterClass);
Chris Lattner
committed
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
break;
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed
if (!Subtarget->hasSSE1()) break;
switch (VT) {
default: break;
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
return std::make_pair(0U, X86::FR32RegisterClass);
case MVT::f64:
case MVT::i64:
return std::make_pair(0U, X86::FR64RegisterClass);
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
return std::make_pair(0U, X86::VR128RegisterClass);
}
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Not found as a standard register?
if (Res.second == 0) {
// GCC calls "st(0)" just plain "st".
if (StringsEqualNoCase("{st}", Constraint)) {
Res.first = X86::ST0;
Chris Lattner
committed
Res.second = X86::RFP80RegisterClass;
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
if (Res.second->hasType(VT))
return Res; // Correct type already, nothing to do.
// All of the single-register GCC register classes map their values onto
// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second != X86::GR16RegisterClass)
return Res;
6898
6899
6900
6901
6902
6903
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
6914
6915
6916
6917
6918
6919
6920
6921
6922
6923
6924
6925
6926
6927
if (VT == MVT::i8) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::AL; break;
case X86::DX: DestReg = X86::DL; break;
case X86::CX: DestReg = X86::CL; break;
case X86::BX: DestReg = X86::BL; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = Res.second = X86::GR8RegisterClass;
}
} else if (VT == MVT::i32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::EAX; break;
case X86::DX: DestReg = X86::EDX; break;
case X86::CX: DestReg = X86::ECX; break;
case X86::BX: DestReg = X86::EBX; break;
case X86::SI: DestReg = X86::ESI; break;
case X86::DI: DestReg = X86::EDI; break;
case X86::BP: DestReg = X86::EBP; break;
case X86::SP: DestReg = X86::ESP; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = Res.second = X86::GR32RegisterClass;
}
} else if (VT == MVT::i64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::RAX; break;
case X86::DX: DestReg = X86::RDX; break;
case X86::CX: DestReg = X86::RCX; break;
case X86::BX: DestReg = X86::RBX; break;
case X86::SI: DestReg = X86::RSI; break;
case X86::DI: DestReg = X86::RDI; break;
case X86::BP: DestReg = X86::RBP; break;
case X86::SP: DestReg = X86::RSP; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = Res.second = X86::GR64RegisterClass;
}
return Res;
}