Newer
Older
return Chain;
}
SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand TheOp = Op.getOperand(0);
SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
if (Subtarget->is64Bit()) {
SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
MVT::i64, Copy1.getValue(2));
SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
DAG.getConstant(32, MVT::i8));
SDOperand Ops[] = {
DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
};
Tys = DAG.getVTList(MVT::i64, MVT::Other);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
}
SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
MVT::i32, Copy1.getValue(2));
SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
if (!Subtarget->is64Bit()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
SV->getOffset());
}
// __va_list_tag:
// gp_offset (0 - 6 * 8)
// fp_offset (48 - 48 + 8 * 16)
// overflow_arg_area (point to parameters coming in memory).
// reg_save_area
SmallVector<SDOperand, 8> MemOps;
SDOperand FIN = Op.getOperand(1);
// Store gp_offset
SDOperand Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsGPOffset, MVT::i32),
FIN, SV->getValue(), SV->getOffset());
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsFPOffset, MVT::i32),
FIN, SV->getValue(), SV->getOffset());
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
SV->getOffset());
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getConstant(8, getPointerTy()));
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
SV->getOffset());
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
Evan Cheng
committed
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
SDOperand Chain = Op.getOperand(0);
SDOperand DstPtr = Op.getOperand(1);
SDOperand SrcPtr = Op.getOperand(2);
SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
SrcSV->getValue(), SrcSV->getOffset());
Chain = SrcPtr.getValue(1);
for (unsigned i = 0; i < 3; ++i) {
SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
SrcSV->getValue(), SrcSV->getOffset());
Chain = Val.getValue(1);
Chain = DAG.getStore(Chain, Val, DstPtr,
DstSV->getValue(), DstSV->getOffset());
if (i == 2)
break;
SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
DAG.getConstant(8, getPointerTy()));
DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
DAG.getConstant(8, getPointerTy()));
}
return Chain;
}
SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
switch (IntNo) {
default: return SDOperand(); // Don't custom lower most intrinsics.
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse_comile_ss:
case Intrinsic::x86_sse_comigt_ss:
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse_comineq_ss:
case Intrinsic::x86_sse_ucomieq_ss:
case Intrinsic::x86_sse_ucomilt_ss:
case Intrinsic::x86_sse_ucomile_ss:
case Intrinsic::x86_sse_ucomigt_ss:
case Intrinsic::x86_sse_ucomige_ss:
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_comieq_sd:
case Intrinsic::x86_sse2_comilt_sd:
case Intrinsic::x86_sse2_comile_sd:
case Intrinsic::x86_sse2_comigt_sd:
case Intrinsic::x86_sse2_comige_sd:
case Intrinsic::x86_sse2_comineq_sd:
case Intrinsic::x86_sse2_ucomieq_sd:
case Intrinsic::x86_sse2_ucomilt_sd:
case Intrinsic::x86_sse2_ucomile_sd:
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
unsigned Opc = 0;
ISD::CondCode CC = ISD::SETCC_INVALID;
switch (IntNo) {
default: break;
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse2_comieq_sd:
Opc = X86ISD::COMI;
CC = ISD::SETEQ;
break;
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse2_comilt_sd:
Opc = X86ISD::COMI;
CC = ISD::SETLT;
break;
case Intrinsic::x86_sse_comile_ss:
Opc = X86ISD::COMI;
CC = ISD::SETLE;
break;
case Intrinsic::x86_sse_comigt_ss:
Opc = X86ISD::COMI;
CC = ISD::SETGT;
break;
case Intrinsic::x86_sse_comige_ss:
Opc = X86ISD::COMI;
CC = ISD::SETGE;
break;
case Intrinsic::x86_sse_comineq_ss:
Opc = X86ISD::COMI;
CC = ISD::SETNE;
break;
case Intrinsic::x86_sse_ucomieq_ss:
Opc = X86ISD::UCOMI;
CC = ISD::SETEQ;
break;
case Intrinsic::x86_sse_ucomilt_ss:
Opc = X86ISD::UCOMI;
CC = ISD::SETLT;
break;
case Intrinsic::x86_sse_ucomile_ss:
Opc = X86ISD::UCOMI;
CC = ISD::SETLE;
break;
case Intrinsic::x86_sse_ucomigt_ss:
Opc = X86ISD::UCOMI;
CC = ISD::SETGT;
break;
case Intrinsic::x86_sse_ucomige_ss:
Opc = X86ISD::UCOMI;
CC = ISD::SETGE;
break;
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_ucomineq_sd:
Opc = X86ISD::UCOMI;
CC = ISD::SETNE;
break;
unsigned X86CC;
SDOperand LHS = Op.getOperand(1);
SDOperand RHS = Op.getOperand(2);
translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
}
SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
// Depths > 0 not supported yet!
if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
return SDOperand();
// Just load the return address
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
}
SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
// Depths > 0 not supported yet!
if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
return SDOperand();
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
DAG.getConstant(4, getPointerTy()));
}
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
SelectionDAG &DAG) {
// Is not yet supported on x86-64
if (Subtarget->is64Bit())
return SDOperand();
return DAG.getConstant(8, getPointerTy());
}
SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
{
assert(!Subtarget->is64Bit() &&
"Lowering of eh_return builtin is not supported yet on x86-64");
MachineFunction &MF = DAG.getMachineFunction();
SDOperand Chain = Op.getOperand(0);
SDOperand Offset = Op.getOperand(1);
SDOperand Handler = Op.getOperand(2);
SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
getPointerTy());
SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
DAG.getConstant(-4UL, getPointerTy()));
StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
MF.addLiveOut(X86::ECX);
return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
Chain, DAG.getRegister(X86::ECX, getPointerTy()));
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Should not custom lower this!");
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
Lauro Ramos Venancio
committed
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShift(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::CALL: return LowerCALL(Op, DAG);
case ISD::RET: return LowerRET(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::MEMSET: return LowerMEMSET(Op, DAG);
case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
Evan Cheng
committed
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
Anton Korobeynikov
committed
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FSRL: return "X86ISD::FSRL";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::S2VEC: return "X86ISD::S2VEC";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
Lauro Ramos Venancio
committed
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// X86 supports extremely general addressing modes.
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
return false;
if (AM.BaseGV) {
// X86-64 only supports addr of globals in small code model.
if (Subtarget->is64Bit() &&
getTargetMachine().getCodeModel() != CodeModel::Small)
return false;
// We can only fold this if we don't need a load either.
if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
return false;
}
switch (AM.Scale) {
case 0:
case 1:
case 2:
case 4:
case 8:
// These scales always work.
break;
case 3:
case 5:
case 9:
// These scales are formed with basereg+scalereg. Only accept if there is
// no basereg yet.
if (AM.HasBaseReg)
return false;
break;
default: // Other stuff never works.
return false;
}
return true;
}
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
// Only do shuffles on 128-bit vector types for now.
if (MVT::getSizeInBits(VT) == 64) return false;
return (Mask.Val->getNumOperands() <= 4 ||
Evan Cheng
committed
isIdentityMask(Mask.Val) ||
isIdentityMask(Mask.Val, true) ||
isSplatMask(Mask.Val) ||
isPSHUFHW_PSHUFLWMask(Mask.Val) ||
X86::isUNPCKLMask(Mask.Val) ||
Evan Cheng
committed
X86::isUNPCKHMask(Mask.Val) ||
Evan Cheng
committed
X86::isUNPCKH_v_undef_Mask(Mask.Val));
}
bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
MVT::ValueType EVT,
SelectionDAG &DAG) const {
unsigned NumElts = BVOps.size();
// Only do shuffles on 128-bit vector types for now.
if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
if (NumElts == 2) return true;
if (NumElts == 4) {
return (isMOVLMask(&BVOps[0], 4) ||
isCommutedMOVL(&BVOps[0], 4, true) ||
isSHUFPMask(&BVOps[0], 4) ||
isCommutedSHUFP(&BVOps[0], 4));
}
return false;
}
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
MachineBasicBlock *BB) {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
ilist<MachineBasicBlock>::iterator It = BB;
++It;
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
unsigned Opc =
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
MachineFunction *F = BB->getParent();
F->getBasicBlockList().insert(It, copy0MBB);
F->getBasicBlockList().insert(It, sinkMBB);
// Update machine-CFG edges by first adding all successors of the current
// block to the new block which will contain the Phi node for the select.
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
e = BB->succ_end(); i != e; ++i)
sinkMBB->addSuccessor(*i);
// Next, remove all successors of the current block, and add the true
// and fallthrough blocks as its successors.
while(!BB->succ_empty())
BB->removeSuccessor(BB->succ_begin());
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
delete MI; // The pseudo instruction is gone now.
return BB;
}
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
// Set the high part to be round to zero...
addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI->getOpcode()) {
default: assert(0 && "illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
}
X86AddressMode AM;
MachineOperand &Op = MI->getOperand(0);
if (Op.isRegister()) {
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = Op.getFrameIndex();
}
Op = MI->getOperand(1);
if (Op.isImmediate())
AM.Scale = Op.getImm();
Op = MI->getOperand(2);
if (Op.isImmediate())
AM.IndexReg = Op.getImm();
Op = MI->getOperand(3);
if (Op.isGlobalAddress()) {
AM.GV = Op.getGlobal();
} else {
AM.Disp = Op.getImm();
addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
.addReg(MI->getOperand(4).getReg());
// Reload the original control word now.
addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
delete MI; // The pseudo instruction is gone now.
return BB;
}
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
uint64_t Mask,
uint64_t &KnownZero,
uint64_t &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
case X86ISD::SETCC:
KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
break;
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
MVT::ValueType VT = N->getValueType(0);
SDOperand PermMask = N->getOperand(2);
unsigned NumElems = PermMask.getNumOperands();
SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
i %= NumElems;
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return (i == 0)
? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
SDOperand Idx = PermMask.getOperand(i);
if (Idx.getOpcode() == ISD::UNDEF)
return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
}
return SDOperand();
}
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
unsigned Opc = N->getOpcode();
Evan Cheng
committed
if (Opc == X86ISD::Wrapper) {
if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
return true;
}
} else if (Opc == ISD::ADD) {
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
SDOperand N1 = N->getOperand(0);
SDOperand N2 = N->getOperand(1);
if (isGAPlusOffset(N1.Val, GA, Offset)) {
ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
if (V) {
Offset += V->getSignExtended();
return true;
}
} else if (isGAPlusOffset(N2.Val, GA, Offset)) {
ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
if (V) {
Offset += V->getSignExtended();
return true;
}
}
}
return false;
}
/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
MachineFrameInfo *MFI) {
if (N->getOperand(0).Val != Base->getOperand(0).Val)
return false;
SDOperand Loc = N->getOperand(1);
SDOperand BaseLoc = Base->getOperand(1);
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
int FS = MFI->getObjectSize(FI);
int BFS = MFI->getObjectSize(BFI);
if (FS != BFS || FS != Size) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
} else {
GlobalValue *GV1 = NULL;
GlobalValue *GV2 = NULL;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
return Offset1 == (Offset2 + Dist*Size);
}
return false;
}
static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
const X86Subtarget *Subtarget) {
GlobalValue *GV;
int64_t Offset;
if (isGAPlusOffset(Base, GV, Offset))
return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
else {
assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
if (BFI < 0)
// Fixed objects do not specify alignment, however the offsets are known.
return ((Subtarget->getStackAlignment() % 16) == 0 &&
(MFI->getObjectOffset(BFI) % 16) == 0);
else
return MFI->getObjectAlignment(BFI) >= 16;
}
return false;
}
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MVT::ValueType VT = N->getValueType(0);
MVT::ValueType EVT = MVT::getVectorElementType(VT);
SDOperand PermMask = N->getOperand(2);
int NumElems = (int)PermMask.getNumOperands();
SDNode *Base = NULL;
for (int i = 0; i < NumElems; ++i) {
SDOperand Idx = PermMask.getOperand(i);
if (Idx.getOpcode() == ISD::UNDEF) {
if (!Base) return SDOperand();
} else {
SDOperand Arg =
getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
return SDOperand();
if (!Base)
Base = Arg.Val;
else if (!isConsecutiveLoad(Arg.Val, Base,
i, MVT::getSizeInBits(EVT)/8,MFI))
return SDOperand();
}
}
bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
if (isAlign16) {
LoadSDNode *LD = cast<LoadSDNode>(Base);
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
LD->getSrcValueOffset());
} else {
// Just use movups, it's shorter.
SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
SmallVector<SDOperand, 3> Ops;
Ops.push_back(Base->getOperand(0));
Ops.push_back(Base->getOperand(1));
Ops.push_back(Base->getOperand(2));
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
Evan Cheng
committed
}
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDOperand Cond = N->getOperand(0);
// If we have SSE[12] support, try to form min/max nodes.
if (Subtarget->hasSSE2() &&
(N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
if (Cond.getOpcode() == ISD::SETCC) {
// Get the LHS/RHS of the select.
SDOperand LHS = N->getOperand(1);
SDOperand RHS = N->getOperand(2);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
switch (CC) {
default: break;
case ISD::SETOLE: // (X <= Y) ? X : Y -> min
case ISD::SETULE:
case ISD::SETLE:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
case ISD::SETLT:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGT: // (X > Y) ? X : Y -> max
case ISD::SETUGT:
case ISD::SETGT:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
switch (CC) {
default: break;
case ISD::SETOGT: // (X > Y) ? Y : X -> min
case ISD::SETUGT:
case ISD::SETGT:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE: // (X <= Y) ? Y : X -> max
case ISD::SETULE:
case ISD::SETLE:
if (!UnsafeFPMath) break;
// FALL THROUGH.
case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
case ISD::SETLT:
Opcode = X86ISD::FMAX;
break;
}
if (Opcode)
return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
}
return SDOperand();
}
SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::VECTOR_SHUFFLE:
return PerformShuffleCombine(N, DAG, Subtarget);
case ISD::SELECT:
return PerformSELECTCombine(N, DAG, Subtarget);
}
return SDOperand();
}
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'A':
case 'r':
case 'R':
case 'l':
case 'q':
case 'Q':
case 'x':
case 'Y':
return C_RegisterClass;
default:
break;
}
return TargetLowering::getConstraintType(Constraint);
}
/// isOperandValidForConstraint - Return the specified operand (possibly
/// modified) if the specified SDOperand is valid for the specified target
/// constraint letter, otherwise return null.
SDOperand X86TargetLowering::
isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
switch (Constraint) {
default: break;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getValue() <= 31)
return DAG.getTargetConstant(C->getValue(), Op.getValueType());
return SDOperand(0,0);
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getValue() <= 255)
return DAG.getTargetConstant(C->getValue(), Op.getValueType());
}
return SDOperand(0,0);
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
return DAG.getTargetConstant(CST->getValue(), Op.getValueType());
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
int64_t Offset = 0;
// Match either (GA) or (GA+C)
if (GA) {
Offset = GA->getOffset();
} else if (Op.getOpcode() == ISD::ADD) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
if (C && GA) {
Offset = GA->getOffset()+C->getValue();
} else {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
if (C && GA)
Offset = GA->getOffset()+C->getValue();
else
C = 0, GA = 0;
}
}
if (GA) {
// If addressing this global requires a load (e.g. in PIC mode), we can't
// match.
if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
false))
return SDOperand(0, 0);
Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
Offset);
return Op;
}
// Otherwise, not valid for this mode.
return SDOperand(0, 0);
}
return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
}
getRegClassForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const {
if (Constraint.size() == 1) {
// FIXME: not handling fp-stack yet!
switch (Constraint[0]) { // GCC X86 Constraint Letters
default: break; // Unknown constraint letter
case 'A': // EAX/EDX
if (VT == MVT::i32 || VT == MVT::i64)
return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
break;
case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
case 'Q': // Q_REGS
if (VT == MVT::i32)
return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
else if (VT == MVT::i16)
return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
else if (VT == MVT::i8)
return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0);
break;
return std::vector<unsigned>();
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
case 'r': // GENERAL_REGS
case 'R': // LEGACY_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i64 && Subtarget->is64Bit())
return std::make_pair(0U, X86::GR64RegisterClass);
if (VT == MVT::i32)
return std::make_pair(0U, X86::GR32RegisterClass);
else if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
else if (VT == MVT::i8)
return std::make_pair(0U, X86::GR8RegisterClass);
break;
Chris Lattner
committed
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
break;
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed
if (!Subtarget->hasSSE1()) break;