Newer
Older
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the Evan Cheng and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
Evan Cheng
committed
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
Evan Cheng
committed
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
namespace {
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
/// SDOperand's instead of register numbers for the leaves of the matched
/// tree.
struct X86ISelAddressMode {
enum {
RegBase,
FrameIndexBase
} BaseType;
struct { // This is really a union, discriminated by BaseType!
SDOperand Reg;
int FrameIndex;
} Base;
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
unsigned Align; // CP alignment.
X86ISelAddressMode()
: BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
GV(0), CP(0), ES(0), JT(-1), Align(0) {
}
};
}
namespace {
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
/// ContainsFPCode - Every instruction we select that uses or defines a FP
/// register should set this to true.
bool ContainsFPCode;
/// FastISel - Enable fast(er) instruction selection.
///
bool FastISel;
/// TM - Keep a reference to X86TargetMachine.
///
X86TargetMachine &TM;
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering X86Lowering;
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
/// GlobalBaseReg - keeps track of the virtual register mapped onto global
/// base register.
Evan Cheng
committed
public:
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
Evan Cheng
committed
: SelectionDAGISel(X86Lowering),
Evan Cheng
committed
X86Lowering(*TM.getTargetLowering()),
Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
return SelectionDAGISel::runOnFunction(Fn);
}
virtual const char *getPassName() const {
return "X86 DAG->DAG Instruction Selection";
}
/// InstructionSelectBasicBlock - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
Evan Cheng
committed
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
private:
SDNode *Select(SDOperand N);
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth);
bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
SDOperand N, SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp,
SDOperand &InChain, SDOperand &OutChain);
bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
Evan Cheng
committed
void InstructionSelectPreprocess(SelectionDAG &DAG);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
char ConstraintCode,
std::vector<SDOperand> &OutOps,
SelectionDAG &DAG);
inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
AM.Base.Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
else if (AM.ES)
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
else
Disp = getI32Imm(AM.Disp);
}
/// getI8Imm - Return a target constant with the specified value, of type
/// i8.
inline SDOperand getI8Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
/// getI16Imm - Return a target constant with the specified value, of type
/// i16.
inline SDOperand getI16Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i16);
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDOperand getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
Christopher Lamb
committed
/// getTruncate - return an SDNode that implements a subreg based truncate
/// of the specified operand to the the specified value type.
SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);
};
}
static SDNode *findFlagUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
SDOperand Op = User->getOperand(i);
if (Op.Val == N && Op.ResNo == FlagResNo)
return User;
}
}
return NULL;
}
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SDNode *Skip, bool &found,
std::set<SDNode *> &Visited) {
if (found ||
Use->getNodeId() > Def->getNodeId() ||
!Visited.insert(Use).second)
return;
for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
SDNode *N = Use->getOperand(i).Val;
if (N == Skip)
continue;
if (N == Def) {
if (Use == ImmedUse)
continue; // Immediate use is ok.
if (Use == Root) {
assert(Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == X86ISD::CMP);
continue;
}
found = true;
break;
}
findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
}
}
/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
SDNode *Skip = NULL) {
std::set<SDNode *> Visited;
bool found = false;
findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
return found;
}
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
if (FastISel) return false;
Evan Cheng
committed
// If U use can somehow reach N through another path then U can't fold N or
// it will create a cycle. e.g. In the following diagram, U can reach N
// through X. If N is folded into into U, then X is both a predecessor and
Evan Cheng
committed
// a successor of U.
//
// [ N ]
// ^ ^
// | |
// / \---
// / [X]
// | ^
// [U]--------|
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
if (isNonImmUse(Root, N, U))
return false;
// If U produces a flag, then it gets (even more) interesting. Since it
// would have been "glued" together with its flag use, we need to check if
// it might reach N:
//
// [ N ]
// ^ ^
// | |
// [U] \--
// ^ [TF]
// | ^
// | |
// \ /
// [FU]
//
// If FU (flag use) indirectly reach N (the load), and U fold N (call it
// NU), then TF is a predecessor of FU and a successor of NU. But since
// NU and FU are flagged together, this effectively creates a cycle.
bool HasFlagUse = false;
MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
while ((VT == MVT::Flag && !Root->use_empty())) {
SDNode *FU = findFlagUse(Root);
if (FU == NULL)
break;
else {
Root = FU;
HasFlagUse = true;
VT = Root->getValueType(Root->getNumValues()-1);
if (HasFlagUse)
return !isNonImmUse(Root, N, Root, U);
return true;
Evan Cheng
committed
}
Evan Cheng
committed
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
SDOperand Store, SDOperand TF) {
std::vector<SDOperand> Ops;
for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
if (Load.Val == TF.Val->getOperand(i).Val)
Ops.push_back(Load.Val->getOperand(0));
else
Ops.push_back(TF.Val->getOperand(i));
DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
Store.getOperand(2), Store.getOperand(3));
}
/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
/// selector to pick more load-modify-store instructions. This is a common
/// case:
///
/// [Load chain]
/// ^
/// |
/// [Load]
/// ^ ^
/// | |
/// / \-
/// / |
/// [TokenFactor] [Op]
/// ^ ^
/// | |
/// \ /
/// \ /
/// [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
/// [Load chain]
/// ^
/// |
/// [TokenFactor]
/// ^
/// |
/// [Load]
/// ^ ^
/// | |
/// | \-
/// | |
/// | [Op]
/// | ^
/// | |
/// \ /
/// \ /
/// [Store]
void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
if (!ISD::isNON_TRUNCStore(I))
Evan Cheng
committed
continue;
SDOperand Chain = I->getOperand(0);
if (Chain.Val->getOpcode() != ISD::TokenFactor)
continue;
SDOperand N1 = I->getOperand(1);
SDOperand N2 = I->getOperand(2);
if (MVT::isFloatingPoint(N1.getValueType()) ||
MVT::isVector(N1.getValueType()) ||
!N1.hasOneUse())
Evan Cheng
committed
continue;
bool RModW = false;
SDOperand Load;
unsigned Opcode = N1.Val->getOpcode();
switch (Opcode) {
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::ADDC:
case ISD::ADDE: {
SDOperand N10 = N1.getOperand(0);
SDOperand N11 = N1.getOperand(1);
if (ISD::isNON_EXTLoad(N10.Val))
Evan Cheng
committed
RModW = true;
else if (ISD::isNON_EXTLoad(N11.Val)) {
Evan Cheng
committed
RModW = true;
std::swap(N10, N11);
}
RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
Evan Cheng
committed
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
Evan Cheng
committed
if (RModW)
Load = N10;
break;
}
case ISD::SUB:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
case ISD::SUBC:
case ISD::SUBE:
case X86ISD::SHLD:
case X86ISD::SHRD: {
SDOperand N10 = N1.getOperand(0);
if (ISD::isNON_EXTLoad(N10.Val))
Evan Cheng
committed
RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
Evan Cheng
committed
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
Evan Cheng
committed
if (RModW)
Load = N10;
break;
}
}
Evan Cheng
committed
if (RModW) {
Evan Cheng
committed
MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
Evan Cheng
committed
++NumLoadMoved;
}
Evan Cheng
committed
}
}
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
DEBUG(BB->dump());
MachineFunction::iterator FirstMBB = BB;
if (!FastISel)
Evan Cheng
committed
InstructionSelectPreprocess(DAG);
// Codegen the basic block.
DOUT << "===== Instruction selection begins:\n";
DAG.setRoot(SelectRoot(DAG.getRoot()));
DOUT << "===== Instruction selection ends:\n";
DAG.RemoveDeadNodes();
// Emit machine code to BB.
ScheduleAndEmitDAG(DAG);
// If we are emitting FP stack code, scan the basic block to determine if this
// block defines any FP values. If so, put an FP_REG_KILL instruction before
// the terminator of the block.
// Note that FP stack instructions *are* used in SSE code for long double,
// so we do need this check.
bool ContainsFPCode = false;
// Scan all of the machine instructions in these MBBs, checking for FP
// stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
MachineFunction::iterator MBBI = FirstMBB;
do {
for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
!ContainsFPCode && I != E; ++I) {
if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
const TargetRegisterClass *clas;
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
X86::RFP32RegisterClass ||
clas == X86::RFP64RegisterClass ||
clas == X86::RFP80RegisterClass)) {
ContainsFPCode = true;
break;
}
}
}
}
} while (!ContainsFPCode && &*(MBBI++) != BB);
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
// a copy of the input value in this block. In SSE mode, we only care about
// 80-bit values.
if (!ContainsFPCode) {
// Final check, check LLVM BB's that are successors to the LLVM BB
// corresponding to BB for FP PHI nodes.
const BasicBlock *LLVMBB = BB->getBasicBlock();
const PHINode *PN;
for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
!ContainsFPCode && SI != E; ++SI) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
if (PN->getType()==Type::X86_FP80Ty ||
(!Subtarget->hasSSE2() && PN->getType()->isFloatingPoint())) {
ContainsFPCode = true;
break;
}
}
}
}
// Finally, if we found any FP code, emit the FP_REG_KILL instruction.
if (ContainsFPCode) {
BuildMI(*BB, BB->getFirstTerminator(),
TM.getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
}
}
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
// Limit recursion.
if (Depth > 5)
return MatchAddressBase(N, AM, isRoot, Depth);
// RIP relative addressing: %rip + 32-bit displacement!
if (AM.isRIPRel) {
if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
}
return true;
}
int id = N.Val->getNodeId();
bool Available = isSelected(id);
switch (N.getOpcode()) {
default: break;
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
break;
}
Evan Cheng
committed
case X86ISD::Wrapper: {
bool is64Bit = Subtarget->is64Bit();
// Under X86-64 non-small code model, GV (and friends) are 64-bits.
Evan Cheng
committed
if (is64Bit && TM.getCodeModel() != CodeModel::Small)
if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
break;
// If value is available in a register both base and index components have
// been picked, we can't fit the result available in the register in the
// addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
bool isStatic = TM.getRelocationModel() == Reloc::Static;
SDOperand N0 = N.getOperand(0);
// Mac OS X X86-64 lower 4G address is not available.
bool isAbs32 = !is64Bit ||
(isStatic && Subtarget->hasLow4GUserSpaceAddress());
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
GlobalValue *GV = G->getGlobal();
if (isAbs32 || isRoot) {
AM.Disp += G->getOffset();
AM.isRIPRel = !isAbs32;
return false;
}
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
} else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
}
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
}
}
break;
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
break;
case ISD::SHL:
if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
unsigned Val = CN->getValue();
if (Val == 1 || Val == 2 || Val == 3) {
AM.Scale = 1 << Val;
SDOperand ShVal = N.Val->getOperand(0);
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
if (isInt32(Disp))
AM.Disp = Disp;
else
AM.IndexReg = ShVal;
} else {
AM.IndexReg = ShVal;
}
return false;
}
}
break;
case ISD::MUL:
// X*[3,5,9] -> X+X*[2,4,8]
if (!Available &&
AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base.Reg.Val == 0 &&
AM.IndexReg.Val == 0) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
AM.Scale = unsigned(CN->getValue())-1;
SDOperand MulVal = N.Val->getOperand(0);
SDOperand Reg;
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
if (isInt32(Disp))
AM.Disp = Disp;
else
Reg = N.Val->getOperand(0);
} else {
Reg = N.Val->getOperand(0);
}
AM.IndexReg = AM.Base.Reg = Reg;
return false;
}
}
break;
case ISD::ADD:
X86ISelAddressMode Backup = AM;
if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
return false;
AM = Backup;
if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
return false;
AM = Backup;
}
break;
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
X86ISelAddressMode Backup = AM;
// Start with the LHS as an addr mode.
if (!MatchAddress(N.getOperand(0), AM, false) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
isInt32(AM.Disp + CN->getSignExtended()) &&
// Check to see if the LHS & C is zero.
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
AM.Disp += CN->getValue();
AM = Backup;
return MatchAddressBase(N, AM, isRoot, Depth);
}
/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
// If so, check to see if the scale index register is set.
if (AM.IndexReg.Val == 0) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
}
// Otherwise, we cannot select it.
return true;
}
// Default, generate it as a register.
AM.BaseType = X86ISelAddressMode::RegBase;
AM.Base.Reg = N;
return false;
}
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
X86ISelAddressMode AM;
if (MatchAddress(N, AM))
return false;
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.Val)
}
if (!AM.IndexReg.Val)
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
Chris Lattner
committed
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
return ((isa<ConstantSDNode>(Elt) &&
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
Chris Lattner
committed
}
Chris Lattner
committed
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp, SDOperand &InChain,
SDOperand &OutChain) {
Chris Lattner
committed
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
Chris Lattner
committed
InChain = N.getOperand(0).getValue(1);
if (ISD::isNON_EXTLoad(InChain.Val) &&
InChain.getValue(0).hasOneUse() &&
N.hasOneUse() &&
LoadSDNode *LD = cast<LoadSDNode>(InChain);
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
Chris Lattner
committed
return false;
OutChain = LD->getChain();
Chris Lattner
committed
return true;
}
}
Chris Lattner
committed
// Also handle the case where we explicitly require zeros in the top
Chris Lattner
committed
// elements. This is a vector shuffle from the zero vector.
Chris Lattner
committed
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(1).Val->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
N.getOperand(1).getOperand(0).hasOneUse()) {
// Check to see if the BUILD_VECTOR is building a zero vector.
SDOperand BV = N.getOperand(0);
for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
if (!isZeroNode(BV.getOperand(i)) &&
BV.getOperand(i).getOpcode() != ISD::UNDEF)
return false; // Not a zero/undef vector.
// Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
// from the LHS.
unsigned VecWidth = BV.getNumOperands();
SDOperand ShufMask = N.getOperand(2);
assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
if (C->getValue() == VecWidth) {
for (unsigned i = 1; i != VecWidth; ++i) {
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
// ok.
} else {
ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
if (C->getValue() >= VecWidth) return false;
}
}
}
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
Chris Lattner
committed
return false;
OutChain = LD->getChain();
InChain = SDOperand(LD, 1);
return true;
}
}
Chris Lattner
committed
return false;
}
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
X86ISelAddressMode AM;
if (MatchAddress(N, AM))
return false;
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.Val)
Complexity = 1;
else
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.Val)
Complexity++;
else
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
// a simple shift.
if (AM.Scale > 1)
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
// to a LEA. This is determined with some expermentation but is by no means
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
// For X86-64, we should always use lea to materialize RIP relative
// addresses.
Complexity = 4;
else
Complexity += 2;
}
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
Complexity++;
if (Complexity > 2) {
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
return false;
}
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
if (ISD::isNON_EXTLoad(N.Val) &&
N.hasOneUse() &&
CanBeFoldedBy(N.Val, P.Val, P.Val))
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = BB->getParent()->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
SSARegMap *RegMap = BB->getParent()->getSSARegMap();
unsigned PC = RegMap->createVirtualRegister(X86::GR32RegisterClass);
const TargetInstrInfo *TII = TM.getInstrInfo();
BuildMI(FirstMBB, MBBI, TII->get(X86::MovePCtoStack));
BuildMI(FirstMBB, MBBI, TII->get(X86::POP32r), PC);
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
if (TM.getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT()) {
GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg).
addReg(PC).
addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
} else {
GlobalBaseReg = PC;
}
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
static SDNode *FindCallStartFromCall(SDNode *Node) {
if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
assert(Node->getOperand(0).getValueType() == MVT::Other &&
"Node doesn't have a token chain argument!");
return FindCallStartFromCall(Node->getOperand(0).Val);
}
Christopher Lamb
committed
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
SDOperand SRIdx;
switch (VT) {
case MVT::i8:
SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
// Ensure that the source register has an 8-bit subreg on 32-bit targets
if (!Subtarget->is64Bit()) {
unsigned Opc;
MVT::ValueType VT;
switch (N0.getValueType()) {
default: assert(0 && "Unknown truncate!");
case MVT::i16:
Opc = X86::MOV16to16_;
VT = MVT::i16;
break;
case MVT::i32:
Opc = X86::MOV32to32_;
VT = MVT::i32;
break;
}
N0 =
SDOperand(CurDAG->getTargetNode(Opc, VT, N0), 0);
}
break;
case MVT::i16:
SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
break;
case MVT::i32:
SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
break;
default: assert(0 && "Unknown truncate!");
}
return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
VT,
N0, SRIdx);
}
SDNode *X86DAGToDAGISel::Select(SDOperand N) {