Newer
Older
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/FastISel.h"
Owen Anderson
committed
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Owen Anderson
committed
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
class X86FastISel : public FastISel {
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
/// StackPtr - Register used as the stack pointer.
///
unsigned StackPtr;
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf64;
bool X86ScalarSSEf32;
explicit X86FastISel(MachineFunction &mf,
MachineModuleInfo *mmi,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
, SmallSet<Instruction*, 8> &cil
#endif
)
#ifndef NDEBUG
, cil
#endif
) {
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
virtual bool TargetSelectInstruction(Instruction *I);
#include "X86GenFastISel.inc"
private:
Owen Anderson
committed
bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT);
Owen Anderson
committed
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
Owen Anderson
committed
bool X86FastEmitStore(EVT VT, Value *Val,
const X86AddressMode &AM);
Owen Anderson
committed
bool X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM);
Evan Cheng
committed
Owen Anderson
committed
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
Evan Cheng
committed
unsigned &ResultReg);
bool X86SelectAddress(Value *V, X86AddressMode &AM);
bool X86SelectCallAddress(Value *V, X86AddressMode &AM);
bool X86SelectLoad(Instruction *I);
Owen Anderson
committed
bool X86SelectStore(Instruction *I);
bool X86SelectCmp(Instruction *I);
bool X86SelectZExt(Instruction *I);
bool X86SelectBranch(Instruction *I);
bool X86SelectShift(Instruction *I);
bool X86SelectSelect(Instruction *I);
bool X86SelectTrunc(Instruction *I);
bool X86SelectFPExt(Instruction *I);
bool X86SelectFPTrunc(Instruction *I);
bool X86SelectExtractValue(Instruction *I);
bool X86VisitIntrinsicCall(IntrinsicInst &I);
bool X86SelectCall(Instruction *I);
Sandeep Patel
committed
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
return getTargetMachine()->getInstrInfo();
}
const X86TargetMachine *getTargetMachine() const {
return static_cast<const X86TargetMachine *>(&TM);
unsigned TargetMaterializeConstant(Constant *C);
unsigned TargetMaterializeAlloca(AllocaInst *C);
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
Owen Anderson
committed
bool isScalarFPTypeInSSEReg(EVT VT) const {
Owen Anderson
committed
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
Owen Anderson
committed
bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
};
} // end anonymous namespace.
Owen Anderson
committed
bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
Owen Anderson
committed
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
Owen Anderson
committed
if (VT == MVT::f64 && !X86ScalarSSEf64)
return false;
Owen Anderson
committed
if (VT == MVT::f32 && !X86ScalarSSEf32)
return false;
// Similarly, no f80 support yet.
Owen Anderson
committed
if (VT == MVT::f80)
return false;
// We only handle legal types. For example, on x86-32 the instruction
// selector contains all of the 64-bit instructions from x86-64,
// under the assumption that i64 won't be used if the target doesn't
// support it.
Owen Anderson
committed
return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}
#include "X86GenCallingConv.inc"
/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
Sandeep Patel
committed
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
bool isTaillCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else
return CC_X86_64_C;
}
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else
return CC_X86_32_C;
}
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
Owen Anderson
committed
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
Owen Anderson
committed
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
Owen Anderson
committed
case MVT::i8:
Opc = X86::MOV8rm;
RC = X86::GR8RegisterClass;
break;
Owen Anderson
committed
case MVT::i16:
Opc = X86::MOV16rm;
RC = X86::GR16RegisterClass;
break;
Owen Anderson
committed
case MVT::i32:
Opc = X86::MOV32rm;
RC = X86::GR32RegisterClass;
break;
Owen Anderson
committed
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
break;
Owen Anderson
committed
case MVT::f32:
if (Subtarget->hasSSE1()) {
Opc = X86::MOVSSrm;
RC = X86::FR32RegisterClass;
Opc = X86::LD_Fp32m;
RC = X86::RFP32RegisterClass;
Owen Anderson
committed
case MVT::f64:
if (Subtarget->hasSSE2()) {
Opc = X86::MOVSDrm;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp64m;
RC = X86::RFP64RegisterClass;
}
break;
Owen Anderson
committed
case MVT::f80:
// No f80 support yet.
return false;
ResultReg = createResultReg(RC);
addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
return true;
}
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
Owen Anderson
committed
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
const X86AddressMode &AM) {
// Get opcode and regclass of the output for the given store instruction.
Owen Anderson
committed
unsigned Opc = 0;
Owen Anderson
committed
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f80: // No f80 support yet.
Owen Anderson
committed
default: return false;
case MVT::i1: {
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
BuildMI(MBB, DL,
TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
Val = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
Owen Anderson
committed
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
case MVT::i32: Opc = X86::MOV32mr; break;
case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
case MVT::f32:
Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
Owen Anderson
committed
break;
Owen Anderson
committed
case MVT::f64:
Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
Owen Anderson
committed
break;
}
addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
Owen Anderson
committed
return true;
}
Owen Anderson
committed
bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
const X86AddressMode &AM) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
// If this is a store of a simple constant, fold the constant into the store.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
bool Signed = true;
Owen Anderson
committed
switch (VT.getSimpleVT().SimpleTy) {
default: break;
case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
Owen Anderson
committed
case MVT::i8: Opc = X86::MOV8mi; break;
case MVT::i16: Opc = X86::MOV16mi; break;
case MVT::i32: Opc = X86::MOV32mi; break;
case MVT::i64:
// Must be a 32-bit sign extended value.
if ((int)CI->getSExtValue() == CI->getSExtValue())
Opc = X86::MOV64mi32;
break;
}
if (Opc) {
addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
.addImm(Signed ? CI->getSExtValue() :
CI->getZExtValue());
return true;
}
}
unsigned ValReg = getRegForValue(Val);
if (ValReg == 0)
return false;
return X86FastEmitStore(VT, ValReg, AM);
}
Evan Cheng
committed
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
Owen Anderson
committed
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned Src, EVT SrcVT,
Evan Cheng
committed
unsigned &ResultReg) {
Owen Anderson
committed
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
if (RR != 0) {
ResultReg = RR;
return true;
} else
return false;
Evan Cheng
committed
}
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
unsigned Opcode = Instruction::UserOp1;
if (Instruction *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
} else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default: break;
case Instruction::BitCast:
// Look past bitcasts.
return X86SelectAddress(U->getOperand(0), AM);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return X86SelectAddress(U->getOperand(0), AM);
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return X86SelectAddress(U->getOperand(0), AM);
case Instruction::Alloca: {
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
if (SI != StaticAllocaMap.end()) {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = SI->second;
return true;
}
break;
}
case Instruction::Add: {
// Adds of constants are common and easy enough.
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
// They have to fit in the 32-bit signed displacement field though.
if (isInt32(Disp)) {
AM.Disp = (uint32_t)Disp;
return X86SelectAddress(U->getOperand(0), AM);
}
break;
}
case Instruction::GetElementPtr: {
// Pattern-match simple GEPs.
uint64_t Disp = (int32_t)AM.Disp;
unsigned IndexReg = AM.IndexReg;
unsigned Scale = AM.Scale;
gep_type_iterator GTI = gep_type_begin(U);
// Iterate through the indices, folding what we can. Constants can be
// folded, and one dynamic index can be handled, if the scale is supported.
for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
Value *Op = *i;
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
Disp += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
Disp += CI->getSExtValue() * S;
} else if (IndexReg == 0 &&
Chris Lattner
committed
(!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
(S == 1 || S == 2 || S == 4 || S == 8)) {
// Scaled-index addressing.
Scale = S;
IndexReg = getRegForGEPIndex(Op);
if (IndexReg == 0)
return false;
} else
// Unsupported.
goto unsupported_gep;
}
// Check for displacement overflow.
if (!isInt32(Disp))
break;
// Ok, the GEP indices were covered by constant-offset and scaled-index
// addressing. Update the address state and move on to examining the base.
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
return X86SelectAddress(U->getOperand(0), AM);
unsupported_gep:
// Ok, the GEP indices weren't all covered.
break;
}
}
// Handle constant address.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
if (TM.getCodeModel() != CodeModel::Small)
// RIP-relative addresses can't have additional register operands.
Chris Lattner
committed
if (Subtarget->isPICStyleRIPRel() &&
(AM.Base.Reg != 0 || AM.IndexReg != 0))
return false;
// Can't handle TLS yet.
if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
// Okay, we've committed to selecting this global. Set up the basic address.
// Allow the subtarget to classify the global.
unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
// If this reference is relative to the pic base, set it now.
if (isGlobalRelativeToPICBase(GVFlags)) {
// FIXME: How do we know Base.Reg is free??
AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
// Unless the ABI requires an extra load, return a direct reference to
if (!isGlobalStubReference(GVFlags)) {
if (Subtarget->isPICStyleRIPRel()) {
// Use rip-relative addressing if we can. Above we verified that the
// base and index registers are unused.
assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
AM.Base.Reg = X86::RIP;
}
AM.GVOpFlags = GVFlags;
return true;
}
// Ok, we need to do a load from a stub. If we've already loaded from this
// stub, reuse the loaded pointer, otherwise emit the load now.
DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
unsigned LoadReg;
if (I != LocalValueMap.end() && I->second != 0) {
LoadReg = I->second;
} else {
// Issue load from stub.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
X86AddressMode StubAM;
StubAM.Base.Reg = AM.Base.Reg;
StubAM.GV = GV;
StubAM.GVOpFlags = GVFlags;
Owen Anderson
committed
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
if (Subtarget->isPICStyleRIPRel())
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
RC = X86::GR32RegisterClass;
}
LoadReg = createResultReg(RC);
addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
// Now construct the final address. Note that the Disp, Scale,
// and Index values may already be set here.
AM.Base.Reg = LoadReg;
AM.GV = 0;
return true;
}
// If all else fails, try to materialize the value in a register.
if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
if (AM.Base.Reg == 0) {
AM.Base.Reg = getRegForValue(V);
return AM.Base.Reg != 0;
}
if (AM.IndexReg == 0) {
assert(AM.Scale == 1 && "Scale with no index!");
AM.IndexReg = getRegForValue(V);
return AM.IndexReg != 0;
}
}
return false;
}
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (Instruction *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
} else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default: break;
case Instruction::BitCast:
// Look past bitcasts.
return X86SelectCallAddress(U->getOperand(0), AM);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
// Handle constant address.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// Can't handle alternate code models yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
// RIP-relative addresses can't have additional register operands.
if (Subtarget->isPICStyleRIPRel() &&
(AM.Base.Reg != 0 || AM.IndexReg != 0))
return false;
// Can't handle TLS or DLLImport.
if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
return false;
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
// No ABI requires an extra load for anything other than DLLImport, which
// we rejected above. Return a direct reference to the global.
if (Subtarget->isPICStyleRIPRel()) {
// Use rip-relative addressing if we can. Above we verified that the
// base and index registers are unused.
assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
AM.Base.Reg = X86::RIP;
} else if (Subtarget->isPICStyleStubPIC()) {
AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
} else if (Subtarget->isPICStyleGOT()) {
AM.GVOpFlags = X86II::MO_GOTOFF;
return true;
// If all else fails, try to materialize the value in a register.
Chris Lattner
committed
if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
if (AM.Base.Reg == 0) {
AM.Base.Reg = getRegForValue(V);
return AM.Base.Reg != 0;
}
if (AM.IndexReg == 0) {
assert(AM.Scale == 1 && "Scale with no index!");
AM.IndexReg = getRegForValue(V);
return AM.IndexReg != 0;
}
}
return false;
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction* I) {
Owen Anderson
committed
EVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(1), AM))
return X86FastEmitStore(VT, I->getOperand(0), AM);
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(Instruction *I) {
Owen Anderson
committed
EVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(0), AM))
unsigned ResultReg = 0;
if (X86FastEmitLoad(VT, AM, ResultReg)) {
UpdateValueMap(I, ResultReg);
return true;
Owen Anderson
committed
static unsigned X86ChooseCmpOpcode(EVT VT) {
Owen Anderson
committed
switch (VT.getSimpleVT().SimpleTy) {
Owen Anderson
committed
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
case MVT::f32: return X86::UCOMISSrr;
case MVT::f64: return X86::UCOMISDrr;
Chris Lattner
committed
/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
/// of the comparison, return an opcode that works for the compare (e.g.
/// CMP32ri) otherwise return 0.
Owen Anderson
committed
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
Owen Anderson
committed
switch (VT.getSimpleVT().SimpleTy) {
Chris Lattner
committed
// Otherwise, we can't fold the immediate into this comparison.
Owen Anderson
committed
case MVT::i8: return X86::CMP8ri;
case MVT::i16: return X86::CMP16ri;
case MVT::i32: return X86::CMP32ri;
case MVT::i64:
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
return X86::CMP64ri32;
return 0;
}
Chris Lattner
committed
}
Owen Anderson
committed
bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Op1))
Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
// CMPri, otherwise use CMPrr.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
.addImm(Op1C->getSExtValue());
return true;
}
}
unsigned CompareOpc = X86ChooseCmpOpcode(VT);
if (CompareOpc == 0) return false;
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
return true;
}
bool X86FastISel::X86SelectCmp(Instruction *I) {
CmpInst *CI = cast<CmpInst>(I);
Owen Anderson
committed
EVT VT;
unsigned ResultReg = createResultReg(&X86::GR8RegClass);
bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
switch (CI->getPredicate()) {
case CmpInst::FCMP_OEQ: {
if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
return false;
unsigned EReg = createResultReg(&X86::GR8RegClass);
unsigned NPReg = createResultReg(&X86::GR8RegClass);
BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
BuildMI(MBB, DL,
TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
UpdateValueMap(I, ResultReg);
return true;
}
case CmpInst::FCMP_UNE: {
if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
return false;
unsigned NEReg = createResultReg(&X86::GR8RegClass);
unsigned PReg = createResultReg(&X86::GR8RegClass);
BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
UpdateValueMap(I, ResultReg);
return true;
case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
default:
return false;
}
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
std::swap(Op0, Op1);
// Emit a compare of Op0/Op1.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
}
bool X86FastISel::X86SelectZExt(Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
if (I->getType() == Type::getInt8Ty(I->getContext()) &&
I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
Owen Anderson
committed
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
return true;
}
return false;
}
bool X86FastISel::X86SelectBranch(Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
// Fold the common case of a conditional branch with a comparison.
if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse()) {
Owen Anderson
committed
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
if (MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
switch (Predicate) {
case CmpInst::FCMP_OEQ:
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::FCMP_UNE;
// FALL THROUGH
case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break;
case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break;
case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break;
case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break;
case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break;
case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break;
case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break;
case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break;
case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break;
case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break;
case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break;
case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break;
case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
default:
return false;
}
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
std::swap(Op0, Op1);
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
Chris Lattner
committed
BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
if (Predicate == CmpInst::FCMP_UNE) {
// X86 requires a second branch to handle UNE (and OEQ,
// which is mapped to UNE above).
BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
FastEmitBranch(FalseMBB);
MBB->addSuccessor(TrueMBB);
return true;
}
Bill Wendling
committed
} else if (ExtractValueInst *EI =
dyn_cast<ExtractValueInst>(BI->getCondition())) {
// Check to see if the branch instruction is from an "arithmetic with
// overflow" intrinsic. The main way these intrinsics are used is:
//
// %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
// %sum = extractvalue { i32, i1 } %t, 0
// %obit = extractvalue { i32, i1 } %t, 1
// br i1 %obit, label %overflow, label %normal
//
// The %sum and %obit are converted in an ADD and a SETO/SETB before
Bill Wendling
committed
// reaching the branch. Therefore, we search backwards through the MBB
// looking for the SETO/SETB instruction. If an instruction modifies the
// EFLAGS register before we reach the SETO/SETB instruction, then we can't
// convert the branch into a JO/JB instruction.
if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
const MachineInstr *SetMI = 0;
unsigned Reg = lookUpRegForValue(EI);
for (MachineBasicBlock::const_reverse_iterator
RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
const MachineInstr &MI = *RI;
if (MI.modifiesRegister(Reg)) {
unsigned Src, Dst, SrcSR, DstSR;
if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
Reg = Src;
continue;
Bill Wendling
committed
}
Bill Wendling
committed
Bill Wendling
committed
}
const TargetInstrDesc &TID = MI.getDesc();
if (TID.hasUnmodeledSideEffects() ||
TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
break;
}
Bill Wendling
committed
if (SetMI) {
unsigned OpCode = SetMI->getOpcode();
if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB);
MBB->addSuccessor(TrueMBB);
return true;
Bill Wendling
committed
}
}
Bill Wendling
committed
}
}
}
// Otherwise do a clumsy setcc and re-test it.
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
FastEmitBranch(FalseMBB);
MBB->addSuccessor(TrueMBB);
bool X86FastISel::X86SelectShift(Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
if (I->getType() == Type::getInt8Ty(I->getContext())) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
default: return false;
}
} else if (I->getType() == Type::getInt16Ty(I->getContext())) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
default: return false;
}
} else if (I->getType() == Type::getInt32Ty(I->getContext())) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
default: return false;
}
} else if (I->getType() == Type::getInt64Ty(I->getContext())) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
default: return false;
}
} else {
return false;
}
Owen Anderson
committed
EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
Owen Anderson
committed
if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
// Fold immediate in shl(x,3).
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);