//===-- X86ISelSimple.cpp - A simple instruction selector for x86 ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This file defines a simple peephole instruction selector for the x86 target
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "Support/Statistic.h"
using namespace llvm;
namespace {
Statistic<>
NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
/// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
/// Representation.
///
enum TypeClass {
cByte, cShort, cInt, cFP, cLong
};
}
/// getClass - Turn a primitive type into a "class" number which is based on the
/// size of the type, and whether or not it is floating point.
///
static inline TypeClass getClass(const Type *Ty) {
switch (Ty->getTypeID()) {
case Type::SByteTyID:
case Type::UByteTyID: return cByte; // Byte operands are class #0
case Type::ShortTyID:
case Type::UShortTyID: return cShort; // Short operands are class #1
case Type::IntTyID:
case Type::UIntTyID:
case Type::PointerTyID: return cInt; // Int's and pointers are class #2
case Type::FloatTyID:
case Type::DoubleTyID: return cFP; // Floating Point is #3
case Type::LongTyID:
case Type::ULongTyID: return cLong; // Longs are class #4
default:
assert(0 && "Invalid type to getClass!");
return cByte; // not reached
}
}
// getClassB - Just like getClass, but treat boolean values as bytes.
static inline TypeClass getClassB(const Type *Ty) {
if (Ty == Type::BoolTy) return cByte;
return getClass(Ty);
}
struct ISel : public FunctionPass, InstVisitor<ISel> {
TargetMachine &TM;
MachineFunction *F; // The function we are compiling into
MachineBasicBlock *BB; // The current MBB we are compiling
int VarArgsFrameIndex; // FrameIndex for start of varargs area
int ReturnAddressIndex; // FrameIndex for the return address
std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
// MBBMap - Mapping between LLVM BB -> Machine BB
std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
// AllocaMap - Mapping from fixed sized alloca instructions to the
// FrameIndex for the alloca.
std::map<AllocaInst*, unsigned> AllocaMap;
ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}
/// runOnFunction - Top level implementation of instruction selection for
/// the entire function.
///
bool runOnFunction(Function &Fn) {
// First pass over the function, lower any unknown intrinsic functions
// with the IntrinsicLowering class.
LowerUnknownIntrinsicFunctionCalls(Fn);
F = &MachineFunction::construct(&Fn, TM);
// Create all of the machine basic blocks for the function...
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I));
// Set up a frame object for the return address. This is used by the
// llvm.returnaddress & llvm.frameaddress intrinisics.
ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);
// Copy incoming arguments off of the stack...
LoadArgumentsToVirtualRegs(Fn);
// Instruction select everything except PHI nodes
visit(Fn);
// Select the PHI nodes
SelectPHINodes();
// Insert the FP_REG_KILL instructions into blocks that need them.
InsertFPRegKills();
AllocaMap.clear();
// We always build a machine code representation for the function
return true;
}
virtual const char *getPassName() const {
return "X86 Simple Instruction Selection";
}
/// visitBasicBlock - This method is called when we are visiting a new basic
/// block. This simply creates a new MachineBasicBlock to emit code into
/// and adds it to the current MachineFunction. Subsequent visit* for
/// instructions will be invoked for all instructions in the basic block.
///
void visitBasicBlock(BasicBlock &LLVM_BB) {
BB = MBBMap[&LLVM_BB];
}
/// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
/// function, lowering any calls to unknown intrinsic functions into the
/// equivalent LLVM code.
void LowerUnknownIntrinsicFunctionCalls(Function &F);
/// LoadArgumentsToVirtualRegs - Load all of the arguments to this function
/// from the stack into virtual registers.
///
void LoadArgumentsToVirtualRegs(Function &F);
/// SelectPHINodes - Insert machine code to generate phis. This is tricky
/// because we have to generate our sources into the source basic blocks,
/// not the current one.
///
void SelectPHINodes();
/// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks
/// that need them. This only occurs due to the floating point stackifier
/// not being aggressive enough to handle arbitrary global stackification.
///
void InsertFPRegKills();
// Visitation methods for various instructions. These methods simply emit
// fixed X86 code for each instruction.
//
// Control flow operators
void visitBranchInst(BranchInst &BI);
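// ValueRecord - Describes a value passed to or returned from a call: either
// an existing LLVM Value, or a virtual register number together with its
// type when no Value is available.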
struct ValueRecord {
Value *Val;
unsigned Reg;
const Type *Ty;
ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {}
ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {}
};
void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
const std::vector<ValueRecord> &Args);
void visitCallInst(CallInst &I);
void visitIntrinsicCall(Intrinsic::ID ID, CallInst &I);
// Arithmetic operators
void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
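// The OperatorClass numbering used below is: 0 = Add, 1 = Sub, 2 = And,
// 3 = Or, 4 = Xor; the fall-through switch in copyConstantToRegister
// produces the same numbering for constant expressions.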
void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
void visitDiv(BinaryOperator &B) { visitDivRem(B); }
void visitRem(BinaryOperator &B) { visitDivRem(B); }
void visitDivRem(BinaryOperator &B);
void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); }
void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); }
void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); }
// Comparison operators...
void visitSetCondInst(SetCondInst &I);
unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI);
void visitSelectInst(SelectInst &SI);
// Memory Instructions
void visitLoadInst(LoadInst &I);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitAllocaInst(AllocaInst &I);
void visitMallocInst(MallocInst &I);
void visitFreeInst(FreeInst &I);
void visitShiftInst(ShiftInst &I);
void visitPHINode(PHINode &I) {} // PHI nodes handled by second pass
void visitCastInst(CastInst &I);
void visitVANextInst(VANextInst &I);
void visitVAArgInst(VAArgInst &I);
void visitInstruction(Instruction &I) {
std::cerr << "Cannot instruction select: " << I;
abort();
}
/// promote32 - Make a value 32-bits wide, and put it somewhere.
///
void promote32(unsigned targetReg, const ValueRecord &VR);
/// getAddressingMode - Get the addressing mode to use to address the
/// specified value. The returned value should be used with addFullAddress.
void getAddressingMode(Value *Addr, X86AddressMode &AM);
/// getGEPIndex - This is used to fold GEP instructions into X86 addressing
/// expressions.
void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
std::vector<Value*> &GEPOps,
std::vector<const Type*> &GEPTypes,
X86AddressMode &AM);
/// isGEPFoldable - Return true if the specified GEP can be completely
/// folded into the addressing mode of a load/store or lea instruction.
bool isGEPFoldable(MachineBasicBlock *MBB,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, X86AddressMode &AM);
/// emitGEPOperation - Common code shared between visitGetElementPtrInst and
/// constant expression GEP support.
///
void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
Value *Src, User::op_iterator IdxBegin,
User::op_iterator IdxEnd, unsigned TargetReg);
/// emitCastOperation - Common code shared between visitCastInst and
/// constant expression cast support.
void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP,
Value *Src, const Type *DestTy, unsigned TargetReg);
/// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
/// and constant expression support.
void emitSimpleBinaryOperation(MachineBasicBlock *BB,
MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1,
unsigned OperatorClass, unsigned TargetReg);
/// emitBinaryFPOperation - This method handles emission of floating point
/// Add (0), Sub (1), Mul (2), and Div (3) operations.
void emitBinaryFPOperation(MachineBasicBlock *BB,
MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1,
unsigned OperatorClass, unsigned TargetReg);
void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, unsigned TargetReg);
void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned Op0Reg, unsigned Op1Reg);
void doMultiplyConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned Op0Reg, unsigned Op1Val);
void emitDivRemOperation(MachineBasicBlock *BB,
MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, bool isDiv,
unsigned TargetReg);
/// emitSetCCOperation - Common code shared between visitSetCondInst and
/// constant expression support.
void emitSetCCOperation(MachineBasicBlock *BB,
MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, unsigned Opcode,
unsigned TargetReg);
/// emitShiftOperation - Common code shared between visitShiftInst and
/// constant expression support.
void emitShiftOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Op, Value *ShiftAmount, bool isLeftShift,
const Type *ResultTy, unsigned DestReg);
/// emitSelectOperation - Common code shared between visitSelectInst and the
/// constant expression support.
void emitSelectOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Cond, Value *TrueVal, Value *FalseVal,
unsigned DestReg);
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
void copyConstantToRegister(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI,
Constant *C, unsigned Reg);
void emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
unsigned LHS, unsigned RHS);
/// makeAnotherReg - This method returns the next register number we haven't
/// yet used.
///
/// Long values are handled somewhat specially. They are always allocated
/// as pairs of 32 bit integer values. The register number returned is the
/// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits
/// of the long value.
///
unsigned makeAnotherReg(const Type *Ty) {
assert(dynamic_cast<const X86RegisterInfo*>(TM.getRegisterInfo()) &&
"Current target doesn't have X86 reg info??");
const X86RegisterInfo *MRI =
static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
if (Ty == Type::LongTy || Ty == Type::ULongTy) {
const TargetRegisterClass *RC = MRI->getRegClassForType(Type::IntTy);
// Create the lower part
F->getSSARegMap()->createVirtualRegister(RC);
// Create the upper part.
return F->getSSARegMap()->createVirtualRegister(RC)-1;
}
// Add the mapping of regnumber => reg class to MachineFunction
const TargetRegisterClass *RC = MRI->getRegClassForType(Ty);
return F->getSSARegMap()->createVirtualRegister(RC);
}
/// getReg - This method turns an LLVM value into a register number.
///
unsigned getReg(Value &V) { return getReg(&V); } // Allow references
unsigned getReg(Value *V) {
// Just append to the end of the current bb.
MachineBasicBlock::iterator It = BB->end();
return getReg(V, BB, It);
}
unsigned getReg(Value *V, MachineBasicBlock *MBB,
MachineBasicBlock::iterator IPt);
/// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
/// that is to be statically allocated with the initial stack frame
/// adjustment.
unsigned getFixedSizedAllocaFI(AllocaInst *AI);
/// dyn_castFixedAlloca - If the specified value is a fixed size alloca
/// instruction in the entry block, return it. Otherwise, return a null
/// pointer.
static AllocaInst *dyn_castFixedAlloca(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
BasicBlock *BB = AI->getParent();
if (isa<ConstantUInt>(AI->getArraySize()) && BB == &BB->getParent()->front())
return AI;
}
return 0;
}
};
/// getReg - This method turns an LLVM value into a register number.
///
unsigned ISel::getReg(Value *V, MachineBasicBlock *MBB,
MachineBasicBlock::iterator IPt) {
// If this operand is a constant, emit the code to copy the constant into
// the register here...
if (Constant *C = dyn_cast<Constant>(V)) {
unsigned Reg = makeAnotherReg(V->getType());
copyConstantToRegister(MBB, IPt, C, Reg);
return Reg;
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
// Do not emit noop casts at all, unless it's a double -> float cast.
if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()) &&
(CI->getType() != Type::FloatTy ||
CI->getOperand(0)->getType() != Type::DoubleTy))
return getReg(CI->getOperand(0), MBB, IPt);
} else if (AllocaInst *AI = dyn_castFixedAlloca(V)) {
// If the alloca address couldn't be folded into the instruction addressing,
// emit an explicit LEA as appropriate.
unsigned Reg = makeAnotherReg(V->getType());
unsigned FI = getFixedSizedAllocaFI(AI);
addFrameReference(BuildMI(*MBB, IPt, X86::LEA32r, 4, Reg), FI);
return Reg;
}
unsigned &Reg = RegMap[V];
if (Reg == 0) {
Reg = makeAnotherReg(V->getType());
RegMap[V] = Reg;
}
return Reg;
}
/// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
/// that is to be statically allocated with the initial stack frame
/// adjustment.
unsigned ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
// Already computed this?
std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI);
if (I != AllocaMap.end() && I->first == AI) return I->second;
const Type *Ty = AI->getAllocatedType();
ConstantUInt *CUI = cast<ConstantUInt>(AI->getArraySize());
unsigned TySize = TM.getTargetData().getTypeSize(Ty);
TySize *= CUI->getValue(); // Get total allocated size...
unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty);
// Create a new stack object using the frame manager...
int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
AllocaMap.insert(I, std::make_pair(AI, FrameIdx));
return FrameIdx;
}
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Constant *C, unsigned R) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
unsigned Class = 0;
switch (CE->getOpcode()) {
case Instruction::GetElementPtr:
emitGEPOperation(MBB, IP, CE->getOperand(0), CE->op_begin()+1,
CE->op_end(), R);
return;
case Instruction::Cast:
emitCastOperation(MBB, IP, CE->getOperand(0), CE->getType(), R);
return;
case Instruction::Xor: ++Class; // FALL THROUGH
case Instruction::Or:  ++Class; // FALL THROUGH
case Instruction::And: ++Class; // FALL THROUGH
case Instruction::Sub: ++Class; // FALL THROUGH
case Instruction::Add:
emitSimpleBinaryOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
Class, R);
return;
case Instruction::Mul:
emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R);
return;
case Instruction::Div:
case Instruction::Rem:
emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
CE->getOpcode() == Instruction::Div, R);
return;
case Instruction::SetNE:
case Instruction::SetEQ:
case Instruction::SetLT:
case Instruction::SetGT:
case Instruction::SetLE:
case Instruction::SetGE:
emitSetCCOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
CE->getOpcode(), R);
return;
case Instruction::Shl:
case Instruction::Shr:
emitShiftOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
CE->getOpcode() == Instruction::Shl, CE->getType(), R);
return;
case Instruction::Select:
emitSelectOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
CE->getOperand(2), R);
return;
std::cerr << "Offending expr: " << *C << "\n";
Chris Lattner
committed
assert(0 && "Constant expression not yet handled!\n");
if (C->getType()->isIntegral()) {
unsigned Class = getClassB(C->getType());
if (Class == cLong) {
// Copy the value into the register pair.
uint64_t Val = cast<ConstantInt>(C)->getRawValue();
BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(Val & 0xFFFFFFFF);
BuildMI(*MBB, IP, X86::MOV32ri, 1, R+1).addImm(Val >> 32);
return;
}
assert(Class <= cInt && "Type not handled yet!");
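// IntegralOpcodeTab is indexed by TypeClass: cByte, cShort, cInt.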
static const unsigned IntegralOpcodeTab[] = {
X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
};
if (C->getType() == Type::BoolTy) {
BuildMI(*MBB, IP, X86::MOV8ri, 1, R).addImm(C == ConstantBool::True);
} else {
ConstantInt *CI = cast<ConstantInt>(C);
BuildMI(*MBB, IP, IntegralOpcodeTab[Class],1,R).addImm(CI->getRawValue());
}
} else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->isExactlyValue(+0.0))
BuildMI(*MBB, IP, X86::FLD0, 0, R);
else if (CFP->isExactlyValue(+1.0))
BuildMI(*MBB, IP, X86::FLD1, 0, R);
else {
// Otherwise we need to spill the constant to memory...
MachineConstantPool *CP = F->getConstantPool();
unsigned CPI = CP->getConstantPoolIndex(CFP);
const Type *Ty = CFP->getType();
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m;
addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI);
}
} else if (isa<ConstantPointerNull>(C)) {
// Copy zero (null pointer) to the register.
BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(0);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addGlobalAddress(GV);
} else {
std::cerr << "Offending constant: " << *C << "\n";
assert(0 && "Type not handled yet!");
/// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from
/// the stack into virtual registers.
///
void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
// Emit instructions to load the arguments... On entry to a function on the
// X86, the stack frame looks like this:
//
// [ESP] -- return address
// [ESP + 4] -- first argument (leftmost lexically)
// [ESP + 8] -- second argument, if first argument is four bytes in size
// ...
//
unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
MachineFrameInfo *MFI = F->getFrameInfo();
for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
bool ArgLive = !I->use_empty();
unsigned Reg = ArgLive ? getReg(*I) : 0;
int FI; // Frame object index
switch (getClassB(I->getType())) {
case cByte:
if (ArgLive) {
FI = MFI->CreateFixedObject(1, ArgOffset);
addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
}
break;
case cShort:
if (ArgLive) {
FI = MFI->CreateFixedObject(2, ArgOffset);
addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
}
break;
case cInt:
if (ArgLive) {
FI = MFI->CreateFixedObject(4, ArgOffset);
addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
}
break;
case cLong:
if (ArgLive) {
FI = MFI->CreateFixedObject(8, ArgOffset);
addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
}
ArgOffset += 4; // longs require 4 additional bytes
break;
case cFP:
if (ArgLive) {
unsigned Opcode;
if (I->getType() == Type::FloatTy) {
Opcode = X86::FLD32m;
FI = MFI->CreateFixedObject(4, ArgOffset);
} else {
Opcode = X86::FLD64m;
FI = MFI->CreateFixedObject(8, ArgOffset);
}
addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
if (I->getType() == Type::DoubleTy)
ArgOffset += 4; // doubles require 4 additional bytes
}
break;
default:
assert(0 && "Unhandled argument type!");
}
ArgOffset += 4; // Each argument takes at least 4 bytes on the stack...
}
// If the function takes variable number of arguments, add a frame offset for
// the start of the first vararg value... this is used to expand
// llvm.va_start.
if (Fn.getFunctionType()->isVarArg())
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
}
/// SelectPHINodes - Insert machine code to generate phis. This is tricky
/// because we have to generate our sources into the source basic blocks, not
/// the current one.
///
void ISel::SelectPHINodes() {
const TargetInstrInfo &TII = *TM.getInstrInfo();
const Function &LF = *F->getFunction(); // The LLVM function...
for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
const BasicBlock *BB = I;
MachineBasicBlock &MBB = *MBBMap[I];
// Loop over all of the PHI nodes in the LLVM basic block...
MachineBasicBlock::iterator PHIInsertPoint = MBB.begin();
for (BasicBlock::const_iterator I = BB->begin();
PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I)); ++I) {
// Create a new machine instr PHI node, and insert it.
unsigned PHIReg = getReg(*PN);
MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint,
X86::PHI, PN->getNumOperands(), PHIReg);
MachineInstr *LongPhiMI = 0;
if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy)
LongPhiMI = BuildMI(MBB, PHIInsertPoint,
X86::PHI, PN->getNumOperands(), PHIReg+1);
// PHIValues - Map of blocks to incoming virtual registers. We use this
// so that we only initialize one incoming value for a particular block,
// even if the block has multiple entries in the PHI node.
//
std::map<MachineBasicBlock*, unsigned> PHIValues;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)];
unsigned ValReg;
std::map<MachineBasicBlock*, unsigned>::iterator EntryIt =
PHIValues.lower_bound(PredMBB);
if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) {
// We already inserted an initialization of the register for this
// predecessor. Recycle it.
ValReg = EntryIt->second;
} else {
// Get the incoming value into a virtual register.
//
Value *Val = PN->getIncomingValue(i);
// If this is a constant or GlobalValue, we may have to insert code
// into the basic block to compute it into a virtual register.
if (isa<Constant>(Val) && !isa<ConstantExpr>(Val)) {
// Simple constants get emitted at the end of the basic block,
// before any terminator instructions. We "know" that the code to
// move a constant into a register will never clobber any flags.
ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator());
} else {
// Because we don't want to clobber any values which might be in
// physical registers with the computation of this constant (which
// might be arbitrarily complex if it is a constant expression),
// just insert the computation at the top of the basic block.
MachineBasicBlock::iterator PI = PredMBB->begin();
// Skip over any PHI nodes though!
while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI)
++PI;
ValReg = getReg(Val, PredMBB, PI);
}
// Remember that we inserted a value for this PHI for this predecessor
PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg));
}
PhiMI->addRegOperand(ValReg);
PhiMI->addMachineBasicBlockOperand(PredMBB);
if (LongPhiMI) {
LongPhiMI->addRegOperand(ValReg+1);
LongPhiMI->addMachineBasicBlockOperand(PredMBB);
}
}
// Now that we emitted all of the incoming values for the PHI node, make
// sure to reposition the InsertPoint after the PHI that we just added.
// This is needed because we might have inserted a constant into this
// block, right after the PHI's which is before the old insert point!
PHIInsertPoint = LongPhiMI ? LongPhiMI : PhiMI;
++PHIInsertPoint;
}
}
}
/// RequiresFPRegKill - The floating point stackifier pass cannot insert
/// compensation code on critical edges. As such, it requires that we kill all
/// FP registers on the exit from any blocks that either ARE critical edges, or
/// branch to a block that has incoming critical edges.
///
/// Note that this kill instruction will eventually be eliminated when
/// restrictions in the stackifier are relaxed.
///
static bool RequiresFPRegKill(const MachineBasicBlock *MBB) {
#if 0
const BasicBlock *BB = MBB->getBasicBlock ();
for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
const BasicBlock *Succ = *SI;
pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
++PI; // Block has at least one predecessor
if (PI != PE) { // If it has exactly one, this isn't crit edge
// If this block has more than one predecessor, check all of the
// predecessors to see if they have multiple successors. If so, then the
// block we are analyzing needs an FPRegKill.
for (PI = pred_begin(Succ); PI != PE; ++PI) {
const BasicBlock *Pred = *PI;
succ_const_iterator SI2 = succ_begin(Pred);
++SI2; // There must be at least one successor of this block.
if (SI2 != succ_end(Pred))
return true; // Yes, we must insert the kill on this edge.
}
}
}
// If we got this far, there is no need to insert the kill instruction.
return false;
#else
return true;
#endif
}
// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks that
// need them. This only occurs due to the floating point stackifier not being
// aggressive enough to handle arbitrary global stackification.
//
// Currently we insert an FP_REG_KILL instruction into each block that uses or
// defines a floating point virtual register.
//
// When the global register allocators (like linear scan) finally update live
// variable analysis, we can keep floating point values in registers across
// portions of the CFG that do not involve critical edges. This will be a big
// win, but we are waiting on the global allocators before we can do this.
//
// With a bit of work, the floating point stackifier pass can be enhanced to
// break critical edges as needed (to make a place to put compensation code),
// but this will require some infrastructure improvements as well.
//
void ISel::InsertFPRegKills() {
SSARegMap &RegMap = *F->getSSARegMap();
for (MachineFunction::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
MachineOperand& MO = I->getOperand(i);
if (MO.isRegister() && MO.getReg()) {
unsigned Reg = MO.getReg();
if (MRegisterInfo::isVirtualRegister(Reg))
if (RegMap.getRegClass(Reg)->getSize() == 10)
goto UsesFPReg;
}
}
// If we haven't found an FP register use or def in this basic block, check
// to see if any of our successors has an FP PHI node, which will cause a
// copy to be inserted into this block.
for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock *SBB = *SI;
for (MachineBasicBlock::iterator I = SBB->begin();
I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10)
goto UsesFPReg;
}
}
continue;
UsesFPReg:
// Okay, this block uses an FP register. If the block has successors (ie,
// it's not an unwind/return), insert the FP_REG_KILL instruction.
if (BB->succ_size () && RequiresFPRegKill(BB)) {
BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
++NumFPKill;
}
}
}
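// getAddressingMode - Fold a GEP instruction, a constant-expression GEP, or a
// fixed-size alloca directly into an X86 addressing mode if possible;
// otherwise materialize the address into a register and use it as the base.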
void ISel::getAddressingMode(Value *Addr, X86AddressMode &AM) {
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = 0; AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0;
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
AM))
return;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
if (CE->getOpcode() == Instruction::GetElementPtr)
if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
AM))
return;
} else if (AllocaInst *AI = dyn_castFixedAlloca(Addr)) {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = getFixedSizedAllocaFI(AI);
return;
}
// If it's not foldable, reset addr mode.
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = getReg(Addr);
AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0;
}
// canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold
// it into the conditional branch or select instruction which is the only user
// of the cc instruction. This is the case if the conditional branch is the
// only user of the setcc. We also don't handle long arguments below, so we
// reject them here as well.
static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
if (SetCondInst *SCI = dyn_cast<SetCondInst>(V))
if (SCI->hasOneUse()) {
Instruction *User = cast<Instruction>(SCI->use_back());
if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
(getClassB(SCI->getOperand(0)->getType()) != cLong ||
SCI->getOpcode() == Instruction::SetEQ ||
SCI->getOpcode() == Instruction::SetNE))
return SCI;
}
return 0;
}
// Return a fixed numbering for setcc instructions which does not depend on the
// order of the opcodes.
//
static unsigned getSetCCNumber(unsigned Opcode) {
switch(Opcode) {
default: assert(0 && "Unknown setcc instruction!");
case Instruction::SetEQ: return 0;
case Instruction::SetNE: return 1;
case Instruction::SetLT: return 2;
case Instruction::SetGE: return 3;
case Instruction::SetGT: return 4;
case Instruction::SetLE: return 5;
}
}
// LLVM  -> X86 signed  X86 unsigned
// -----    ----------  ------------
// seteq -> sete        sete
// setne -> setne       setne
// setlt -> setl        setb
// setge -> setge       setae
// setgt -> setg        seta
// setle -> setle       setbe
// ----
//          sets        // Used by comparison with 0 optimization
//          setns
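// SetCCOpcodeTab is indexed by operand signedness (0 = unsigned, 1 = signed)
// and then by the setcc number from getSetCCNumber; slots 6 and 7 hold
// SETS/SETNS for the compare-against-zero remapping in EmitComparison.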
static const unsigned SetCCOpcodeTab[2][8] = {
{ X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAEr, X86::SETAr, X86::SETBEr,
0, 0 },
{ X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGEr, X86::SETGr, X86::SETLEr,
X86::SETSr, X86::SETNSr },
};
/// emitUCOMr - In the future when we support processors before the P6, this
/// wraps the logic for emitting an FUCOMr vs FUCOMIr.
void ISel::emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
unsigned LHS, unsigned RHS) {
if (0) { // for processors prior to the P6
BuildMI(*MBB, IP, X86::FUCOMr, 2).addReg(LHS).addReg(RHS);
BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
BuildMI(*MBB, IP, X86::SAHF, 1);
} else {
BuildMI(*MBB, IP, X86::FUCOMIr, 2).addReg(LHS).addReg(RHS);
}
}
// EmitComparison - This function emits a comparison of the two operands,
// returning the extended setcc code to use.
unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP) {
// The arguments are already supposed to be of the same type.
const Type *CompTy = Op0->getType();
unsigned Class = getClassB(CompTy);
unsigned Op0r = getReg(Op0, MBB, IP);
// Special case handling of: cmp R, i
if (isa<ConstantPointerNull>(Op1)) {
if (OpNum < 2) // seteq/setne -> test
BuildMI(*MBB, IP, X86::TEST32rr, 2).addReg(Op0r).addReg(Op0r);
else
BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(0);
return OpNum;
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Class == cByte || Class == cShort || Class == cInt) {
unsigned Op1v = CI->getRawValue();
// Mask off any upper bits of the constant, if there are any...
Op1v &= (1ULL << (8 << Class)) - 1;
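// (8 << Class) is the operand width in bits: 8 for cByte, 16 for cShort,
// and 32 for cInt.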
// If this is a comparison against zero, emit more efficient code. We
// can't handle unsigned comparisons against zero unless they are == or
// !=. These should have been strength reduced already anyway.
if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {
static const unsigned TESTTab[] = {
X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
};
BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r);
if (OpNum == 2) return 6; // Map jl -> js
if (OpNum == 3) return 7; // Map jg -> jns
return OpNum;
}
static const unsigned CMPTab[] = {
X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
};
BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
return OpNum;
} else {
assert(Class == cLong && "Unknown integer class!");
unsigned LowCst = CI->getRawValue();
unsigned HiCst = CI->getRawValue() >> 32;
if (OpNum < 2) { // seteq, setne
unsigned LoTmp = Op0r;
if (LowCst != 0) {
LoTmp = makeAnotherReg(Type::IntTy);
BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst);
}
unsigned HiTmp = Op0r+1;
if (HiCst != 0) {
HiTmp = makeAnotherReg(Type::IntTy);
BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
}
unsigned FinalTmp = makeAnotherReg(Type::IntTy);
BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
return OpNum;
} else {
// Emit a sequence of code which compares the high and low parts once
// each, then uses a conditional move to handle the overflow case. For
// example, a setlt for long would generate code like this:
//
// AL = lo(op1) < lo(op2) // Always unsigned comparison
// BL = hi(op1) < hi(op2) // Signedness depends on operands
// dest = hi(op1) == hi(op2) ? BL : AL;
//
// FIXME: This would be much better if we had hierarchical register
// classes! Until then, hardcode registers so that we can deal with
// their aliases (because we don't have conditional byte moves).
//
BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
.addReg(X86::AX);
// NOTE: visitSetCondInst knows that the value is dumped into the BL
// register at this point for long values...
return OpNum;
}
}
}
// Special case handling of comparison against +/- 0.0
if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
BuildMI(*MBB, IP, X86::FTST, 1).addReg(Op0r);
BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
BuildMI(*MBB, IP, X86::SAHF, 1);