Newer
Older
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <map>
using namespace llvm;
// Used in getTargetNodeName() below
namespace {
std::map<unsigned, const char *> node_names;
//! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
Scott Michel
committed
const MVT valtype;
const int prefslot_byte;
const valtype_map_s valtype_map[] = {
{ MVT::i1, 3 },
{ MVT::i8, 3 },
{ MVT::i16, 2 },
{ MVT::i32, 0 },
{ MVT::f32, 0 },
{ MVT::i64, 0 },
{ MVT::f64, 0 },
{ MVT::i128, 0 }
};
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
const valtype_map_s *getValueTypeMapEntry(MVT VT) {
const valtype_map_s *retval = 0;
for (size_t i = 0; i < n_valtype_map; ++i) {
if (valtype_map[i].valtype == VT) {
}
}
#ifndef NDEBUG
if (retval == 0) {
cerr << "getValueTypeMapEntry returns NULL for "
abort();
}
#endif
return retval;
}
//! Predicate that returns true if operand is a memory target
/*!
\arg Op Operand to test
\return true if the operand is a memory target (i.e., global
address, external symbol, constant pool) or an A-form
{
const unsigned Opc = Op.getOpcode();
return (Opc == ISD::GlobalAddress
|| Opc == ISD::GlobalTLSAddress
|| Opc == ISD::JumpTable
|| Opc == ISD::ConstantPool
Bill Wendling
committed
|| Opc == ISD::ExternalSymbol
|| Opc == ISD::TargetGlobalAddress
|| Opc == ISD::TargetGlobalTLSAddress
|| Opc == ISD::TargetJumpTable
|| Opc == ISD::TargetConstantPool
Bill Wendling
committed
|| Opc == ISD::TargetExternalSymbol
//! Predicate that returns true if the operand is an indirect target
{
const unsigned Opc = Op.getOpcode();
return (Opc == ISD::Register
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
: TargetLowering(TM),
SPUTM(TM)
{
// Fold away setcc operations if possible.
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
// Set up the SPU's register classes:
addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
// Initialize libcalls:
setLibcallName(RTLIB::MUL_I64, "__muldi3");
// SPU has no sign or zero extended loads for i1, i8, i16:
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
setTruncStoreAction(MVT::i8, MVT::i8, Custom);
setTruncStoreAction(MVT::i16, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i64, MVT::i8, Custom);
setTruncStoreAction(MVT::i128, MVT::i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
// SPU constant load actions are custom lowered:
setOperationAction(ISD::Constant, MVT::i64, Custom);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
// SPU's loads and stores have to be custom lowered:
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
MVT VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
// Expand the jumptable branches
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
Scott Michel
committed
// Custom lower SELECT_CC for most cases, but expand by default
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
Scott Michel
committed
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
Scott Michel
committed
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
// SPU has no intrinsics for these particular operations:
Andrew Lenharth
committed
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// We don't support sin/cos/sqrt/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
// If we're enabling GP optimizations, use hardware square root
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
// SPU can do rotate right and left, so legalize it... but customize for i8
// because instructions don't exist.
// FIXME: Change from "expand" to appropriate type once ROTR is supported in
// .td files.
setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
setOperationAction(ISD::ROTL, MVT::i32, Legal);
setOperationAction(ISD::ROTL, MVT::i16, Legal);
setOperationAction(ISD::ROTL, MVT::i8, Custom);
// SPU has no native version of shift left/right for i8
setOperationAction(ISD::SHL, MVT::i8, Custom);
setOperationAction(ISD::SRL, MVT::i8, Custom);
setOperationAction(ISD::SRA, MVT::i8, Custom);
Scott Michel
committed
setOperationAction(ISD::SHL, MVT::i64, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
// SMUL_LOHI, UMUL_LOHI
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
// SPU does not have BSWAP. It does have i32 support CTLZ.
// CTPOP has to be custom lowered.
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i8, Custom);
setOperationAction(ISD::CTPOP, MVT::i16, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
// SPU has a version of select that implements (a&~c)|(b&c), just like
Scott Michel
committed
// select ought to work:
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
setOperationAction(ISD::SETCC, MVT::i32, Legal);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
Scott Michel
committed
// Zero extension and sign extension for i64 have to be
// custom legalized
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
// Custom lower truncates
setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
// SPU has a legal FP -> signed INT instruction
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f32, Custom);
//setOperationAction(ISD::FDIV, MVT::f64, Custom);
// SPU has [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// Support label based line numbers.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
setOperationAction(ISD::JumpTable, VT, Custom);
// RET must be custom lowered, to meet ABI requirements
setOperationAction(ISD::RET, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
// Use the default implementation.
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
// Cell SPU has instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
// mul has to be custom lowered.
setOperationAction(ISD::MUL , VT, Custom);
setOperationAction(ISD::AND , VT, Legal);
setOperationAction(ISD::OR , VT, Legal);
setOperationAction(ISD::XOR , VT, Legal);
setOperationAction(ISD::LOAD , VT, Legal);
setOperationAction(ISD::SELECT, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
// These operations need to be expanded:
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Custom);
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::AND, MVT::v16i8, Custom);
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setShiftAmountType(MVT::i32);
setBooleanContents(ZeroOrOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::ADD);
Scott Michel
committed
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
computeRegisterProperties();
Scott Michel
committed
// Set other properties:
setSchedulingPreference(SchedulingForLatency);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
if (node_names.empty()) {
node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
Scott Michel
committed
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
Scott Michel
committed
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
Scott Michel
committed
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
"SPUISD::ROTQUAD_RZ_BYTES";
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
"SPUISD::ROTQUAD_RZ_BITS";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
"SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
}
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
return ((i != node_names.end()) ? i->second : 0);
}
MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
MVT VT = Op.getValueType();
Scott Michel
committed
return (VT.isInteger() ? VT : MVT(MVT::i32));
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//
#include "SPUGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Aligned load common code for CellSPU
/*!
\param[in] Op The SelectionDAG load or store operand
\param[in] DAG The selection DAG
\param[in] ST CellSPU subtarget information structure
\param[in,out] alignment Caller initializes this to the load or store node's
value from getAlignment(), may be updated while generating the aligned load
\param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
offset (divisible by 16, modulo 16 == 0)
\param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
offset of the preferred slot (modulo 16 != 0)
\param[in,out] VT Caller initializes this value type to the the load or store
node's loaded or stored value type; may be updated if an i1-extended load or
store.
\param[out] was16aligned true if the base pointer had 16-byte alignment,
otherwise false. Can help to determine if the chunk needs to be rotated.
Both load and store lowering load a block of data aligned on a 16-byte
boundary. This is the common aligned load code shared between both.
*/
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
LSBaseSDNode *LSN,
unsigned &alignment, int &alignOffs, int &prefSlotOffs,
MVT &VT, bool &was16aligned)
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
const valtype_map_s *vtm = getValueTypeMapEntry(VT);
SDValue basePtr = LSN->getBasePtr();
SDValue chain = LSN->getChain();
if (basePtr.getOpcode() == ISD::ADD) {
Gabor Greif
committed
SDValue Op1 = basePtr.getNode()->getOperand(1);
if (Op1.getOpcode() == ISD::Constant
|| Op1.getOpcode() == ISD::TargetConstant) {
const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
alignOffs = (int) CN->getZExtValue();
prefSlotOffs = (int) (alignOffs & 0xf);
// Adjust the rotation amount to ensure that the final result ends up in
// the preferred slot:
prefSlotOffs -= vtm->prefslot_byte;
basePtr = basePtr.getOperand(0);
// Loading from memory, can we adjust alignment?
if (basePtr.getOpcode() == SPUISD::AFormAddr) {
if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
alignment = GSDN->getGlobal()->getAlignment();
}
}
} else {
alignOffs = 0;
prefSlotOffs = -vtm->prefslot_byte;
}
} else if (basePtr.getOpcode() == ISD::FrameIndex) {
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
prefSlotOffs = (int) (alignOffs & 0xf);
prefSlotOffs -= vtm->prefslot_byte;
basePtr = DAG.getRegister(SPU::R1, VT);
} else {
alignOffs = 0;
prefSlotOffs = -vtm->prefslot_byte;
}
if (alignment == 16) {
// Realign the base pointer as a D-Form address:
if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
basePtr = DAG.getNode(ISD::ADD, PtrVT,
basePtr,
}
// Emit the vector load:
was16aligned = true;
return DAG.getLoad(MVT::v16i8, chain, basePtr,
LSN->getSrcValue(), LSN->getSrcValueOffset(),
LSN->isVolatile(), 16);
}
// Unaligned load or we're using the "large memory" model, which means that
// we have to be very pessimistic:
if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
DAG.getConstant(0, PtrVT));
basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
was16aligned = false;
return DAG.getLoad(MVT::v16i8, chain, basePtr,
LSN->getSrcValue(), LSN->getSrcValueOffset(),
LSN->isVolatile(), 16);
}
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
within a 16-byte block, we have to rotate to extract the requested element.
For extending loads, we also want to ensure that the following sequence is
emitted, e.g. for MVT::f32 extending load to MVT::f64:
\verbatim
%1 v16i8,ch = load
%2 v16i8,ch = rotate %1
%3 v4f8, ch = bitconvert %2
%4 f32 = vec2perfslot %3
%5 f64 = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
MVT InVT = LN->getMemoryVT();
MVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
int offset, rotamt;
bool was16aligned;
AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
was16aligned);
Gabor Greif
committed
if (result.getNode() == 0)
the_chain = result.getValue(1);
// Rotate the chunk if necessary
if (rotamt < 0)
rotamt += 16;
SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
LoadSDNode *LN1 = cast<LoadSDNode>(result);
result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
// Handle extending loads by extending the scalar result:
if (ExtType == ISD::SEXTLOAD) {
result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
} else if (ExtType == ISD::ZEXTLOAD) {
result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
} else if (ExtType == ISD::EXTLOAD) {
unsigned NewOpc = ISD::ANY_EXTEND;
};
result = DAG.getNode(SPUISD::LDRESULT, retvts,
retops, sizeof(retops) / sizeof(retops[0]));
}
case ISD::PRE_INC:
case ISD::PRE_DEC:
case ISD::POST_INC:
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
"UNINDEXED\n";
cerr << (unsigned) LN->getAddressingMode() << "\n";
abort();
/*NOTREACHED*/
}
}
/// Custom lower stores for CellSPU
/*!
All CellSPU stores are aligned to 16-byte boundaries, so for elements
within a 16-byte block, we have to generate a shuffle to insert the
requested element into its place, then store the resulting block.
*/
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
StoreSDNode *SN = cast<StoreSDNode>(Op);
MVT VT = Value.getValueType();
MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned alignment = SN->getAlignment();
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
int chunk_offset, slot_offset;
bool was16aligned;
// The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
AlignedLoad(Op, DAG, ST, SN, alignment,
chunk_offset, slot_offset, VT, was16aligned);
Gabor Greif
committed
if (alignLoadVec.getNode() == 0)
LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
SDValue basePtr = LN->getBasePtr();
SDValue the_chain = alignLoadVec.getValue(1);
SDValue theValue = SN->getValue();
SDValue result;
&& (theValue.getOpcode() == ISD::AssertZext
|| theValue.getOpcode() == ISD::AssertSext)) {
// Drill down and get the value for zero- and sign-extended
// quantities
theValue = theValue.getOperand(0);
SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
SDValue insertEltPtr;
// If the base pointer is already a D-form address, then just create
// a new D-form address with a slot offset and the orignal base pointer.
// Otherwise generate a D-form address with the slot offset relative
// to the stack pointer, which is always aligned.
DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
Gabor Greif
committed
DEBUG(basePtr.getNode()->dump(&DAG));
if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
(basePtr.getOpcode() == ISD::ADD
&& basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
SDValue insertEltOp =
SDValue vectorizeOp =
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
result = DAG.getNode(SPUISD::SHUFB, vecVT,
vectorizeOp, alignLoadVec,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, result, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
LN->isVolatile(), LN->getAlignment());
#if 0 && defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
const SDValue ¤tRoot = DAG.getRoot();
DAG.setRoot(result);
cerr << "------- CellSPU:LowerStore result:\n";
DAG.dump();
cerr << "-------\n";
DAG.setRoot(currentRoot);
}
#endif
return result;
/*UNREACHED*/
}
case ISD::PRE_INC:
case ISD::PRE_DEC:
case ISD::POST_INC:
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
"UNINDEXED\n";
cerr << (unsigned) SN->getAddressingMode() << "\n";
abort();
/*NOTREACHED*/
}
}
/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
// Just return the SDValue with the constant pool address in it.
return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
Scott Michel
committed
return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
}
}
assert(0 &&
"LowerConstantPool: Relocation model other than static"
" not supported.");
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
Scott Michel
committed
if (!ST->usingLargeMem()) {
return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
} else {
SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
Scott Michel
committed
return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
}
}
assert(0 &&
"LowerJumpTable: Relocation model other than static not supported.");
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
} else {
SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
}
} else {
cerr << "LowerGlobalAddress: Relocation model other than static not "
abort();
/*NOTREACHED*/
}
}
//! Custom lower i64 integer constants
/*!
This code inserts all of the necessary juggling that needs to occur to load
a 64-bit constant into a register.
*/
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
if (VT == MVT::i64) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
} else {
cerr << "LowerConstant: unhandled constant type "
abort();
/*NOTREACHED*/
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
if (VT == MVT::f64) {
ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
assert((FP != 0) &&
"LowerConstantFP: Node is not ConstantFPSDNode");
uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
SDValue T = DAG.getConstant(dbits, MVT::i64);
SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
MVT CondVT = Cond.getValueType();
MVT CondNVT;
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
Op.getOperand(0),
DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
Op.getOperand(2));
} else
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel
committed
SmallVector<SDValue, 48> ArgValues;
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
unsigned ArgOffset = SPUFrameInfo::minStackSize();
unsigned ArgRegIdx = 0;
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Add DAG nodes to load the arguments or copy them out of registers.
for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
ArgNo != e; ++ArgNo) {
MVT ObjectVT = Op.getValue(ArgNo).getValueType();
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
Scott Michel
committed
SDValue ArgVal;
Scott Michel
committed
if (ArgRegIdx < NumArgRegs) {
const TargetRegisterClass *ArgRegClass;
switch (ObjectVT.getSimpleVT()) {
default: {
cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
<< ObjectVT.getMVTString()
<< "\n";
abort();
Scott Michel
committed
case MVT::i8:
Scott Michel
committed
case MVT::i16:
Scott Michel
committed
case MVT::i32:
Scott Michel
committed
case MVT::i64:
Scott Michel
committed
case MVT::f32:
Scott Michel
committed
case MVT::f64:
Scott Michel
committed
case MVT::v2f64:
case MVT::v4f32:
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8: