Skip to content
Snippets Groups Projects
Commit a7976d32 authored by Chris Lattner
Browse files

Implement Regression/CodeGen/PowerPC/bswap-load-store.ll by folding bswaps

into i16/i32 load/stores.

llvm-svn: 29089
parent a0449c29
No related branches found
No related tags found
No related merge requests found
...@@ -178,6 +178,7 @@ getHazardType(SDNode *Node) { ...@@ -178,6 +178,7 @@ getHazardType(SDNode *Node) {
case PPC::LHZ: case PPC::LHZ:
case PPC::LHZX: case PPC::LHZX:
case PPC::LVEHX: case PPC::LVEHX:
case PPC::LHBRX:
LoadSize = 2; LoadSize = 2;
break; break;
case PPC::LFS: case PPC::LFS:
...@@ -188,6 +189,7 @@ getHazardType(SDNode *Node) { ...@@ -188,6 +189,7 @@ getHazardType(SDNode *Node) {
case PPC::LWA: case PPC::LWA:
case PPC::LWAX: case PPC::LWAX:
case PPC::LVEWX: case PPC::LVEWX:
case PPC::LWBRX:
LoadSize = 4; LoadSize = 4;
break; break;
case PPC::LFD: case PPC::LFD:
...@@ -233,6 +235,7 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) { ...@@ -233,6 +235,7 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) {
case PPC::STHX: case PPC::STHX:
case PPC::STH: case PPC::STH:
case PPC::STVEHX: case PPC::STVEHX:
case PPC::STHBRX:
ThisStoreSize = 2; ThisStoreSize = 2;
break; break;
case PPC::STFS: case PPC::STFS:
...@@ -243,6 +246,7 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) { ...@@ -243,6 +246,7 @@ void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) {
case PPC::STW: case PPC::STW:
case PPC::STVEWX: case PPC::STVEWX:
case PPC::STFIWX: case PPC::STFIWX:
case PPC::STWBRX:
ThisStoreSize = 4; ThisStoreSize = 4;
break; break;
case PPC::STD_32: case PPC::STD_32:
......
...@@ -266,6 +266,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) ...@@ -266,6 +266,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC); setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
computeRegisterProperties(); computeRegisterProperties();
} }
...@@ -296,6 +297,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { ...@@ -296,6 +297,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
} }
} }
...@@ -2344,6 +2347,56 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, ...@@ -2344,6 +2347,56 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI.AddToWorklist(Val.Val); DCI.AddToWorklist(Val.Val);
return Val; return Val;
} }
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).Val->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16)) {
SDOperand BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);
return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
N->getOperand(2), N->getOperand(3),
DAG.getValueType(N->getOperand(1).getValueType()));
}
break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (N->getOperand(0).getOpcode() == ISD::LOAD &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
SDOperand Load = N->getOperand(0);
// Create the byte-swapping load.
std::vector<MVT::ValueType> VTs;
VTs.push_back(MVT::i32);
VTs.push_back(MVT::Other);
std::vector<SDOperand> Ops;
Ops.push_back(Load.getOperand(0)); // Chain
Ops.push_back(Load.getOperand(1)); // Ptr
Ops.push_back(Load.getOperand(2)); // SrcValue
Ops.push_back(DAG.getValueType(N->getValueType(0))); // VT
SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops);
// If this is an i16 load, insert the truncate.
SDOperand ResVal = BSLoad;
if (N->getValueType(0) == MVT::i16)
ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);
// First, combine the bswap away. This makes the value produced by the
// load dead.
DCI.CombineTo(N, ResVal);
// Next, combine the load away, we give it a bogus result value but a real
// chain result. The result value is dead because the bswap is dead.
DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));
// Return N so it doesn't get rechecked!
return SDOperand(N, 0);
}
break; break;
case PPCISD::VCMP: { case PPCISD::VCMP: {
// If a VCMPo node already exists with exactly the same operands as this // If a VCMPo node already exists with exactly the same operands as this
...@@ -2477,6 +2530,12 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, ...@@ -2477,6 +2530,12 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
KnownOne = 0; KnownOne = 0;
switch (Op.getOpcode()) { switch (Op.getOpcode()) {
default: break; default: break;
case PPCISD::LBRX: {
// lhbrx is known to have the top bits cleared out.
if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
KnownZero = 0xFFFF0000;
break;
}
case ISD::INTRINSIC_WO_CHAIN: { case ISD::INTRINSIC_WO_CHAIN: {
switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
default: break; default: break;
......
...@@ -111,7 +111,19 @@ namespace llvm { ...@@ -111,7 +111,19 @@ namespace llvm {
/// condition register to branch on, OPC is the branch opcode to use (e.g. /// condition register to branch on, OPC is the branch opcode to use (e.g.
/// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
/// an optional input flag argument. /// an optional input flag argument.
COND_BRANCH COND_BRANCH,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
STBRX,
/// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
/// then puts it in the bottom bits of the GPRC. TYPE can be either i16
/// or i32.
LBRX
}; };
} }
......
...@@ -37,6 +37,13 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ ...@@ -37,6 +37,13 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
SDTCisVT<1, i32>, SDTCisVT<2, OtherVT> SDTCisVT<1, i32>, SDTCisVT<2, OtherVT>
]>; ]>;
// Type profile for the byte-swapping load node (PPClbrx): 1 result, 3 operands.
// Constraints by index: 0 is i32, 1 is a pointer, 2 and 3 are OtherVT.
// In the lhbrx/lwbrx patterns the operands are the address, a srcvalue, and
// the value type (i16 or i32) that picks the load width.
// NOTE(review): the chain is not listed here; it presumably comes from
// SDNPHasChain on the node definition -- confirm.
def SDT_PPClbrx : SDTypeProfile<1, 3, [
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
]>;
// Type profile for the byte-swapping store node (PPCstbrx): no results,
// 4 operands. Constraints by index: 0 is i32, 1 is a pointer, 2 and 3 are
// OtherVT. In the sthbrx/stwbrx patterns the operands are the GPRC value to
// store, the address, a srcvalue, and the value type (i16 or i32) that picks
// the store width.
def SDT_PPCstbrx : SDTypeProfile<0, 4, [
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes. // PowerPC specific DAG Nodes.
// //
...@@ -88,6 +95,9 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; ...@@ -88,6 +95,9 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInFlag]>; [SDNPHasChain, SDNPOptInFlag]>;
// Pattern-side name for the PPCISD::LBRX byte-swapping load node, typed by
// SDT_PPClbrx and flagged SDNPHasChain.
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain]>;
// Pattern-side name for the PPCISD::STBRX byte-swapping store node, typed by
// SDT_PPCstbrx and flagged SDNPHasChain.
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments. // PowerPC specific transformation functions and pattern fragments.
// //
...@@ -464,6 +474,15 @@ def LHZX : XForm_1<31, 279, (ops GPRC:$rD, memrr:$src), ...@@ -464,6 +474,15 @@ def LHZX : XForm_1<31, 279, (ops GPRC:$rD, memrr:$src),
def LWZX : XForm_1<31, 23, (ops GPRC:$rD, memrr:$src), def LWZX : XForm_1<31, 23, (ops GPRC:$rD, memrr:$src),
"lwzx $rD, $src", LdStGeneral, "lwzx $rD, $src", LdStGeneral,
[(set GPRC:$rD, (load xaddr:$src))]>; [(set GPRC:$rD, (load xaddr:$src))]>;
// lhbrx: byte-swapping halfword load into a GPRC from a reg+reg address.
// Selected for a PPClbrx node whose type operand is i16; the srcvalue operand
// is matched as $dummy and does not appear in the assembly string.
// NOTE(review): 31 / 790 are presumably the primary and extended opcodes --
// confirm against the XForm_1 class definition.
def LHBRX : XForm_1<31, 790, (ops GPRC:$rD, memrr:$src),
"lhbrx $rD, $src", LdStGeneral,
[(set GPRC:$rD, (PPClbrx xaddr:$src,srcvalue:$dummy, i16))]>;
// lwbrx: byte-swapping word load into a GPRC from a reg+reg address.
// Selected for a PPClbrx node whose type operand is i32; the srcvalue operand
// is matched as $dummy and does not appear in the assembly string.
// NOTE(review): 31 / 534 are presumably the primary and extended opcodes --
// confirm against the XForm_1 class definition.
def LWBRX : XForm_1<31, 534, (ops GPRC:$rD, memrr:$src),
"lwbrx $rD, $src", LdStGeneral,
[(set GPRC:$rD, (PPClbrx xaddr:$src,srcvalue:$dummy, i32))]>;
} }
let PPC970_Unit = 1 in { // FXU Operations. let PPC970_Unit = 1 in { // FXU Operations.
...@@ -517,6 +536,14 @@ def STWX : XForm_8<31, 151, (ops GPRC:$rS, memrr:$dst), ...@@ -517,6 +536,14 @@ def STWX : XForm_8<31, 151, (ops GPRC:$rS, memrr:$dst),
def STWUX : XForm_8<31, 183, (ops GPRC:$rS, GPRC:$rA, GPRC:$rB), def STWUX : XForm_8<31, 183, (ops GPRC:$rS, GPRC:$rA, GPRC:$rB),
"stwux $rS, $rA, $rB", LdStGeneral, "stwux $rS, $rA, $rB", LdStGeneral,
[]>; []>;
// sthbrx: byte-swapping halfword store of a GPRC to a reg+reg address.
// Selected for a PPCstbrx node whose type operand is i16; the srcvalue operand
// is matched as $dummy and does not appear in the assembly string.
// NOTE(review): 31 / 918 are presumably the primary and extended opcodes, and
// PPC970_DGroup_Cracked presumably feeds PPC 970 dispatch-group modelling --
// confirm both against their definitions.
def STHBRX: XForm_8<31, 918, (ops GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStGeneral,
[(PPCstbrx GPRC:$rS, xaddr:$dst, srcvalue:$dummy, i16)]>,
PPC970_DGroup_Cracked;
// stwbrx: byte-swapping word store of a GPRC to a reg+reg address.
// Selected for a PPCstbrx node whose type operand is i32; the srcvalue operand
// is matched as $dummy and does not appear in the assembly string.
// NOTE(review): 31 / 662 are presumably the primary and extended opcodes, and
// PPC970_DGroup_Cracked presumably feeds PPC 970 dispatch-group modelling --
// confirm both against their definitions.
def STWBRX: XForm_8<31, 662, (ops GPRC:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStGeneral,
[(PPCstbrx GPRC:$rS, xaddr:$dst, srcvalue:$dummy, i32)]>,
PPC970_DGroup_Cracked;
} }
let PPC970_Unit = 1 in { // FXU Operations. let PPC970_Unit = 1 in { // FXU Operations.
def SRAWI : XForm_10<31, 824, (ops GPRC:$rA, GPRC:$rS, u5imm:$SH), def SRAWI : XForm_10<31, 824, (ops GPRC:$rA, GPRC:$rS, u5imm:$SH),
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
TODO: TODO:
* gpr0 allocation * gpr0 allocation
* implement do-loop -> bdnz transform * implement do-loop -> bdnz transform
* implement powerpc-64 for darwin
===-------------------------------------------------------------------------=== ===-------------------------------------------------------------------------===
...@@ -238,10 +237,6 @@ just fastcc. ...@@ -238,10 +237,6 @@ just fastcc.
===-------------------------------------------------------------------------=== ===-------------------------------------------------------------------------===
Generate lwbrx and other byteswapping load/store instructions when reasonable.
===-------------------------------------------------------------------------===
Compile this: Compile this:
int foo(int a) { int foo(int a) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment