Newer
Older
//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Cell SPU Instructions:
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TODO Items (not urgent today, but would be nice, low priority)
//
// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
// in 16-bit and 32-bit constants and reduce instruction count.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Pseudo instructions:
//===----------------------------------------------------------------------===//
// Call-sequence bracketing pseudos, selected from callseq_start and
// callseq_end. Both are declared to read and write R1 and to carry a control
// dependence. Pseudo takes (outs, ins, asm string, pattern) -- see DWARF_LOC --
// so each def needs an assembly string; a comment-only marker is used here,
// in the same "${:comment} ..." form that DWARF_LOC prints.
let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
  def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
                                "${:comment} ADJCALLSTACKDOWN",
                                [(callseq_start timm:$amt)]>;
  def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm_i32:$amt),
                                "${:comment} ADJCALLSTACKUP",
                                [(callseq_end timm:$amt)]>;
}
//===----------------------------------------------------------------------===//
// DWARF debugging Pseudo Instructions
//===----------------------------------------------------------------------===//
// Pseudo selected from a dwarf_loc node. The operands are declared in
// (line, col, file) order; the printed ".loc" text (emitted behind the
// target's comment prefix) lists them as file, line, col.
def DWARF_LOC
  : Pseudo<(outs),
           (ins i32imm:$line, i32imm:$col, i32imm:$file),
           "${:comment} .loc $file, $line, $col",
           [(dwarf_loc (i32 imm:$line),
                       (i32 imm:$col),
                       (i32 imm:$file))]>;
//===----------------------------------------------------------------------===//
// Loads:
// NB: The ordering is actually important, since the instruction selection
// will try each of the instructions in sequence, i.e., the D-form first with
// the 10-bit displacement, then the A-form with the 16 bit displacement, and
// finally the X-form with the register-register.
//===----------------------------------------------------------------------===//
let canFoldAsLoad = 1 in {
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// D-form quadword load ("lqd"): the address is a memri10 operand selected
// through dform_addr.

// Vector result: $rT is a VECREG typed by `vectype`.
class LoadDFormVec<ValueType vectype>
  : RI10Form<0b00101100,
             (outs VECREG:$rT), (ins memri10:$src),
             "lqd\t$rT, $src", LoadStore,
             [(set (vectype VECREG:$rT), (load dform_addr:$src))]>;

// Scalar result: $rT lives in the register class `rclass`.
class LoadDForm<RegisterClass rclass>
  : RI10Form<0b00101100,
             (outs rclass:$rT), (ins memri10:$src),
             "lqd\t$rT, $src", LoadStore,
             [(set rclass:$rT, (load dform_addr:$src))]>;

// One record per supported vector type and scalar register class; the def
// names become suffixes of the defm prefix.
multiclass LoadDForms {
  // Vector types
  def v16i8 : LoadDFormVec<v16i8>;
  def v8i16 : LoadDFormVec<v8i16>;
  def v4i32 : LoadDFormVec<v4i32>;
  def v2i64 : LoadDFormVec<v2i64>;
  def v4f32 : LoadDFormVec<v4f32>;
  def v2f64 : LoadDFormVec<v2f64>;

  // Scalar register classes
  def r128  : LoadDForm<GPRC>;
  def r64   : LoadDForm<R64C>;
  def r32   : LoadDForm<R32C>;
  def f32   : LoadDForm<R32FP>;
  def f64   : LoadDForm<R64FP>;
  def r16   : LoadDForm<R16C>;
  def r8    : LoadDForm<R8C>;
}
// A-form quadword load ("lqa"): the address is an addr256k operand selected
// through aform_addr.

// Vector result: $rT is a VECREG typed by `vectype`.
class LoadAFormVec<ValueType vectype>
  : RI16Form<0b100001100,
             (outs VECREG:$rT), (ins addr256k:$src),
             "lqa\t$rT, $src", LoadStore,
             [(set (vectype VECREG:$rT), (load aform_addr:$src))]>;

// Scalar result: $rT lives in the register class `rclass`.
class LoadAForm<RegisterClass rclass>
  : RI16Form<0b100001100,
             (outs rclass:$rT), (ins addr256k:$src),
             "lqa\t$rT, $src", LoadStore,
             [(set rclass:$rT, (load aform_addr:$src))]>;

// One record per supported vector type and scalar register class.
multiclass LoadAForms {
  // Vector types
  def v16i8 : LoadAFormVec<v16i8>;
  def v8i16 : LoadAFormVec<v8i16>;
  def v4i32 : LoadAFormVec<v4i32>;
  def v2i64 : LoadAFormVec<v2i64>;
  def v4f32 : LoadAFormVec<v4f32>;
  def v2f64 : LoadAFormVec<v2f64>;

  // Scalar register classes
  def r128  : LoadAForm<GPRC>;
  def r64   : LoadAForm<R64C>;
  def r32   : LoadAForm<R32C>;
  def f32   : LoadAForm<R32FP>;
  def f64   : LoadAForm<R64FP>;
  def r16   : LoadAForm<R16C>;
  def r8    : LoadAForm<R8C>;
}
// X-form quadword load ("lqx"): the address is a memrr (register-register)
// operand selected through xform_addr.

// Vector result: $rT is a VECREG typed by `vectype`.
class LoadXFormVec<ValueType vectype>
  : RRForm<0b00100011100,
           (outs VECREG:$rT), (ins memrr:$src),
           "lqx\t$rT, $src", LoadStore,
           [(set (vectype VECREG:$rT), (load xform_addr:$src))]>;

// Scalar result: $rT lives in the register class `rclass`.
class LoadXForm<RegisterClass rclass>
  : RRForm<0b00100011100,
           (outs rclass:$rT), (ins memrr:$src),
           "lqx\t$rT, $src", LoadStore,
           [(set rclass:$rT, (load xform_addr:$src))]>;

// One record per supported vector type and scalar register class.
multiclass LoadXForms {
  // Vector types
  def v16i8 : LoadXFormVec<v16i8>;
  def v8i16 : LoadXFormVec<v8i16>;
  def v4i32 : LoadXFormVec<v4i32>;
  def v2i64 : LoadXFormVec<v2i64>;
  def v4f32 : LoadXFormVec<v4f32>;
  def v2f64 : LoadXFormVec<v2f64>;

  // Scalar register classes
  def r128  : LoadXForm<GPRC>;
  def r64   : LoadXForm<R64C>;
  def r32   : LoadXForm<R32C>;
  def f32   : LoadXForm<R32FP>;
  def f64   : LoadXForm<R64FP>;
  def r16   : LoadXForm<R16C>;
  def r8    : LoadXForm<R8C>;
}
// Instantiate the three load families; each defm yields records named
// <prefix><suffix>, e.g. LQDv4i32 or LQAr32.
// NOTE(review): the "Loads:" header says selection tries D-form, then A-form,
// then X-form, but the instantiation order here is A, D, X -- confirm which
// ordering is actually relied upon before reordering anything.
defm LQA : LoadAForms;
defm LQD : LoadDForms;
defm LQX : LoadXForms;
/* Load quadword, PC relative: Not much use at this point in time.
Might be of use later for relocatable code. It's effectively the
same as LQA, but uses PC-relative addressing.
def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
"lqr\t$rT, $disp", LoadStore,
[(set VECREG:$rT, (load iaddr:$disp))]>;
*/
}
//===----------------------------------------------------------------------===//
// Stores:
//===----------------------------------------------------------------------===//
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
// D-form quadword store ("stqd"): $rT is written to a memri10 address
// selected through dform_addr. Stores define no outputs.

// Vector source: $rT is a VECREG typed by `vectype`.
class StoreDFormVec<ValueType vectype>
  : RI10Form<0b00100100,
             (outs), (ins VECREG:$rT, memri10:$src),
             "stqd\t$rT, $src", LoadStore,
             [(store (vectype VECREG:$rT), dform_addr:$src)]>;

// Scalar source: $rT lives in the register class `rclass`.
class StoreDForm<RegisterClass rclass>
  : RI10Form<0b00100100,
             (outs), (ins rclass:$rT, memri10:$src),
             "stqd\t$rT, $src", LoadStore,
             [(store rclass:$rT, dform_addr:$src)]>;

// One record per supported vector type and scalar register class.
multiclass StoreDForms {
  // Vector types
  def v16i8 : StoreDFormVec<v16i8>;
  def v8i16 : StoreDFormVec<v8i16>;
  def v4i32 : StoreDFormVec<v4i32>;
  def v2i64 : StoreDFormVec<v2i64>;
  def v4f32 : StoreDFormVec<v4f32>;
  def v2f64 : StoreDFormVec<v2f64>;

  // Scalar register classes
  def r128  : StoreDForm<GPRC>;
  def r64   : StoreDForm<R64C>;
  def r32   : StoreDForm<R32C>;
  def f32   : StoreDForm<R32FP>;
  def f64   : StoreDForm<R64FP>;
  def r16   : StoreDForm<R16C>;
  def r8    : StoreDForm<R8C>;
}
// A-form quadword store ("stqa"): $rT is written to an addr256k address
// selected through aform_addr. Stores define no outputs.
//
// RI16Form takes (opcode, outs, ins, asm string, itinerary, pattern) -- see
// LoadAForm -- so the assembly string and the LoadStore itinerary must be
// supplied here as well.
//
// NOTE(review): the two opcode literals below differ from each other and are
// narrower than the 9-bit literals the other RI16Form users pass (for example
// 0b100001100 for lqa); confirm the intended stqa encoding.

// Vector source: $rT is a VECREG typed by `vectype`.
class StoreAFormVec<ValueType vectype>
  : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
             "stqa\t$rT, $src",
             LoadStore,
             [(store (vectype VECREG:$rT), aform_addr:$src)]>;

// Scalar source: $rT lives in the register class `rclass`.
class StoreAForm<RegisterClass rclass>
  : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
             "stqa\t$rT, $src",
             LoadStore,
             [(store rclass:$rT, aform_addr:$src)]>;
// A-form stores: one record per supported vector type and scalar register
// class; the def names become suffixes of the defm prefix.
multiclass StoreAForms {
  // Vector types
  def v16i8 : StoreAFormVec<v16i8>;
  def v8i16 : StoreAFormVec<v8i16>;
  def v4i32 : StoreAFormVec<v4i32>;
  def v2i64 : StoreAFormVec<v2i64>;
  def v4f32 : StoreAFormVec<v4f32>;
  def v2f64 : StoreAFormVec<v2f64>;

  // Scalar register classes
  def r128  : StoreAForm<GPRC>;
  def r64   : StoreAForm<R64C>;
  def r32   : StoreAForm<R32C>;
  def f32   : StoreAForm<R32FP>;
  def f64   : StoreAForm<R64FP>;
  def r16   : StoreAForm<R16C>;
  def r8    : StoreAForm<R8C>;
}
// X-form quadword store ("stqx"): $rT is written to a memrr
// (register-register) address selected through xform_addr.
// NOTE(review): the opcode literal here is 8 bits wide and equal to the stqd
// one, whereas the lqx RRForm literal is 11 bits -- confirm the encoding.

// Vector source: $rT is a VECREG typed by `vectype`.
class StoreXFormVec<ValueType vectype>
  : RRForm<0b00100100,
           (outs), (ins VECREG:$rT, memrr:$src),
           "stqx\t$rT, $src", LoadStore,
           [(store (vectype VECREG:$rT), xform_addr:$src)]>;

// Scalar source: $rT lives in the register class `rclass`.
class StoreXForm<RegisterClass rclass>
  : RRForm<0b00100100,
           (outs), (ins rclass:$rT, memrr:$src),
           "stqx\t$rT, $src", LoadStore,
           [(store rclass:$rT, xform_addr:$src)]>;

// One record per supported vector type and scalar register class.
multiclass StoreXForms {
  // Vector types
  def v16i8 : StoreXFormVec<v16i8>;
  def v8i16 : StoreXFormVec<v8i16>;
  def v4i32 : StoreXFormVec<v4i32>;
  def v2i64 : StoreXFormVec<v2i64>;
  def v4f32 : StoreXFormVec<v4f32>;
  def v2f64 : StoreXFormVec<v2f64>;

  // Scalar register classes
  def r128  : StoreXForm<GPRC>;
  def r64   : StoreXForm<R64C>;
  def r32   : StoreXForm<R32C>;
  def f32   : StoreXForm<R32FP>;
  def f64   : StoreXForm<R64FP>;
  def r16   : StoreXForm<R16C>;
  def r8    : StoreXForm<R8C>;
}
// Instantiate the three store families; each defm yields records named
// <prefix><suffix>, e.g. STQDv4i32 or STQXr32.
defm STQD : StoreDForms;
defm STQA : StoreAForms;
defm STQX : StoreXForms;
/* Store quadword, PC relative: Not much use at this point in time. Might
be useful for relocatable code.
def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
"stqr\t$rT, $disp", LoadStore,
[(store VECREG:$rT, iaddr:$disp)]>;
*/
//===----------------------------------------------------------------------===//
// Generate Controls for Insertion:
//===----------------------------------------------------------------------===//
// Generate-controls-for-insertion instructions. Each one produces a shuffle
// mask (SPUshufmask) in $rT from an address operand; the D-forms take a
// memri7 operand (dform2_addr) and the X-forms a memrr operand (xform_addr).
// The element width is chosen by the mnemonic: b = byte, h = halfword,
// w = word, d = doubleword. The f32/f64 records reuse the word/doubleword
// instructions with floating-point vector result types.
//
// Every record passes (opcode, outs, ins, asm string, itinerary, pattern).
// The opcode literals follow a regular scheme in this group: the two leading
// bits pick the width (00 = b, 10 = h, 01 = w, 11 = d), followed by 101111100
// for D-forms and 101011100 for X-forms. CBD therefore uses 0b00101111100;
// it must not share CHD's literal.

// Byte insertion controls.
def CBD: RI7Form<0b00101111100, (outs VECREG:$rT), (ins memri7:$src),
    "cbd\t$rT, $src", ShuffleOp,
    [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
    "cbx\t$rT, $src", ShuffleOp,
    [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

// Halfword insertion controls.
def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
    "chd\t$rT, $src", ShuffleOp,
    [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
    "chx\t$rT, $src", ShuffleOp,
    [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

// Word insertion controls (integer and single-precision vectors).
def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
    "cwd\t$rT, $src", ShuffleOp,
    [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
    "cwx\t$rT, $src", ShuffleOp,
    [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
    "cwd\t$rT, $src", ShuffleOp,
    [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
    "cwx\t$rT, $src", ShuffleOp,
    [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

// Doubleword insertion controls (integer and double-precision vectors).
def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
    "cdd\t$rT, $src", ShuffleOp,
    [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
    "cdx\t$rT, $src", ShuffleOp,
    [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
    "cdd\t$rT, $src", ShuffleOp,
    [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
    "cdx\t$rT, $src", ShuffleOp,
    [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
//===----------------------------------------------------------------------===//
// Constant formation:
//===----------------------------------------------------------------------===//
// ilh: immediate load halfword. All three records share one opcode and one
// assembly string and differ only in result register class and in the
// immediate operand/PatLeaf used for matching.

// Vector form: matches a v8i16 constant accepted by v8i16SExt16Imm.
def ILHv8i16
  : RI16Form<0b110000010,
             (outs VECREG:$rT), (ins s16imm:$val),
             "ilh\t$rT, $val", ImmLoad,
             [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;

// 16-bit scalar form.
def ILHr16
  : RI16Form<0b110000010,
             (outs R16C:$rT), (ins s16imm:$val),
             "ilh\t$rT, $val", ImmLoad,
             [(set R16C:$rT, immSExt16:$val)]>;

// Cell SPU has no native 8-bit immediate load; ILH is used instead ("with
// the right constant").
def ILHr8
  : RI16Form<0b110000010,
             (outs R8C:$rT), (ins s16imm_i8:$val),
             "ilh\t$rT, $val", ImmLoad,
             [(set R8C:$rT, immSExt8:$val)]>;
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
// il: immediate load. ILInst fixes the opcode, assembly string and
// itinerary; the caller provides the operand lists and the pattern.
class ILInst<dag OOL, dag IOL, list<dag> pattern>
  : RI16Form<0b100000010, OOL, IOL,
             "il\t$rT, $val", ImmLoad,
             pattern>;

// Vector result of type `vectype`; `xform` is the PatLeaf that accepts the
// constant and `immtype` the operand used to print it.
class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>
  : ILInst<(outs VECREG:$rT),
           (ins immtype:$val),
           [(set (vectype VECREG:$rT), (vectype xform:$val))]>;

// Scalar result in register class `rclass`.
class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>
  : ILInst<(outs rclass:$rT),
           (ins immtype:$val),
           [(set rclass:$rT, xform:$val)]>;

multiclass ImmediateLoad {
  def v2i64 : ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
  def v4i32 : ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;

  // TODO: Need v2f64, v4f32

  def r64   : ILRegInst<R64C,  s16imm_i64, immSExt16>;
  def r32   : ILRegInst<R32C,  s16imm_i32, immSExt16>;
  def f32   : ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
  def f64   : ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
}

defm IL : ImmediateLoad;
// ilhu: immediate load halfword upper. ILHUInst fixes the opcode, assembly
// string and itinerary; the caller provides operand lists and pattern.
class ILHUInst<dag OOL, dag IOL, list<dag> pattern>
  : RI16Form<0b010000010, OOL, IOL,
             "ilhu\t$rT, $val", ImmLoad,
             pattern>;

// Vector result of type `vectype`, constant accepted by `xform`.
class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>
  : ILHUInst<(outs VECREG:$rT),
             (ins immtype:$val),
             [(set (vectype VECREG:$rT), (vectype xform:$val))]>;

// Scalar result in register class `rclass`.
class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>
  : ILHUInst<(outs rclass:$rT),
             (ins immtype:$val),
             [(set rclass:$rT, xform:$val)]>;

multiclass ImmLoadHalfwordUpper {
  def v2i64 : ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
  def v4i32 : ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;

  def r64   : ILHURegInst<R64C, u16imm_i64, hi16>;
  def r32   : ILHURegInst<R32C, u16imm_i32, hi16>;

  // Loads the high portion of an address
  def hi    : ILHURegInst<R32C, symbolHi, hi16>;

  // Used in custom lowering constant SFP loads:
  def f32   : ILHURegInst<R32FP, f16imm, hi16_f32>;
}

defm ILHU : ImmLoadHalfwordUpper;
// Immediate load address (can also be used to load 18-bit unsigned constants,
// see the zext 16->32 pattern)

// ILAInst fixes the opcode, assembly string and itinerary for "ila"; the
// caller provides the operand lists and the selection pattern.
class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
  RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
           LoadNOP, pattern>;

// Vector result of type `vectype`; `xform` is the PatLeaf that accepts the
// constant and `immtype` the operand used to print it.
class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
  ILAInst<(outs VECREG:$rT), (ins immtype:$val),
          [(set (vectype VECREG:$rT), (vectype xform:$val))]>;

// Scalar result in register class `rclass`.
class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
  ILAInst<(outs rclass:$rT), (ins immtype:$val),
          [(set rclass:$rT, xform:$val)]>;

multiclass ImmLoadAddress
{
  def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
  def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;

  def r64: ILARegInst<R64C, u18imm_i64, imm18>;
  def r32: ILARegInst<R32C, u18imm, imm18>;
  def f32: ILARegInst<R32FP, f18imm, fpimm18>;
  def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;

  // symbolLo operand, matched with imm18.
  def lo: ILARegInst<R32C, symbolLo, imm18>;

  // symbolLSA operand; never selected by a pattern.
  def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
                   [/* no pattern */]>;
}

defm ILA : ImmLoadAddress;
// iohl: immediate OR halfword lower -- the "other" part of loading large
// constants into 32-bit registers (see the anonymous pattern
// Pat<(i32 imm:$imm), ...>).
//
// These are really two-operand instructions. They are modelled with three
// operands where the extra input $rS is tied to the result $rT and is left
// out of the encoding.
class IOHLInst<dag OOL, dag IOL, list<dag> pattern>
  : RI16Form<0b100000110, OOL, IOL,
             "iohl\t$rT, $val", ImmLoad,
             pattern>,
    RegConstraint<"$rS = $rT">,
    NoEncode<"$rS">;

// Vector form. `vectype` is accepted but unused, since there is no pattern.
class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>
  : IOHLInst<(outs VECREG:$rT),
             (ins VECREG:$rS, immtype:$val),
             [/* no pattern */]>;

// Scalar form over register class `rclass`; also pattern-less.
class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>
  : IOHLInst<(outs rclass:$rT),
             (ins rclass:$rS, immtype:$val),
             [/* no pattern */]>;

multiclass ImmOrHalfwordLower {
  def v2i64 : IOHLVecInst<v2i64, u16imm_i64>;
  def v4i32 : IOHLVecInst<v4i32, u16imm_i32>;

  def r32   : IOHLRegInst<R32C, i32imm>;
  def f32   : IOHLRegInst<R32FP, f32imm>;

  def lo    : IOHLRegInst<R32C, symbolLo>;
}

defm IOHL: ImmOrHalfwordLower;
// Form select mask for bytes using immediate, used in conjunction with the
// SELB instruction:

// The mask is produced from a u16imm operand, matched as an i16 immU16
// constant fed to SPUselmask; `vectype` is the type given to the VECREG
// result.
class FSMBIVec<ValueType vectype>:
  RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
           "fsmbi\t$rT, $val",
           SelectOp,
           [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;

// One record per integer vector type.
multiclass FormSelectMaskBytesImm
{
  def v16i8: FSMBIVec<v16i8>;
  def v8i16: FSMBIVec<v8i16>;
  def v4i32: FSMBIVec<v4i32>;
  def v2i64: FSMBIVec<v2i64>;
}

defm FSMBI : FormSelectMaskBytesImm;
// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits
def FSMB:
  RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA),
           "fsmb\t$rT, $rA", SelectOp,
           [(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>;

// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
// only 8-bits wide (even though it's input as 16-bits here)
def FSMH:
  RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA),
           "fsmh\t$rT, $rA", SelectOp,
           [(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>;
// fsm: form select mask for words. Per the original note, only the lower
// 4 bits of $rA are significant.
//
// `vectype` is the type of the VECREG result and `rclass` the register class
// of the mask source.
class FSMInst<ValueType vectype, RegisterClass rclass>
  : RRForm_1<0b00101101100,
             (outs VECREG:$rT), (ins rclass:$rA),
             "fsm\t$rT, $rA", SelectOp,
             [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;

// v4i32 masks from 32-bit and 16-bit sources.
multiclass FormSelectMaskWord {
  def r32 : FSMInst<v4i32, R32C>;
  def r16 : FSMInst<v4i32, R16C>;
}

defm FSM : FormSelectMaskWord;

// Special case when used for i64 math operations: same instruction, but the
// result is typed v2i64.
multiclass FormSelectMaskWord64 {
  def r32 : FSMInst<v2i64, R32C>;
  def r16 : FSMInst<v2i64, R16C>;
}

defm FSM64 : FormSelectMaskWord64;
//===----------------------------------------------------------------------===//
// Integer and Logical Operations:
//===----------------------------------------------------------------------===//
// ah: add halfword.

// Vector form; the instruction pattern matches the int_spu_si_ah intrinsic.
def AHv8i16
  : RRForm<0b00010011000,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "ah\t$rT, $rA, $rB", IntegerOp,
           [(set (v8i16 VECREG:$rT),
                 (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;

// A plain v8i16 add is mapped onto the same instruction.
def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
          (AHv8i16 VECREG:$rA, VECREG:$rB)>;

// 16-bit scalar form.
def AHr16
  : RRForm<0b00010011000,
           (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
           "ah\t$rT, $rA, $rB", IntegerOp,
           [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
// ahi: add halfword immediate (s10imm operand), vector form.
def AHIvec:
  RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
           "ahi\t$rT, $rA, $val", IntegerOp,
           [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
                                          v8i16SExt10Imm:$val))]>;

// ahi, 16-bit scalar form. The immediate is matched with the scalar
// i16ImmSExt10 leaf, as the other 16-bit scalar immediate forms in this file
// do (SFHIr16, MPYIr16); the vector leaf v8i16SExt10Imm belongs to the vector
// form above.
def AHIr16:
  RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
           "ahi\t$rT, $rA, $val", IntegerOp,
           [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;

// a: add word, vector form.
def Avec:
  RRForm<0b00000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
         "a\t$rT, $rA, $rB", IntegerOp,
         [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;

// v16i8 adds are also selected to the word add.
// NOTE(review): a word-wide add would presumably let carries cross byte
// lanes -- confirm that this mapping is intended.
def : Pat<(add (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (Avec VECREG:$rA, VECREG:$rB)>;
// a: add word, 32-bit scalar form.
def Ar32
  : RRForm<0b00000011000,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "a\t$rT, $rA, $rB", IntegerOp,
           [(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>;

// Same instruction over R8C registers; never selected by a pattern.
def Ar8
  : RRForm<0b00000011000,
           (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
           "a\t$rT, $rA, $rB", IntegerOp,
           [/* no pattern */]>;

// ai: add word immediate (signed 10-bit leaf), vector form.
def AIvec
  : RI10Form<0b00111000,
             (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
             "ai\t$rT, $rA, $val", IntegerOp,
             [(set (v4i32 VECREG:$rT),
                   (add (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;

// ai, 32-bit scalar form.
def AIr32
  : RI10Form<0b00111000,
             (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
             "ai\t$rT, $rA, $val", IntegerOp,
             [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>;

// sfh: subtract-from halfword, vector form.
// NOTE(review): the immediate subtract forms elsewhere in this file match
// (sub imm, $rA); the register forms here match (sub $rA, $rB) -- confirm the
// operand order against the ISA.
def SFHvec
  : RRForm<0b00010010000,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "sfh\t$rT, $rA, $rB", IntegerOp,
           [(set (v8i16 VECREG:$rT),
                 (sub (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;

// sfh, 16-bit scalar form.
def SFHr16
  : RRForm<0b00010010000,
           (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
           "sfh\t$rT, $rA, $rB", IntegerOp,
           [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
// sfhi: subtract-from halfword immediate. The pattern is (sub imm, $rA),
// i.e. the register is subtracted from the immediate.
def SFHIvec
  : RI10Form<0b10110000,
             (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
             "sfhi\t$rT, $rA, $val", IntegerOp,
             [(set (v8i16 VECREG:$rT),
                   (sub v8i16SExt10Imm:$val, (v8i16 VECREG:$rA)))]>;

def SFHIr16
  : RI10Form<0b10110000,
             (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
             "sfhi\t$rT, $rA, $val", IntegerOp,
             [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;

// sf: subtract-from word, register-register forms.
def SFvec
  : RRForm<0b00000010000,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "sf\t$rT, $rA, $rB", IntegerOp,
           [(set (v4i32 VECREG:$rT),
                 (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;

def SFr32
  : RRForm<0b00000010000,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "sf\t$rT, $rA, $rB", IntegerOp,
           [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>;

// sfi: subtract-from word immediate; again (sub imm, $rA).
def SFIvec
  : RI10Form<0b00110000,
             (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
             "sfi\t$rT, $rA, $val", IntegerOp,
             [(set (v4i32 VECREG:$rT),
                   (sub v4i32SExt10Imm:$val, (v4i32 VECREG:$rA)))]>;

def SFIr32
  : RI10Form<0b00110000,
             (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
             "sfi\t$rT, $rA, $val", IntegerOp,
             [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
// addx: add extended. The third input, $rCarry, is tied to the result $rT
// and left out of the encoding, so the assembly string prints only $rT, $rA
// and $rB. Both the vector and the scalar class match SPUaddx.
class ADDXInst<dag OOL, dag IOL, list<dag> pattern>
  : RRForm<0b00000010110, OOL, IOL,
           "addx\t$rT, $rA, $rB", IntegerOp,
           pattern>;

// Vector form over VECREG, typed by `vectype`.
class ADDXVecInst<ValueType vectype>
  : ADDXInst<(outs VECREG:$rT),
             (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
             [(set (vectype VECREG:$rT),
                   (SPUaddx (vectype VECREG:$rA),
                            (vectype VECREG:$rB),
                            (vectype VECREG:$rCarry)))]>,
    RegConstraint<"$rCarry = $rT">,
    NoEncode<"$rCarry">;

// Scalar form over register class `rclass`.
class ADDXRegInst<RegisterClass rclass>
  : ADDXInst<(outs rclass:$rT),
             (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
             [(set rclass:$rT,
                   (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
    RegConstraint<"$rCarry = $rT">,
    NoEncode<"$rCarry">;
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
// addx records: two vector types and two scalar register classes.
multiclass AddExtended {
  // Vector forms
  def v2i64 : ADDXVecInst<v2i64>;
  def v4i32 : ADDXVecInst<v4i32>;

  // Scalar forms
  def r64   : ADDXRegInst<R64C>;
  def r32   : ADDXRegInst<R32C>;
}

defm ADDX : AddExtended;
// cg: generate carry for add. Matched from SPUcarry_gen.
class CGInst<dag OOL, dag IOL, list<dag> pattern>
  : RRForm<0b01000011000, OOL, IOL,
           "cg\t$rT, $rA, $rB", IntegerOp,
           pattern>;

// Vector form over VECREG, typed by `vectype`.
class CGVecInst<ValueType vectype>
  : CGInst<(outs VECREG:$rT),
           (ins VECREG:$rA, VECREG:$rB),
           [(set (vectype VECREG:$rT),
                 (SPUcarry_gen (vectype VECREG:$rA),
                               (vectype VECREG:$rB)))]>;

// Scalar form over register class `rclass`.
class CGRegInst<RegisterClass rclass>
  : CGInst<(outs rclass:$rT),
           (ins rclass:$rA, rclass:$rB),
           [(set rclass:$rT, (SPUcarry_gen rclass:$rA, rclass:$rB))]>;

multiclass CarryGenerate {
  // Vector forms
  def v2i64 : CGVecInst<v2i64>;
  def v4i32 : CGVecInst<v4i32>;

  // Scalar forms
  def r64   : CGRegInst<R64C>;
  def r32   : CGRegInst<R32C>;
}

defm CG : CarryGenerate;
// sfx: subtract from, extended. Used together with BG to subtract with carry
// (a borrow, in this case). As with addx, $rCarry is tied to $rT and is not
// encoded. Both classes match SPUsubx.
class SFXInst<dag OOL, dag IOL, list<dag> pattern>
  : RRForm<0b10000010110, OOL, IOL,
           "sfx\t$rT, $rA, $rB", IntegerOp,
           pattern>;

// Vector form over VECREG, typed by `vectype`.
class SFXVecInst<ValueType vectype>
  : SFXInst<(outs VECREG:$rT),
            (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
            [(set (vectype VECREG:$rT),
                  (SPUsubx (vectype VECREG:$rA),
                           (vectype VECREG:$rB),
                           (vectype VECREG:$rCarry)))]>,
    RegConstraint<"$rCarry = $rT">,
    NoEncode<"$rCarry">;

// Scalar form over register class `rclass`.
class SFXRegInst<RegisterClass rclass>
  : SFXInst<(outs rclass:$rT),
            (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
            [(set rclass:$rT,
                  (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
    RegConstraint<"$rCarry = $rT">,
    NoEncode<"$rCarry">;
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
// sfx records: two vector types and two scalar register classes.
multiclass SubtractExtended {
  // Vector forms
  def v2i64 : SFXVecInst<v2i64>;
  def v4i32 : SFXVecInst<v4i32>;

  // Scalar forms
  def r64   : SFXRegInst<R64C>;
  def r32   : SFXRegInst<R32C>;
}

defm SFX : SubtractExtended;
// bg: borrow generate. Matched from SPUborrow_gen, in both vector and scalar
// register forms.
class BGInst<dag OOL, dag IOL, list<dag> pattern>
  : RRForm<0b01000010000, OOL, IOL,
           "bg\t$rT, $rA, $rB", IntegerOp,
           pattern>;

// Vector form over VECREG, typed by `vectype`.
class BGVecInst<ValueType vectype>
  : BGInst<(outs VECREG:$rT),
           (ins VECREG:$rA, VECREG:$rB),
           [(set (vectype VECREG:$rT),
                 (SPUborrow_gen (vectype VECREG:$rA),
                                (vectype VECREG:$rB)))]>;

// Scalar form over register class `rclass`.
class BGRegInst<RegisterClass rclass>
  : BGInst<(outs rclass:$rT),
           (ins rclass:$rA, rclass:$rB),
           [(set rclass:$rT, (SPUborrow_gen rclass:$rA, rclass:$rB))]>;

multiclass BorrowGenerate {
  // Vector forms
  def v4i32 : BGVecInst<v4i32>;
  def v2i64 : BGVecInst<v2i64>;

  // Scalar forms
  def r64   : BGRegInst<R64C>;
  def r32   : BGRegInst<R32C>;
}

defm BG : BorrowGenerate;
// BGX: Borrow generate, extended.
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
// bgx, vector form only. It has no selection pattern; $rCarry is tied to the
// result $rT and is not encoded.
def BGXvec
  : RRForm<0b11000010110,
           (outs VECREG:$rT),
           (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
           "bgx\t$rT, $rA, $rB", IntegerOp,
           []>,
    RegConstraint<"$rCarry = $rT">,
    NoEncode<"$rCarry">;
// Halfword multiply variants.
// N.B.: these can be used to build up larger quantities (16x16 -> 32).

// mpy, vector form: matched from SPUmpy_v8i16.
def MPYv8i16
  : RRForm<0b00100011110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpy\t$rT, $rA, $rB", IntegerMulDiv,
           [(set (v8i16 VECREG:$rT),
                 (SPUmpy_v8i16 (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;

// mpy, 16-bit scalar form: plain mul.
def MPYr16
  : RRForm<0b00100011110,
           (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
           "mpy\t$rT, $rA, $rB", IntegerMulDiv,
           [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;

// mpyu, vector form: matched from SPUmpyu_v4i32.
def MPYUv4i32
  : RRForm<0b00110011110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
           [(set (v4i32 VECREG:$rT),
                 (SPUmpyu_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;

// mpyu with 16-bit inputs and a 32-bit result: mul of two zero-extended
// halfwords.
def MPYUr16
  : RRForm<0b00110011110,
           (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
           "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
           [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;

// mpyu over 32-bit registers: matched from SPUmpyu_i32.
def MPYUr32
  : RRForm<0b00110011110,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
           [(set R32C:$rT, (SPUmpyu_i32 R32C:$rA, R32C:$rB))]>;
// mpyi: multiply 16 x s10imm -> 32 result (custom lowering for the 32-bit
// result; the patterns here only produce the lower 16 bits).
def MPYIvec
  : RI10Form<0b00101110,
             (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
             "mpyi\t$rT, $rA, $val", IntegerMulDiv,
             [(set (v8i16 VECREG:$rT),
                   (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;

def MPYIr16
  : RI10Form<0b00101110,
             (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
             "mpyi\t$rT, $rA, $val", IntegerMulDiv,
             [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;

// mpyui: same issues as the other multiplies. It has no selection pattern,
// but may be used during target DAG selection or lowering.
def MPYUIvec
  : RI10Form<0b10101110,
             (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
             "mpyui\t$rT, $rA, $val", IntegerMulDiv,
             []>;

def MPYUIr16
  : RI10Form<0b10101110,
             (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
             "mpyui\t$rT, $rA, $val", IntegerMulDiv,
             []>;
// mpya: 16 x 16 + 16 -> 32 bit result

// Vector form: a v8i16 mul, bitconverted to v4i32, added to $rC.
def MPYAvec
  : RRRForm<0b0011,
            (outs VECREG:$rT),
            (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
            "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
            [(set (v4i32 VECREG:$rT),
                  (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
                                               (v8i16 VECREG:$rB)))),
                       (v4i32 VECREG:$rC)))]>;

// Scalar form with 16-bit factors and a 32-bit addend: the sign-extended
// 16-bit product is added to $rC.
def MPYAr32
  : RRRForm<0b0011,
            (outs R32C:$rT),
            (ins R16C:$rA, R16C:$rB, R32C:$rC),
            "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
            [(set R32C:$rT,
                  (add (sext (mul R16C:$rA, R16C:$rB)), R32C:$rC))]>;

// Also select MPYAr32 when each factor is sign-extended before the multiply.
def : Pat<(add (mul (sext R16C:$rA), (sext R16C:$rB)), R32C:$rC),
          (MPYAr32 R16C:$rA, R16C:$rB, R32C:$rC)>;

// Variant whose factors are 32-bit registers sign-extended in place from i16.
def MPYAr32_sextinreg
  : RRRForm<0b0011,
            (outs R32C:$rT),
            (ins R32C:$rA, R32C:$rB, R32C:$rC),
            "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
            [(set R32C:$rT,
                  (add (mul (sext_inreg R32C:$rA, i16),
                            (sext_inreg R32C:$rB, i16)),
                       R32C:$rC))]>;

// Disabled duplicate of MPYAr32, kept for reference:
//def MPYAr32:
//  RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
//    "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
//    [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
//                         R32C:$rC))]>;
// mpyh: multiply high, used to synthesize 32-bit multiplies.
def MPYHv4i32
  : RRForm<0b10100011110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
           [(set (v4i32 VECREG:$rT),
                 (SPUmpyh_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;

def MPYHr32
  : RRForm<0b10100011110,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
           [(set R32C:$rT, (SPUmpyh_i32 R32C:$rA, R32C:$rB))]>;

// mpys: multiply high and shift right (returns the top half of a 16-bit
// multiply, sign extended to 32 bits). Neither record has a pattern.
def MPYSvec
  : RRForm<0b11100011110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpys\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

def MPYSr16
  : RRForm<0b11100011110,
           (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
           "mpys\t$rT, $rA, $rB", IntegerMulDiv,
           []>;
// mpyhh: multiply high-high (returns the 32-bit result of multiplying the
// top 16 bits of $rA and $rB).
def MPYHHv8i16
  : RRForm<0b01100011110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
           [(set (v8i16 VECREG:$rT),
                 (SPUmpyhh_v8i16 (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;

def MPYHHr32
  : RRForm<0b01100011110,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

// mpyhha: multiply high-high, add to $rT. No patterns.
// NOTE(review): $rT is described as an accumulator but is not listed as an
// input or tied here -- confirm before attaching a pattern.
def MPYHHAvec
  : RRForm<0b01100010110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

def MPYHHAr32
  : RRForm<0b01100010110,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

// mpyhhu: multiply high-high, unsigned. No patterns.
def MPYHHUvec
  : RRForm<0b01110011110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

def MPYHHUr32
  : RRForm<0b01110011110,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

// mpyhhau: multiply high-high, unsigned (by its mnemonic, presumably the
// accumulating counterpart of mpyhhu). No patterns.
def MPYHHAUvec
  : RRForm<0b01110010110,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
           []>;

def MPYHHAUr32
  : RRForm<0b01110010110,
           (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
           "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
           []>;
// clz: count leading zeroes.

// Vector form; carries no pattern (left for an intrinsic).
def CLZv4i32
  : RRForm_1<0b10100101010,
             (outs VECREG:$rT), (ins VECREG:$rA),
             "clz\t$rT, $rA", IntegerOp,
             [/* intrinsic */]>;

// 32-bit scalar form: matched from ctlz.
def CLZr32
  : RRForm_1<0b10100101010,
             (outs R32C:$rT), (ins R32C:$rA),
             "clz\t$rT, $rA", IntegerOp,
             [(set R32C:$rT, (ctlz R32C:$rA))]>;

// cntb: count ones in bytes (aka "population count").
//
// NOTE: this is really a vector instruction, but the custom lowering code
// uses it in unorthodox ways to support CTPOP for other data types! The
// three records below are the same instruction matched from SPUcntb at
// different vector types.
def CNTBv16i8
  : RRForm_1<0b00101101010,
             (outs VECREG:$rT), (ins VECREG:$rA),
             "cntb\t$rT, $rA", IntegerOp,
             [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;

def CNTBv8i16
  : RRForm_1<0b00101101010,
             (outs VECREG:$rT), (ins VECREG:$rA),
             "cntb\t$rT, $rA", IntegerOp,
             [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;

def CNTBv4i32
  : RRForm_1<0b00101101010,
             (outs VECREG:$rT), (ins VECREG:$rA),
             "cntb\t$rT, $rA", IntegerOp,
             [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
// Gather instructions. None of them has a selection pattern, and all three
// write an R16C result from a VECREG source.

// gbb: gather the low-order bit of each byte in $rA into a single 16-bit
// quantity stored in $rT.
def GBB
  : RRForm_1<0b01001101100,
             (outs R16C:$rT), (ins VECREG:$rA),
             "gbb\t$rT, $rA", GatherOp,
             []>;

// gbh: gather the low-order bit of each halfword in $rA into a single 8-bit
// quantity stored in $rT.
def GBH
  : RRForm_1<0b10001101100,
             (outs R16C:$rT), (ins VECREG:$rA),
             "gbh\t$rT, $rA", GatherOp,
             []>;

// gb: gather the low-order bit of each word in $rA into a single 4-bit
// quantity stored in $rT.
def GB
  : RRForm_1<0b00001101100,
             (outs R16C:$rT), (ins VECREG:$rA),
             "gb\t$rT, $rA", GatherOp,
             []>;

// Byte operations; also pattern-less.

// avgb: average bytes
def AVGB
  : RRForm<0b11001011000,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "avgb\t$rT, $rA, $rB", ByteOp,
           []>;

// absdb: absolute difference of bytes
def ABSDB
  : RRForm<0b11001010000,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "absdb\t$rT, $rA, $rB", ByteOp,
           []>;

// sumb: sum bytes into halfwords
def SUMB
  : RRForm<0b11001010010,
           (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
           "sumb\t$rT, $rA, $rB", ByteOp,
           []>;
// Sign extension operations.

// xsbh: XSBHInst fixes the opcode, assembly string and itinerary; the caller
// provides the operand lists and the pattern.
class XSBHInst<dag OOL, dag IOL, list<dag> pattern>
  : RRForm_1<0b01101101010, OOL, IOL,
             "xsbh\t$rDst, $rSrc", IntegerOp,
             pattern>;

// Vector form: the result is always typed v8i16; `vectype` is the type given
// to the source operand of the sext.
class XSBHVecInst<ValueType vectype>
  : XSBHInst<(outs VECREG:$rDst),
             (ins VECREG:$rSrc),
             [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;

// Scalar form: in-register sign extension from i8 within `rclass`.
class XSBHRegInst<RegisterClass rclass>
  : XSBHInst<(outs rclass:$rDst),
             (ins rclass:$rSrc),
             [(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;

multiclass ExtendByteHalfword {
  // NOTE(review): the record is named v16i8 but instantiates the class with
  // v8i16 -- confirm the intended source type.
  def v16i8 : XSBHVecInst<v8i16>;
  def r16   : XSBHRegInst<R16C>;

  // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
  // quantities to 32-bit quantities via a 32-bit register (see the sext
  // 8->32 pattern below). The original note says this intentionally doesn't
  // match a pattern, because the sext 8->32 pattern should do the work (it
  // needs the extra XSHWr32).
  // NOTE(review): XSBHRegInst does attach a sext_inreg pattern -- confirm.
  def r32   : XSBHRegInst<R32C>;
}

defm XSBH : ExtendByteHalfword;
// Sign-extend, but take an 8-bit register to a 16-bit register (not done as
// sext_inreg)
XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),