Newer
Older
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>;
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
(EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
(EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
}
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
// fall back to this for SSE1)
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
(SHUFFLE_get_shuf_imm VR128:$src3))>;
// Set lowest element and zero upper elements.
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
// Some special case pandn patterns.
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
Evan Cheng
committed
(memop addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
Evan Cheng
committed
(memop addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
Evan Cheng
committed
(memop addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
// vector -> vector casts
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
(Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
(Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
Evan Cheng
committed
// Use movaps / movups for SSE integer load / store (one byte shorter).
def : Pat<(alignedloadv4i32 addr:$src),
def : Pat<(loadv4i32 addr:$src),
Evan Cheng
committed
def : Pat<(alignedloadv2i64 addr:$src),
Evan Cheng
committed
def : Pat<(loadv2i64 addr:$src),
Evan Cheng
committed
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
Evan Cheng
committed
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
//===----------------------------------------------------------------------===//
// SSE4.1 Instructions
//===----------------------------------------------------------------------===//
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
string OpcodeStr,
Intrinsic V4F32Int,
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PSm_Int : Ii8<opcps, MRMSrcMem,
(outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Evan Cheng
committed
[(set VR128:$dst,
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
TA, OpSize,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
Evan Cheng
committed
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
Evan Cheng
committed
def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
(outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Evan Cheng
committed
[(set VR128:$dst,
(V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
let Constraints = "$src1 = $dst" in {
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
Intrinsic F64Int> {
// Intrinsic operation, reg.
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, mem.
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, reg.
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
// Intrinsic operation, mem.
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
}
}
// FP round - roundss, roundps, roundsd, roundpd
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
Intrinsic IntId128> {
def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
(bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
int_x86_sse41_phminposuw>;
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Commutable = 0> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
}
}
defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
int_x86_sse41_pcmpeqq, 1>;
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
int_x86_sse41_packusdw, 0>;
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
int_x86_sse41_pminsb, 1>;
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
int_x86_sse41_pminsd, 1>;
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
int_x86_sse41_pminud, 1>;
defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
int_x86_sse41_pminuw, 1>;
defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
int_x86_sse41_pmaxsb, 1>;
defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
int_x86_sse41_pmaxsd, 1>;
defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
int_x86_sse41_pmaxud, 1>;
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
int_x86_sse41_pmaxuw, 1>;
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
(PCMPEQQrm VR128:$src1, addr:$src2)>;
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
SDNode OpNode, Intrinsic IntId128,
bit Commutable = 0> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode (OpVT VR128:$src1),
VR128:$src2))]>, OpSize {
let isCommutable = Commutable;
}
def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
Evan Cheng
committed
(IntId128 VR128:$src1, (memop addr:$src2)))]>,
OpSize;
}
}
/// SS48I_binop_rm - Simple SSE41 binary operator.
let Constraints = "$src1 = $dst" in {
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>,
OpSize;
}
}
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
Evan Cheng
committed
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Commutable = 0> {
Evan Cheng
committed
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize {
let isCommutable = Commutable;
}
Evan Cheng
committed
def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
OpSize;
}
}
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
int_x86_sse41_blendps, 0>;
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
int_x86_sse41_blendpd, 0>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
int_x86_sse41_pblendw, 0>;
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
int_x86_sse41_dpps, 1>;
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
int_x86_sse41_dppd, 1>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
Evan Cheng
committed
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr,
"\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
}
}
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
OpSize;
}
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
OpSize;
}
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
(PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
(PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
(PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
(PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
// Expecting a i16 load any extended to i32 value.
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId (bitconvert
(v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
OpSize;
}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
Evan Cheng
committed
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
Evan Cheng
committed
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
Evan Cheng
committed
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
addr:$dst)]>, OpSize;
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
Evan Cheng
committed
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
Evan Cheng
committed
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Evan Cheng
committed
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
addr:$dst)]>, OpSize;
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
Requires<[HasSSE41]>;
multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
Evan Cheng
committed
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
Evan Cheng
committed
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
imm:$src3))]>, OpSize;
}
}
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
Evan Cheng
committed
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
OpSize;
Evan Cheng
committed
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
imm:$src3)))]>, OpSize;
}
}
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst,
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
(X86insrtps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, OpSize;
}
}
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
Chris Lattner
committed
[(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
Chris Lattner
committed
[(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
OpSize;
}
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize;
//===----------------------------------------------------------------------===//
// SSE4.2 Instructions
//===----------------------------------------------------------------------===//
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Commutable = 0> {
def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
}
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
let Constraints = "$src1 = $dst" in {
def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i8mem:$src2),
Kevin Enderby
committed
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_8 GR32:$src1,
Kevin Enderby
committed
(load addr:$src2)))]>;
def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR8:$src2),
Kevin Enderby
committed
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
Kevin Enderby
committed
(int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>;
def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i16mem:$src2),
Kevin Enderby
committed
"crc32{w} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_16 GR32:$src1,
(load addr:$src2)))]>,
OpSize;
def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR16:$src2),
Kevin Enderby
committed
"crc32{w} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>,
OpSize;
def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
Kevin Enderby
committed
"crc32{l} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_32 GR32:$src1,
Kevin Enderby
committed
(load addr:$src2)))]>;
def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
Kevin Enderby
committed
"crc32{l} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
Kevin Enderby
committed
(int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>;
def CRC64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i8mem:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
(int_x86_sse42_crc64_8 GR64:$src1,
(load addr:$src2)))]>,
REX_W;
def CRC64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR8:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
(int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>,
REX_W;
def CRC64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
Kevin Enderby
committed
"crc32{q} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
Kevin Enderby
committed
(int_x86_sse42_crc64_64 GR64:$src1,
(load addr:$src2)))]>,
Kevin Enderby
committed
REX_W;
def CRC64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
Kevin Enderby
committed
"crc32{q} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
Kevin Enderby
committed
(int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>,
REX_W;
// String/text processing instructions.
let Defs = [EFLAGS], usesCustomInserter = 1 in {
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"#PCMPISTRM128rr PSEUDO!",
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>, OpSize;
def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"#PCMPISTRM128rm PSEUDO!",
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2),
imm:$src3))]>, OpSize;
}
let Defs = [XMM0, EFLAGS] in {
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
[(set VR128:$dst,
(int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"#PCMPESTRM128rm PSEUDO!",
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
OpSize;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
}
let Defs = [ECX, EFLAGS] in {
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
(implicit EFLAGS)]>, OpSize;
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
(implicit EFLAGS)]>, OpSize;
}
}
defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
let Defs = [ECX, EFLAGS] in {
let Uses = [EAX, EDX] in {
multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
def rr : SS42AI<0x61, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
[(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
(implicit EFLAGS)]>, OpSize;
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
[(set ECX,
(IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
(implicit EFLAGS)]>, OpSize;
}
}
}
defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Commutable = 0> {
def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
}
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
int_x86_aesni_aesenc>;
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
int_x86_aesni_aesenclast>;
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
int_x86_aesni_aesdec>;
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
int_x86_aesni_aesdeclast>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
(AESENCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
(AESENCrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
(AESENCLASTrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
(AESDECrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
(AESDECrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc VR128:$src1))]>,
OpSize;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
OpSize;
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;