// X86InstrSSE.td (excerpt) - X86 SSE instruction definitions.
// Packed FP bitwise AND/OR/XOR, register-register forms.  The *PS patterns
// are typed v2i64 directly; the *PD patterns bitcast their v2f64 operands to
// v2i64 with bc_v2i64 before applying the logical node.
def ANDPDrr : PDI<0x54, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "andpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (and (bc_v2i64 (v2f64 VR128:$src1)),
                         (bc_v2i64 (v2f64 VR128:$src2))))]>;
def ORPSrr  : PSI<0x56, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "orps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (v2i64 (or VR128:$src1, VR128:$src2)))]>;
def ORPDrr  : PDI<0x56, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "orpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (or (bc_v2i64 (v2f64 VR128:$src1)),
                        (bc_v2i64 (v2f64 VR128:$src2))))]>;
def XORPSrr : PSI<0x57, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "xorps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
def XORPDrr : PDI<0x57, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "xorpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (xor (bc_v2i64 (v2f64 VR128:$src1)),
                         (bc_v2i64 (v2f64 VR128:$src2))))]>;
// Packed FP bitwise AND/OR/XOR, register-memory forms.  $src2 is an f128mem
// operand: the *PS patterns load it as v4f32 and the *PD patterns as v2f64,
// in both cases bitcasting the loaded value to v2i64.
def ANDPSrm : PSI<0x54, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "andps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (and VR128:$src1,
                         (bc_v2i64 (loadv4f32 addr:$src2))))]>;
def ANDPDrm : PDI<0x54, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "andpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (and (bc_v2i64 (v2f64 VR128:$src1)),
                         (bc_v2i64 (loadv2f64 addr:$src2))))]>;
def ORPSrm  : PSI<0x56, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "orps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (or VR128:$src1,
                        (bc_v2i64 (loadv4f32 addr:$src2))))]>;
def ORPDrm  : PDI<0x56, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "orpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (or (bc_v2i64 (v2f64 VR128:$src1)),
                        (bc_v2i64 (loadv2f64 addr:$src2))))]>;
def XORPSrm : PSI<0x57, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "xorps {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (xor VR128:$src1,
                         (bc_v2i64 (loadv4f32 addr:$src2))))]>;
def XORPDrm : PDI<0x57, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                  "xorpd {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (xor (bc_v2i64 (v2f64 VR128:$src1)),
                         (bc_v2i64 (loadv2f64 addr:$src2))))]>;
// Packed FP AND-NOT: the pattern is (and (not $src1), $src2).
// The *PS forms spell the inversion as an xor with an all-ones vector
// (v4i32 immAllOnesV bitcast to v2i64); the *PD forms use vnot on the
// bitcast v2f64 operand.
def ANDNPSrr : PSI<0x55, MRMSrcReg,
                   (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "andnps {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v2i64 (and (xor VR128:$src1,
                                      (bc_v2i64 (v4i32 immAllOnesV))),
                                 VR128:$src2)))]>;
def ANDNPSrm : PSI<0x55, MRMSrcMem,
                   (ops VR128:$dst, VR128:$src1,f128mem:$src2),
                   "andnps {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v2i64 (and (xor VR128:$src1,
                                      (bc_v2i64 (v4i32 immAllOnesV))),
                                 (bc_v2i64 (loadv4f32 addr:$src2)))))]>;
def ANDNPDrr : PDI<0x55, MRMSrcReg,
                   (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "andnpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                          (bc_v2i64 (v2f64 VR128:$src2))))]>;
def ANDNPDrm : PDI<0x55, MRMSrcMem,
                   (ops VR128:$dst, VR128:$src1,f128mem:$src2),
                   "andnpd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                          (bc_v2i64 (loadv2f64 addr:$src2))))]>;
// Packed FP compare instructions.  The SSECC:$cc operand is printed inside
// the mnemonic (cmp${cc}ps / cmp${cc}pd) and is handed to the compare
// intrinsic as imm:$cc.  "rri" forms take the second source from a register,
// "rmi" forms load it from an f128mem operand.
def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                    "cmp${cc}ps {$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                       VR128:$src, imm:$cc))]>;
def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
                    (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                    "cmp${cc}ps {$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                       (load addr:$src), imm:$cc))]>;
def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
                    "cmp${cc}pd {$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                       VR128:$src, imm:$cc))]>;
def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
                    (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
                    "cmp${cc}pd {$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                       (load addr:$src), imm:$cc))]>;
// Shuffle and unpack instructions
let isTwoAddress = 1 in {
// SHUFPS / SHUFPD: vector_shuffle of $src1 and $src2 whose mask satisfies
// SHUFP_shuffle_mask; the mask is emitted as the immediate $src3.
// Only SHUFPSrri is flagged isConvertibleToThreeAddress (the `let ... in`
// below has no braces, so it applies to the single def that follows it).
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
                   (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                     "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v4f32 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, VR128:$src2,
                                               SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                     "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v2f64 (vector_shuffle
                                               VR128:$src1, (load addr:$src2),
                                               SHUFP_shuffle_mask:$src3)))]>;
// Packed FP unpack/interleave, matched as vector_shuffle nodes whose mask
// satisfies UNPCKH_shuffle_mask (opcode 0x15) or UNPCKL_shuffle_mask
// (opcode 0x14).  All eight records carry AddedComplexity = 10.
let AddedComplexity = 10 in {
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
    (ops VR128:$dst, VR128:$src1, VR128:$src2),
    "unpckhps {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
              UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
    (ops VR128:$dst, VR128:$src1, f128mem:$src2),
    "unpckhps {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
              UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
    (ops VR128:$dst, VR128:$src1, VR128:$src2),
    "unpckhpd {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
              UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
    (ops VR128:$dst, VR128:$src1, f128mem:$src2),
    "unpckhpd {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
              UNPCKH_shuffle_mask)))]>;

def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
    (ops VR128:$dst, VR128:$src1, VR128:$src2),
    "unpcklps {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
              UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
    (ops VR128:$dst, VR128:$src1, f128mem:$src2),
    "unpcklps {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
              UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
    (ops VR128:$dst, VR128:$src1, VR128:$src2),
    "unpcklpd {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
              UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
    (ops VR128:$dst, VR128:$src1, f128mem:$src2),
    "unpcklpd {$src2, $dst|$dst, $src2}",
    [(set VR128:$dst,
      (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
              UNPCKL_shuffle_mask)))]>;
} // AddedComplexity

// Helper classes for two-source instructions matched through an intrinsic.
// Each takes the opcode byte `o`, the mnemonic `OpcodeStr` (the operand
// string is appended with !strconcat) and the intrinsic `IntId`.
//   S3D_Intrr / S3D_Intrm : derive from S3DI, result typed v4f32.
//   S3_Intrr  / S3_Intrm  : derive from S3I,  result typed v2f64.
// The "rr" classes take $src2 in a register; the "rm" classes load it from
// an f128mem operand.
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcReg,
         (ops VR128:$dst, VR128:$src1, VR128:$src2),
         !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
           (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcMem,
         (ops VR128:$dst, VR128:$src1, f128mem:$src2),
         !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
           (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcReg,
        (ops VR128:$dst, VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
          (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcMem,
        (ops VR128:$dst, VR128:$src1, f128mem:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
          (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;

// Horizontal add (0x7C) and subtract (0x7D), instantiated from the classes
// above with the matching int_x86_sse3_* intrinsic.
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPDrr : S3_Intrr<0x7C, "haddpd", int_x86_sse3_hadd_pd>;
def HADDPDrm : S3_Intrm<0x7C, "haddpd", int_x86_sse3_hadd_pd>;
def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
def HSUBPDrr : S3_Intrr<0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
def HSUBPDrm : S3_Intrm<0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
//===----------------------------------------------------------------------===//
// SSE integer instructions
//===----------------------------------------------------------------------===//

// Move Instructions
// MOVDQA in its three forms: register copy (no selection pattern), load
// matched on loadv2i64, and store matched on a v2i64 store.
def MOVDQArr : PDI<0x6F, MRMSrcReg,
                   (ops VR128:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem,
                   (ops VR128:$dst, i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2i64 addr:$src))]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem,
                   (ops i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [(store (v2i64 VR128:$src), addr:$dst)]>;
// MOVDQU load/store are matched through the int_x86_sse2_loadu_dq /
// int_x86_sse2_storeu_dq intrinsics.  They are built from the plain `I`
// class with an explicit XS prefix and a HasSSE2 requirement.
def MOVDQUrm :   I<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "movdqu {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
                 XS, Requires<[HasSSE2]>;
def MOVDQUmr :   I<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                   "movdqu {$src, $dst|$dst, $src}",
                   [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                 XS, Requires<[HasSSE2]>;
// LDDQU load, matched through int_x86_sse3_ldu_dq.
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
                   "lddqu {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
/// PDI_binop_rm_int - SSE2 binary operator matched through an intrinsic.
///
/// Expands to two records:
///   rr - both sources in registers; receives isCommutable = Commutable.
///   rm - $src2 loaded from an i128mem operand as v2i64 and bitconverted
///        for the intrinsic.
///
/// opc is the opcode byte, OpcodeStr the mnemonic, IntId the intrinsic.
/// The operand string closes its {AT&T|Intel} variant group with '}' so the
/// asm string is well formed.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1,
                                        (bitconvert (loadv2i64 addr:$src2))))]>;
}
}

let isTwoAddress = 1 in {
/// PDI_binop_rmi_int - SSE2 binary operator matched through an intrinsic,
/// with an additional immediate form.
///
/// Expands to three records:
///   rr - opcode opc, both sources in registers.
///   rm - opcode opc, $src2 loaded from i128mem as v2i64 and bitconverted.
///   ri - opcode opc2 with format ImmForm; the i32i8imm $src2 is wrapped in
///        scalar_to_vector before being passed to the intrinsic.
///
/// The operand string closes its {AT&T|Intel} variant group with '}' so the
/// asm string is well formed.
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, Intrinsic IntId> {
  def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
  def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1,
                                        (bitconvert (loadv2i64 addr:$src2))))]>;
  def ri : PDIi8<opc2, ImmForm, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1,
                                        (scalar_to_vector (i32 imm:$src2))))]>;
}
}


let isTwoAddress = 1 in {
/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Expands to an rr record (register sources, isCommutable = Commutable) and
/// an rm record whose $src2 is loaded from i128mem as v2i64 and
/// bitconverted.  The result of OpNode is typed OpVT in both.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
                                       (bitconvert (loadv2i64 addr:$src2)))))]>;
}

/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
///
/// Same shape as PDI_binop_rm, but the rm form uses the loadv2i64 result
/// directly, with no bitconvert.
///
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (OpNode VR128:$src1,(loadv2i64 addr:$src2)))]>;
}
// NOTE(review): the excerpt had no closing brace for this `let` even though
// the same `let isTwoAddress = 1 in {` is opened again further down; the
// closer is restored here -- confirm against the full file.
} // isTwoAddress
// 128-bit Integer Arithmetic

// Element-wise add.  The trailing 1 marks the rr form commutable.
defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;

// Saturating add, matched through intrinsics.
defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;

// Element-wise subtract (not commutable).
defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;

// Saturating subtract, matched through intrinsics.
defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;

// Multiplies.
defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;

defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;

defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;

// Averages.
defm PAVGB  : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
defm PAVGW  : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;

// Min / max and sum of absolute differences.
defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
// PSADBW is opcode 0xF6; 0xE0 belongs to PAVGB above and would make the two
// instructions encode identically.
defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;

// Shifts: first opcode is the rr/rm form, second opcode plus the MRMnr
// format is the shift-by-immediate form.
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_sse2_psll_d>;
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_sse2_psll_q>;

defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_sse2_psrl_w>;
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_sse2_psrl_d>;
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_sse2_psrl_q>;

defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_sse2_psra_w>;
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d>;
// PSRAQ doesn't exist in SSE[1-3].

// 128-bit logical shifts.
// Whole-register shifts by an immediate.  Both share opcode 0x73 and differ
// only in format (MRM7r vs MRM3r).  Neither record has a selection pattern
// of its own.
let isTwoAddress = 1 in {
def PSLLDQri : PDIi8<0x73, MRM7r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                     "pslldq {$src2, $dst|$dst, $src2}", []>;
def PSRLDQri : PDIi8<0x73, MRM3r, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                     "psrldq {$src2, $dst|$dst, $src2}", []>;
// PSRADQri doesn't exist in SSE[1-3].
// NOTE(review): the excerpt had no closing brace for this `let`; it is
// restored here so the scope covers only the two shifts -- confirm against
// the full file.
}
// Select the int_x86_sse2_psll_dq / int_x86_sse2_psrl_dq intrinsics to
// PSLLDQri / PSRLDQri.  The immediate is passed through the PSxLDQ_imm
// transform before it is placed on the instruction.
let Predicates = [HasSSE2] in {
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
}

// 128-bit integer logic on v2i64; the rr forms are marked commutable.
defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;

// PANDN matches (and (vnot $src1), $src2).  It is written out by hand rather
// than through the multiclass because the first operand is inverted.
let isTwoAddress = 1 in {
def PANDNrr : PDI<0xDF, MRMSrcReg,
                  (ops VR128:$dst, VR128:$src1, VR128:$src2),
                  "pandn {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (v2i64 (and (vnot VR128:$src1), VR128:$src2)))]>;

def PANDNrm : PDI<0xDF, MRMSrcMem,
                  (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                  "pandn {$src2, $dst|$dst, $src2}",
                  [(set VR128:$dst,
                    (v2i64 (and (vnot VR128:$src1), (load addr:$src2))))]>;
}
// SSE2 Integer comparison
// Matched through intrinsics; the Commutable argument is left at its
// default of 0 for all of them.
defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;

// Pack instructions
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;

// Shuffle and unpack instructions
// PSHUFD: single-source shuffle of a v4i32 value.  The second vector_shuffle
// operand is (undef) and the mask, which must satisfy PSHUFD_shuffle_mask,
// is emitted as the immediate $src2.
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;
// Memory form: the source is loaded as v2i64 and bitcast to v4i32.  The
// (undef) second operand is required here too, as in PSHUFDri and the
// PSHUFHWmi / PSHUFLWmi patterns; without it vector_shuffle has only two
// operands.
def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
                     (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (vector_shuffle
                                               (bc_v4i32(loadv2i64 addr:$src1)),
                                               (undef),
                                               PSHUFD_shuffle_mask:$src2)))]>;

// PSHUFHW / PSHUFLW: single-source v8i16 shuffles (second vector_shuffle
// operand is undef) whose mask must satisfy PSHUFHW_shuffle_mask or
// PSHUFLW_shuffle_mask.  The "mi" forms load the source as v2i64 and
// bitcast it to v8i16.

// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (v8i16 (vector_shuffle VR128:$src1, (undef),
                              PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;
def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (v8i16 (vector_shuffle
                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                              PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;

// SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWri : Ii8<0x70, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (v8i16 (vector_shuffle VR128:$src1, (undef),
                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
                    (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (v8i16 (vector_shuffle
                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;

let isTwoAddress = 1 in {
// Integer unpack-low: vector_shuffle of $src1 and $src2 whose mask satisfies
// UNPCKL_shuffle_mask, for element types v16i8 / v8i16 / v4i32 / v2i64.
// The rm forms load $src2 as v2i64 and bitcast it to the element type
// (no bitcast is needed for the v2i64 form).
// The BW/WD/DQ rm patterns were truncated in this copy (no `[(set ...` head
// and no mask/terminator); they are restored to mirror the PUNPCKH*rm
// patterns.
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpcklbw {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpcklbw {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v16i8 (vector_shuffle VR128:$src1,
                                (bc_v16i8 (loadv2i64 addr:$src2)),
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpcklwd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpcklwd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v8i16 (vector_shuffle VR128:$src1,
                                (bc_v8i16 (loadv2i64 addr:$src2)),
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckldq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckldq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4i32 (vector_shuffle VR128:$src1,
                                (bc_v4i32 (loadv2i64 addr:$src2)),
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpcklqdq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                       "punpcklqdq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (vector_shuffle VR128:$src1,
                                (loadv2i64 addr:$src2),
                                UNPCKL_shuffle_mask)))]>;

// Integer unpack-high: vector_shuffle of $src1 and $src2 whose mask
// satisfies UNPCKH_shuffle_mask, for element types v16i8 / v8i16 / v4i32 /
// v2i64.  The rm forms load $src2 as v2i64 and bitcast it to the element
// type (no bitcast is needed for the v2i64 form).
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhbw {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhbw {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v16i8 (vector_shuffle VR128:$src1,
                                (bc_v16i8 (loadv2i64 addr:$src2)),
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhwd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhwd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v8i16 (vector_shuffle VR128:$src1,
                                (bc_v8i16 (loadv2i64 addr:$src2)),
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "punpckhdq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                      "punpckhdq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4i32 (vector_shuffle VR128:$src1,
                                (bc_v4i32 (loadv2i64 addr:$src2)),
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (vector_shuffle VR128:$src1,
                                (loadv2i64 addr:$src2),
                                UNPCKH_shuffle_mask)))]>;
// Word extract / insert, matched on the X86pextrw / X86pinsrw nodes with the
// element index given as an iPTR immediate.
// The PEXTRWri and PINSRWrmi patterns were truncated in this copy (index
// operand and terminator missing); they are completed to the same shape as
// the PINSRWrri pattern.
// NOTE(review): in this excerpt nothing closes the enclosing
// `let isTwoAddress = 1 in {` before PEXTRWri, whose $dst is GR32 while
// $src1 is VR128 -- confirm the intended scoping against the full file.
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                    (ops GR32:$dst, VR128:$src1, i32i8imm:$src2),
                    "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
                                     (iPTR imm:$src2)))]>;
def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
                     (ops VR128:$dst, VR128:$src1, GR32:$src2, i32i8imm:$src3),
                     "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst, (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                                               GR32:$src2, (iPTR imm:$src3))))]>;
// Memory form: the inserted word is an i16 load any-extended to i32.
def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
                     (ops VR128:$dst, VR128:$src1, i16mem:$src2, i32i8imm:$src3),
                     "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst,
                       (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                               (i32 (anyext (loadi16 addr:$src2))),
                               (iPTR imm:$src3))))]>;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions
//===----------------------------------------------------------------------===//

// Mask creation: each instruction writes a GR32 from a VR128 source and is
// selected for the matching movmsk intrinsic.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops GR32:$dst, VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is the packed-double form and implements an sse2 intrinsic, so it
// belongs to the PDI class like the other *pd instructions in this file
// (ANDPDrr, ORPDrr, XORPDrr, MOVNTPDmr); it was previously declared as PSI,
// which made it indistinguishable from movmskps.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops GR32:$dst, VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (ops GR32:$dst, VR128:$src),
                     "pmovmskb {$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
// Conditional store.
// The pattern passes EDI explicitly to int_x86_sse2_maskmov_dqu, and the
// record also carries Imp<[EDI],[]> (presumably the implicit-use list -
// confirm against the Imp class definition).
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (ops VR128:$src, VR128:$mask),
                     "maskmovdqu {$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                 Imp<[EDI],[]>;

// Prefetching loads.
// TODO: no intrinsics for these?
def PREFETCHT0   : PSI<0x18, MRM1m, (ops i8mem:$src), "prefetcht0 $src", []>;
def PREFETCHT1   : PSI<0x18, MRM2m, (ops i8mem:$src), "prefetcht1 $src", []>;
def PREFETCHT2   : PSI<0x18, MRM3m, (ops i8mem:$src), "prefetcht2 $src", []>;
// The record name is kept for compatibility with existing references, but the
// emitted mnemonic is the real instruction name "prefetchnta" (the previous
// "prefetchtnta" is not a valid x86 mnemonic).
def PREFETCHTNTA : PSI<0x18, MRM0m, (ops i8mem:$src), "prefetchnta $src", []>;

// Non-temporal stores, each selected for its movnt intrinsic.
// Operand classes follow the convention used by the other memory forms in
// this file: packed floating-point instructions take f128mem (e.g. ANDPSrm,
// ANDPDrm) and packed-integer instructions take i128mem (e.g. PUNPCKHQDQrm).
// The two classes were previously swapped here.
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                    "movntps {$src, $dst|$dst, $src}",
                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (ops f128mem:$dst, VR128:$src),
                    "movntpd {$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (ops i128mem:$dst, VR128:$src),
                    "movntdq {$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
// movnti stores a general-purpose register, so it uses the plain I class with
// an explicit TB prefix and HasSSE2 predicate.
def MOVNTImr  :   I<0xC3, MRMDestMem, (ops i32mem:$dst, GR32:$src),
                    "movnti {$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, 
                  TB, Requires<[HasSSE2]>;
// Cache flush: selected for int_x86_sse2_clflush applied to a memory address.
def CLFLUSH
  : I<0xAE, MRM7m,
      (ops i8mem:$src),
      "clflush $src",
      [(int_x86_sse2_clflush addr:$src)]>,
    TB, Requires<[HasSSE2]>;

// Load, store, and memory fence; each is selected for its fence intrinsic.
// NOTE(review): these records have an empty operand list yet use the memory
// ModRM forms (MRM7m / MRM5m / MRM6m) - confirm the encoder produces the
// intended bytes for an instruction with no memory operand.
def SFENCE : PSI<0xAE, MRM7m, (ops), "sfence", [(int_x86_sse_sfence)]>;
def LFENCE : I<0xAE, MRM5m, (ops),
               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6m, (ops),
               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
// MXCSR register: load from / store to a 32-bit memory location, selected for
// the ldmxcsr / stmxcsr intrinsics.
def LDMXCSR : PSI<0xAE, MRM2m, (ops i32mem:$src),
                  "ldmxcsr $src", [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m, (ops i32mem:$dst),
                  "stmxcsr $dst", [(int_x86_sse_stmxcsr addr:$dst)]>;

// Thread synchronization.
// Both records have no explicit operands; their patterns name the fixed
// registers the intrinsics are matched against (EAX/ECX/EDX for monitor,
// ECX/EAX for mwait).
def MONITOR : I<0xC8, RawFrm, (ops), "monitor",
                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
def MWAIT   : I<0xC9, RawFrm, (ops), "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;

//===----------------------------------------------------------------------===//
// Alias Instructions
//===----------------------------------------------------------------------===//

// Pseudo-style aliases that materialise constant vectors by operating a
// register on itself: all-zeros via xorps, all-ones via pcmpeqd.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def V_SET0
  : PSI<0x57, MRMInitReg,
        (ops VR128:$dst),
        "xorps $dst, $dst",
        [(set VR128:$dst, (v4f32 immAllZerosV))]>;
def V_SETALLONES
  : PDI<0x76, MRMInitReg,
        (ops VR128:$dst),
        "pcmpeqd $dst, $dst",
        [(set VR128:$dst, (v2f64 immAllOnesV))]>;

// Scalar FP register (FR32 / FR64) or scalar FP load placed into a 128-bit
// vector via scalar_to_vector.
def MOVSS2PSrr
  : SSI<0x10, MRMSrcReg,
        (ops VR128:$dst, FR32:$src),
        "movss {$src, $dst|$dst, $src}",
        [(set VR128:$dst, (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm
  : SSI<0x10, MRMSrcMem,
        (ops VR128:$dst, f32mem:$src),
        "movss {$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
def MOVSD2PDrr
  : SDI<0x10, MRMSrcReg,
        (ops VR128:$dst, FR64:$src),
        "movsd {$src, $dst|$dst, $src}",
        [(set VR128:$dst, (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD2PDrm
  : SDI<0x10, MRMSrcMem,
        (ops VR128:$dst, f64mem:$src),
        "movsd {$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;

// movd from a GR32 into a vector register, as a v4i32 scalar_to_vector.
// NOTE(review): the pattern's last line was missing in this copy; restored to
// mirror the memory form MOVDI2PDIrm with GR32:$src in place of the load -
// confirm against version control.
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, GR32:$src),
                      "movd {$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>;
// movd from a 32-bit load into a vector register (v4i32 scalar_to_vector).
def MOVDI2PDIrm
  : PDI<0x6E, MRMSrcMem,
        (ops VR128:$dst, i32mem:$src),
        "movd {$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
// movd used as a bitconvert from a 32-bit integer (register or load) to FR32.
def MOVDI2SSrr
  : PDI<0x6E, MRMSrcReg,
        (ops FR32:$dst, GR32:$src),
        "movd {$src, $dst|$dst, $src}",
        [(set FR32:$dst, (bitconvert GR32:$src))]>;

def MOVDI2SSrm
  : PDI<0x6E, MRMSrcMem,
        (ops FR32:$dst, i32mem:$src),
        "movd {$src, $dst|$dst, $src}",
        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
// SSE2 instructions with XS prefix.
// movq load: a 64-bit load placed into a v2i64 via scalar_to_vector.
def MOVQI2PQIrm
  : I<0x7E, MRMSrcMem,
      (ops VR128:$dst, i64mem:$src),
      "movq {$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
    XS, Requires<[HasSSE2]>;
// movq store: writes element 0 of a v2i64 to memory as an i64.
def MOVPQI2QImr
  : PDI<0xD6, MRMDestMem,
        (ops i64mem:$dst, VR128:$src),
        "movq {$src, $dst|$dst, $src}",
        [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))),
                addr:$dst)]>;

// Element-0 extraction from a vector register to a scalar register or to
// memory.
// FIXME: may not be able to eliminate this movss with coalescing the src and
// dest register classes are different. We really want to write this pattern
// like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
// NOTE(review): every pattern in this group had lost its final line in this
// copy (and MOVPDI2DIrr had also lost its asm string). The tails are restored
// with the (iPTR 0) index shown in the FIXME above and in MOVPQI2QImr's store
// pattern; MOVPDI2DIrr's asm string is taken from its memory twin
// MOVPDI2DImr. Confirm all of these against version control.
def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, VR128:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
                                       (iPTR 0)))]>;
def MOVPS2SSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, VR128:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(store (f32 (vector_extract (v4f32 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>;
def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, VR128:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
                                       (iPTR 0)))]>;
def MOVPD2SDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                     "movsd {$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>;
def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (ops GR32:$dst, VR128:$src),
                       "movd {$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
                                         (iPTR 0)))]>;
def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
                       "movd {$src, $dst|$dst, $src}",
                       [(store (i32 (vector_extract (v4i32 VR128:$src),
                                     (iPTR 0))), addr:$dst)]>;
// movd used as a bitconvert from FR32 to a 32-bit integer, either into a GR32
// or stored to memory.
def MOVSS2DIrr
  : PDI<0x7E, MRMDestReg,
        (ops GR32:$dst, FR32:$src),
        "movd {$src, $dst|$dst, $src}",
        [(set GR32:$dst, (bitconvert FR32:$src))]>;
def MOVSS2DImr
  : PDI<0x7E, MRMDestMem,
        (ops i32mem:$dst, FR32:$src),
        "movd {$src, $dst|$dst, $src}",
        [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
// NOTE(review): this group is damaged in this copy of the file. The
// vector_shuffle patterns of MOVLPSrr and MOVLPDrr stop after their second
// operand (the shuffle mask and closing brackets are missing), and no closing
// brace for the `let isTwoAddress = 1 in {` below is visible before the next
// section. Restore these lines from version control rather than by guesswork.
let isTwoAddress = 1 in {
// Scalar-source forms; they carry no selection pattern ([]).
def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
                      "movss {$src2, $dst|$dst, $src2}", []>;
def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
                      "movsd {$src2, $dst|$dst, $src2}", []>;
// Vector-source forms, matched from a vector_shuffle of $src1 and $src2
// (pattern truncated - see the note above).
def MOVLPSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "movss {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
def MOVLPDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "movsd {$src2, $dst|$dst, $src2}",
                   [(set VR128:$dst,
                     (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
// Store / copy lower 64-bits of a XMM register; selected for
// int_x86_sse2_storel_dq.
def MOVLQ128mr
  : PDI<0xD6, MRMDestMem,
        (ops i64mem:$dst, VR128:$src),
        "movq {$src, $dst|$dst, $src}",
        [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;

// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits, so each load form is
// matched from a MOVL_shuffle_mask shuffle of an all-zeros vector with the
// loaded scalar.
def MOVZSS2PSrm
  : SSI<0x10, MRMSrcMem,
        (ops VR128:$dst, f32mem:$src),
        "movss {$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4f32 (vector_shuffle
                        immAllZerosV,
                        (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                        MOVL_shuffle_mask)))]>;
def MOVZSD2PDrm
  : SDI<0x10, MRMSrcMem,
        (ops VR128:$dst, f64mem:$src),
        "movsd {$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v2f64 (vector_shuffle
                        immAllZerosV,
                        (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                        MOVL_shuffle_mask)))]>;
// movd / movq to XMM register zero-extends
// NOTE(review): the pattern's final line (shuffle mask and closing brackets)
// was missing in this copy; restored to mirror the memory form MOVZDI2PDIrm,
// which uses MOVL_shuffle_mask - confirm against version control.
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, GR32:$src),
                       "movd {$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (v4i32 (vector_shuffle immAllZerosV,
                                           (v4i32 (scalar_to_vector GR32:$src)),
                                                MOVL_shuffle_mask)))]>;
// movd from a 32-bit load, matched from a MOVL_shuffle_mask shuffle of an
// all-zeros vector with the loaded scalar.
def MOVZDI2PDIrm
  : PDI<0x6E, MRMSrcMem,
        (ops VR128:$dst, i32mem:$src),
        "movd {$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4i32 (vector_shuffle
                        immAllZerosV,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                        MOVL_shuffle_mask)))]>;
// Moving from XMM to XMM but still clear upper 64 bits; selected for
// int_x86_sse2_movl_dq on a register source.
def MOVZQI2PQIrr
  : I<0x7E, MRMSrcReg,
      (ops VR128:$dst, VR128:$src),
      "movq {$src, $dst|$dst, $src}",
      [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
    XS, Requires<[HasSSE2]>;
// Memory form of the movq that matches int_x86_sse2_movl_dq, with the source
// read through loadv2i64.
// NOTE(review): the definition ended on a dangling comma in this copy; the
// trailer is restored to match the register form MOVZQI2PQIrr (same I class
// and 0x7E opcode, `XS, Requires<[HasSSE2]>`) - confirm against version
// control.
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                     "movq {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_movl_dq
                                      (bitconvert (loadv2i64 addr:$src))))]>,
                   XS, Requires<[HasSSE2]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// 128-bit vector undef's: each listed vector type maps to IMPLICIT_DEF_VR128.
def : Pat<(v2f64 (undef)),
          (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)),
          (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)),
          (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)),
          (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)),
          (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;

// 128-bit all-zeros vectors of the remaining types reuse V_SET0.
def : Pat<(v16i8 immAllZerosV),
          (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllZerosV),
          (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllZerosV),
          (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllZerosV),
          (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v2f64 immAllZerosV),
          (V_SET0)>,
      Requires<[HasSSE2]>;
// 128-bit vector all one's.
// Every pattern here selects V_SETALLONES, which is declared with the PDI
// class and emits pcmpeqd (an SSE2 instruction), so all of them - including
// the v4f32 one, which previously asked only for HasSSE1 - must require
// HasSSE2. Otherwise an SSE1-only target could be handed an instruction it
// does not support.
def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
// Stores of the v16i8 / v8i16 / v4i32 vector types all go through MOVDQAmr.
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVDQAmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
// 16-bits matter.
// NOTE(review): both patterns ended on a dangling comma in this copy. The
// predicate is restored as HasSSE2, matching the neighbouring integer-vector
// patterns and the PDI class of MOVDI2PDIrr - confirm against version control.
def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;

// Bitconvert between 128-bit vector types: each pattern rewrites a bitconvert
// of a VR128 value to the same register viewed as the destination type, so no
// instruction is named on the result side. The table covers every ordered
// pair of distinct types among v2i64, v4i32, v8i16, v16i8, v4f32 and v2f64,
// grouped below by destination type.
let Predicates = [HasSSE2] in {
  // Destination v2i64.
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  // Destination v4i32.
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  // Destination v8i16.
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  // Destination v16i8.
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  // Destination v4f32.
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  // Destination v2f64.
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
// Move scalar to XMM zero-extended.
// A MOVL_shuffle_mask shuffle of an all-zeros vector with an X86s2vec of a
// GR32 is selected as MOVZDI2PDIrr for both v8i16 and v16i8.
def : Pat<(v8i16 (vector_shuffle immAllZerosV,
                                 (v8i16 (X86s2vec GR32:$src)),
                                 MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (vector_shuffle immAllZerosV,
                                 (v16i8 (X86s2vec GR32:$src)),
                                 MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (vector_shuffle immAllZerosV,
                  (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
          (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
// The v4f32 form uses only MOVLSS2PSrr (SSI class) and V_SET0 (PSI class), so
// it is gated on HasSSE1 like the other v4f32 patterns in this file, rather
// than being withheld from SSE1-only targets.
// NOTE(review): assumes SSI/PSI themselves require only SSE1 - confirm
// against the class definitions.
def : Pat<(v4f32 (vector_shuffle immAllZerosV,
                  (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
          (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
// Splat v2f64 / v2i64: a unary shuffle with a splat-low or unpack-high mask is
// selected as the matching unpack instruction applied to the source twice.
let AddedComplexity = 10 in {
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
          (UNPCKLPDrr VR128:$src, VR128:$src)>,   Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
          (UNPCKHPDrr VR128:$src, VR128:$src)>,   Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
          (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
          (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
// NOTE(review): no closing brace for this `let` was visible in this copy of
// the file. It is closed here, at the end of the v2f64 / v2i64 group the
// heading describes - confirm the intended extent against version control.
}
// Splat v4f32
// NOTE(review): the pattern ended on a dangling comma in this copy. The
// predicate is restored as HasSSE1, the same one used by the unary v4f32
// SHUFPSrri pattern elsewhere in this file - confirm against version control.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
          (SHUFPSrri VR128:$src, VR128:$src, SSE_splat_mask:$sm)>,
      Requires<[HasSSE1]>;

// Special unary SHUFPSrri case: the single source is passed as both operands.
// FIXME: when we want non two-address code, then we should use PSHUFD?
def : Pat<(vector_shuffle (v4f32 VR128:$src1),
                          (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1,
                     VR128:$src1,
                     SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE1]>;
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
def : Pat<(vector_shuffle (loadv4f32 addr:$src1),
                          (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (PSHUFDmi addr:$src1,
                    SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
// Special binary v4i32 shuffle cases with SHUFPS: register/register, and
// register/memory where the second source is a loadv2i64 viewed as v4i32.
def : Pat<(vector_shuffle (v4i32 VR128:$src1),
                          (v4i32 VR128:$src2),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1,
                     VR128:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1),
                          (bc_v4i32 (loadv2i64 addr:$src2)),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrmi VR128:$src1,
                     addr:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// Each unary unpack-low shuffle is selected as the matching unpack instruction
// applied to the source twice.
let AddedComplexity = 10 in {
// UNPCKLPS operates on v4f32 and is gated on HasSSE1 like the other v4f32
// SHUFPS patterns in this file (it previously asked for HasSSE2).
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
// PUNPCKLDQ is a packed-integer instruction like PUNPCKLBW / PUNPCKLWD above,
// so it needs HasSSE2 as they do; it previously asked only for HasSSE1.
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
// NOTE(review): no closing brace for this `let` was visible in this copy of
// the file. It is closed here, after the last unpack-low pattern - confirm
// the intended extent against version control.
}

// vector_shuffle v1, <undef> <1, 1, 3, 3>
// v4i32 forms selected as MOVSHDUP, from a register or from a loadv2i64
// viewed as v4i32.
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  MOVSHDUP_shuffle_mask)),
          (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src)), (undef),
                  MOVSHDUP_shuffle_mask)),
          (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;

// vector_shuffle v1, <undef> <0, 0, 2, 2>
// v4i32 forms selected as MOVSLDUP, from a register or from a loadv2i64
// viewed as v4i32.
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  MOVSLDUP_shuffle_mask)),
          (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src)), (undef),
                  MOVSLDUP_shuffle_mask)),
          (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1,
                                 VR128:$src2,
                                 MOVHP_shuffle_mask)),
          (MOVLHPSrr VR128:$src1, VR128:$src2)>;

// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1,
                                 VR128:$src2,
                                 MOVHLPS_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
// Unary forms for v4f32 and v4i32: the source is passed as both operands.
def : Pat<(v4f32 (vector_shuffle VR128:$src1,
                                 (undef),
                                 MOVHLPS_v_undef_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src1)>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1,
                                 (undef),
                                 MOVHLPS_v_undef_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src1)>;
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2),
                  MOVLP_shuffle_mask)),