def FUCOMPr  : FPI<0xE8, AddRegFrm,    // cmp ST(0) with ST(i), pop
                   (ops RST:$reg),
                   "fucomp $reg">, DD, Imp<[ST0],[]>;
def FUCOMPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
(ops),
"fucompp">, DA, Imp<[ST0],[]>;
def FUCOMIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
(ops RST:$reg),
"fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;
def FUCOMIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
(ops RST:$reg),
"fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
// Floating point flag ops.
def FNSTSW8r  : I<0xE0, RawFrm,                  // AX = fp flags
                  (ops), "fnstsw", []>, DF, Imp<[],[AX]>;
def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control word
                  (ops i16mem:$dst), "fnstcw $dst", []>;
def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
                  (ops i16mem:$dst), "fldcw $dst", []>;
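// FNSTCW/FLDCW are used when the rounding mode must be changed temporarily,
// e.g. switching to round-toward-zero for C-style FP-to-integer conversion,
// since x87 integer stores honor the rounding mode in the control word.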
//===----------------------------------------------------------------------===//
// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
//===----------------------------------------------------------------------===//
// Move Instructions
def MOVD64rr : I<0x6E, MRMSrcReg, (ops VR64:$dst, R32:$src),
"movd {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE1]>;
def MOVD64rm : I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src),
"movd {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE1]>;
def MOVD64mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src),
"movd {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE1]>;
def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
"movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
Requires<[HasSSE2]>;
def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
"movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
Requires<[HasSSE2]>;
def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
"movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
Requires<[HasSSE2]>;
def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
"movq {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE1]>;
def MOVQ64rm : I<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
"movq {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE1]>;
def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
"movq {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasSSE1]>;
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src),
"movq {$src, $dst|$dst, $src}", []>, XS,
Requires<[HasSSE2]>;
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
"movq {$src, $dst|$dst, $src}", []>, XS,
Requires<[HasSSE2]>;
def MOVQ128mr : I<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
"movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
Requires<[HasSSE2]>;
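// Encoding note: the MMX forms above are plain 0F-prefixed (TB) opcodes,
// while the SSE2 xmm forms use the F3 prefix (XS) for the 7E load/copy and
// the 66 prefix (TB, OpSize) for the D6 store, matching the MOVQ xmm
// encodings in the Intel manual.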
//===----------------------------------------------------------------------===//
// XMM Packed Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
                 "movaps {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE1]>, TB;
def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
                 "movapd {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE2]>, TB, OpSize;
def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
                 "movaps {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE1]>, TB;
def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
                 "movaps {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE1]>, TB;
def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
                 "movapd {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE2]>, TB, OpSize;
def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
                 "movapd {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE2]>, TB, OpSize;
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                   "movaps {$src, $dst|$dst, $src}", []>,
                 Requires<[HasSSE1]>, TB;
def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                   "movapd {$src, $dst|$dst, $src}", []>,
                 Requires<[HasSSE2]>, TB, OpSize;
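// Using movaps/movapd (rather than movss/movsd) for these scalar copies is
// presumably preferred because the scalar moves merge into the low element of
// the destination, creating a false dependency on its old value, whereas the
// packed moves rewrite the whole register.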
// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}",
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
Requires<[HasSSE1]>, TB;
def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
                 Requires<[HasSSE2]>, TB, OpSize;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions
//===----------------------------------------------------------------------===//
def RDTSC : I<0x31, RawFrm, (ops), "rdtsc", [(X86rdtsc)]>,
TB, Imp<[],[EAX,EDX]>;
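// RDTSC returns the 64-bit time-stamp counter in EDX:EAX; the
// Imp<[],[EAX,EDX]> annotation records those implicit register defs so the
// scheduler and register allocator know the instruction clobbers them.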
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// GlobalAddress and ExternalSymbol
def : Pat<(i32 globaladdr:$dst), (MOV32ri tglobaladdr:$dst)>;
def : Pat<(i32 externalsym:$dst), (MOV32ri texternalsym:$dst)>;
// Calls
def : Pat<(X86call tglobaladdr:$dst),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call texternalsym:$dst),
(CALLpcrel32 texternalsym:$dst)>;
def : Pat<(addc R32:$src1, R32:$src2),
          (ADD32rr R32:$src1, R32:$src2)>;
def : Pat<(addc R32:$src1, (load addr:$src2)),
          (ADD32rm R32:$src1, addr:$src2)>;
def : Pat<(addc R32:$src1, imm:$src2),
          (ADD32ri R32:$src1, imm:$src2)>;
def : Pat<(addc R32:$src1, i32immSExt8:$src2),
          (ADD32ri8 R32:$src1, i32immSExt8:$src2)>;
def : Pat<(subc R32:$src1, R32:$src2),
          (SUB32rr R32:$src1, R32:$src2)>;
def : Pat<(subc R32:$src1, (load addr:$src2)),
          (SUB32rm R32:$src1, addr:$src2)>;
def : Pat<(subc R32:$src1, imm:$src2),
          (SUB32ri R32:$src1, imm:$src2)>;
def : Pat<(subc R32:$src1, i32immSExt8:$src2),
          (SUB32ri8 R32:$src1, i32immSExt8:$src2)>;
def : Pat<(truncstore (i8 imm:$src), addr:$dst, i1),
(MOV8mi addr:$dst, imm:$src)>;
def : Pat<(truncstore R8:$src, addr:$dst, i1),
(MOV8mr addr:$dst, R8:$src)>;
// {s|z}extload bool -> {s|z}extload byte
def : Pat<(sextloadi16i1 addr:$src), (MOVSX16rm8 addr:$src)>;
def : Pat<(sextloadi32i1 addr:$src), (MOVSX32rm8 addr:$src)>;
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
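// Booleans live in memory as full bytes (see the truncstore patterns above),
// so an i1 load is just a byte load; only the sign/zero extension to the
// wider type differs between the patterns.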
// anyext -> zext
def : Pat<(i16 (anyext R8 :$src)), (MOVZX16rr8 R8 :$src)>;
def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8 R8 :$src)>;
def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>;
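// anyext leaves the upper bits undefined, so any extension is legal here;
// zero-extension via MOVZX is presumably chosen because it overwrites the
// whole destination instead of merging into a wider register.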
// Required for RET of f32 / f64 values.
def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
// Required for CALLs that return f32 / f64 values.
def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
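// There is no x87 instruction that loads -0.0 or -1.0 directly; FLDZ/FLD1
// (FpLD0/FpLD1) load +0.0/+1.0, and FCHS (FpCHS) then flips the sign bit.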
// Used to convert i64 to f64, since there isn't an SSE version.
def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
// (shl x, 1) ==> (add x, x)
def : Pat<(shl R8 :$src1, (i8 1)), (ADD8rr R8 :$src1, R8 :$src1)>;
def : Pat<(shl R16:$src1, (i8 1)), (ADD16rr R16:$src1, R16:$src1)>;
def : Pat<(shl R32:$src1, (i8 1)), (ADD32rr R32:$src1, R32:$src1)>;
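// Adding a register to itself computes the same value as shifting it left by
// one; the add is presumably preferred because typical x86 cores can execute
// adds on more ports than shifts.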
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
def : Pat<(or (srl R32:$src1, CL:$amt),
(shl R32:$src2, (sub 32, CL:$amt))),
(SHRD32rrCL R32:$src1, R32:$src2)>;
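// Worked example: with x = 0x000000AA, y = 0x00000011, c = 8,
//   (x >> 8) | (y << 24) = 0x00000000 | 0x11000000 = 0x11000000,
// which is the low 32 bits of the 64-bit concatenation (y:x) >> 8, exactly
// what SHRD computes with y supplying the bits shifted in from the top.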
def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
(shl R32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHRD32mrCL addr:$dst, R32:$src2)>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
def : Pat<(or (shl R32:$src1, CL:$amt),
(srl R32:$src2, (sub 32, CL:$amt))),
(SHLD32rrCL R32:$src1, R32:$src2)>;
def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
(srl R32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHLD32mrCL addr:$dst, R32:$src2)>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
def : Pat<(or (srl R16:$src1, CL:$amt),
(shl R16:$src2, (sub 16, CL:$amt))),
(SHRD16rrCL R16:$src1, R16:$src2)>;
def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
(shl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHRD16mrCL addr:$dst, R16:$src2)>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
def : Pat<(or (shl R16:$src1, CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))),
(SHLD16rrCL R16:$src1, R16:$src2)>;
def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHLD16mrCL addr:$dst, R16:$src2)>;