Newer
Older
Chris Lattner
committed
// Floating point flag ops.
def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
//===----------------------------------------------------------------------===//
// XMM Packed Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
def MOVAPSrr : I<0x28, MRMSrcMem, (ops V4F4:$dst, V4F4:$src),
"movaps {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, XS;
def MOVAPDrr : I<0x28, MRMSrcMem, (ops V2F8:$dst, V2F8:$src),
"movapd {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE2]>, XD;
def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F4:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, XS;
def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F4:$src),
"movaps {$src, $dst|$dst, $src}",[]>,
Requires<[HasSSE1]>, XD;
def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F8:$dst, f128mem:$src),
"movapd {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, XD;
def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
"movapd {$src, $dst|$dst, $src}",[]>,
Requires<[HasSSE2]>, XD;
// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}",
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
Requires<[HasSSE1]>, XS;
def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
"movapd {$src, $dst|$dst, $src}",
[(set FR64:$dst, (X86loadpf64 addr:$src))]>,
Requires<[HasSSE1]>, XD;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions
//===----------------------------------------------------------------------===//
def RDTSC : I<0x31, RawFrm, (ops), "rdtsc", [(X86rdtsc)]>,
TB, Imp<[],[EAX,EDX]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// GlobalAddress and ExternalSymbol
def : Pat<(i32 globaladdr:$dst), (MOV32ri tglobaladdr:$dst)>;
def : Pat<(i32 externalsym:$dst), (MOV32ri texternalsym:$dst)>;
// Calls
def : Pat<(X86call tglobaladdr:$dst),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call texternalsym:$dst),
(CALLpcrel32 texternalsym:$dst)>;
// X86 specific add which produces a flag.
def : Pat<(X86addflag R32:$src1, R32:$src2),
(ADD32rr R32:$src1, R32:$src2)>;
def : Pat<(X86addflag R32:$src1, (load addr:$src2)),
(ADD32rm R32:$src1, addr:$src2)>;
def : Pat<(X86addflag R32:$src1, imm:$src2),
(ADD32ri R32:$src1, imm:$src2)>;
def : Pat<(X86addflag R32:$src1, i32immSExt8:$src2),
(ADD32ri8 R32:$src1, i32immSExt8:$src2)>;
def : Pat<(X86subflag R32:$src1, R32:$src2),
(SUB32rr R32:$src1, R32:$src2)>;
def : Pat<(X86subflag R32:$src1, (load addr:$src2)),
(SUB32rm R32:$src1, addr:$src2)>;
def : Pat<(X86subflag R32:$src1, imm:$src2),
(SUB32ri R32:$src1, imm:$src2)>;
def : Pat<(X86subflag R32:$src1, i32immSExt8:$src2),
(SUB32ri8 R32:$src1, i32immSExt8:$src2)>;
def : Pat<(truncstore (i8 imm:$src), addr:$dst, i1),
(MOV8mi addr:$dst, imm:$src)>;
def : Pat<(truncstore R8:$src, addr:$dst, i1),
(MOV8mr addr:$dst, R8:$src)>;
// {s|z}extload bool -> {s|z}extload byte
def : Pat<(sextloadi16i1 addr:$src), (MOVSX16rm8 addr:$src)>;
def : Pat<(sextloadi32i1 addr:$src), (MOVSX32rm8 addr:$src)>;
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
// anyext -> zext
def : Pat<(i16 (anyext R8 :$src)), (MOVZX16rr8 R8 :$src)>;
def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8 R8 :$src)>;
def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>;
// Required for RET of f32 / f64 values.
def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
// Required for CALL which return f32 / f64 values.
def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
// Used to conv. i64 to f64 since there isn't a SSE version.
def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
// (shl x, 1) ==> (add x, x)
def : Pat<(shl R8 :$src1, (i8 1)), (ADD8rr R8 :$src1, R8 :$src1)>;
def : Pat<(shl R16:$src1, (i8 1)), (ADD16rr R16:$src1, R16:$src1)>;
def : Pat<(shl R32:$src1, (i8 1)), (ADD32rr R32:$src1, R32:$src1)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
def : Pat<(or (srl R32:$src1, CL:$amt),
(shl R32:$src2, (sub 32, CL:$amt))),
(SHRD32rrCL R32:$src1, R32:$src2)>;
def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
(shl R32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHRD32mrCL addr:$dst, R32:$src2)>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
def : Pat<(or (shl R32:$src1, CL:$amt),
(srl R32:$src2, (sub 32, CL:$amt))),
(SHLD32rrCL R32:$src1, R32:$src2)>;
def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
(srl R32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHLD32mrCL addr:$dst, R32:$src2)>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
def : Pat<(or (srl R16:$src1, CL:$amt),
(shl R16:$src2, (sub 16, CL:$amt))),
(SHRD16rrCL R16:$src1, R16:$src2)>;
def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
(shl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHRD16mrCL addr:$dst, R16:$src2)>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
def : Pat<(or (shl R16:$src1, CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))),
(SHLD16rrCL R16:$src1, R16:$src2)>;
def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHLD16mrCL addr:$dst, R16:$src2)>;