Newer
Older
Chris Lattner
committed
// Floating point flag ops.
def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
//===----------------------------------------------------------------------===//
// XMM Packed Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
def MOVAPSrr : I<0x28, MRMSrcMem, (ops V4F4:$dst, V4F4:$src),
"movaps {$src, $dst|$dst, $src}", []>,
def MOVAPDrr : I<0x28, MRMSrcMem, (ops V2F8:$dst, V2F8:$src),
"movapd {$src, $dst|$dst, $src}", []>,
def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F4:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}", []>,
def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F4:$src),
"movaps {$src, $dst|$dst, $src}",[]>,
def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F8:$dst, f128mem:$src),
"movapd {$src, $dst|$dst, $src}", []>,
def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
"movapd {$src, $dst|$dst, $src}",[]>,
// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
// Upper bits are disregarded.
def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
"movaps {$src, $dst|$dst, $src}",
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
"movapd {$src, $dst|$dst, $src}",
[(set FR64:$dst, (X86loadpf64 addr:$src))]>,
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions
//===----------------------------------------------------------------------===//
def RDTSC : I<0x31, RawFrm, (ops), "rdtsc", [(X86rdtsc)]>,
TB, Imp<[],[EAX,EDX]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// GlobalAddress and ExternalSymbol
def : Pat<(i32 globaladdr:$dst), (MOV32ri tglobaladdr:$dst)>;
def : Pat<(i32 externalsym:$dst), (MOV32ri texternalsym:$dst)>;
// Calls
def : Pat<(X86call tglobaladdr:$dst),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call texternalsym:$dst),
(CALLpcrel32 texternalsym:$dst)>;
// X86 specific add which produces a flag.
def : Pat<(X86addflag R32:$src1, R32:$src2),
(ADD32rr R32:$src1, R32:$src2)>;
def : Pat<(X86addflag R32:$src1, (load addr:$src2)),
(ADD32rm R32:$src1, addr:$src2)>;
def : Pat<(X86addflag R32:$src1, imm:$src2),
(ADD32ri R32:$src1, imm:$src2)>;
def : Pat<(X86addflag R32:$src1, i32immSExt8:$src2),
(ADD32ri8 R32:$src1, i32immSExt8:$src2)>;
def : Pat<(X86subflag R32:$src1, R32:$src2),
(SUB32rr R32:$src1, R32:$src2)>;
def : Pat<(X86subflag R32:$src1, (load addr:$src2)),
(SUB32rm R32:$src1, addr:$src2)>;
def : Pat<(X86subflag R32:$src1, imm:$src2),
(SUB32ri R32:$src1, imm:$src2)>;
def : Pat<(X86subflag R32:$src1, i32immSExt8:$src2),
(SUB32ri8 R32:$src1, i32immSExt8:$src2)>;
def : Pat<(truncstore (i8 imm:$src), addr:$dst, i1),
(MOV8mi addr:$dst, imm:$src)>;
def : Pat<(truncstore R8:$src, addr:$dst, i1),
(MOV8mr addr:$dst, R8:$src)>;
// {s|z}extload bool -> {s|z}extload byte
def : Pat<(sextloadi16i1 addr:$src), (MOVSX16rm8 addr:$src)>;
def : Pat<(sextloadi32i1 addr:$src), (MOVSX32rm8 addr:$src)>;
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
// anyext -> zext
def : Pat<(i16 (anyext R8 :$src)), (MOVZX16rr8 R8 :$src)>;
def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8 R8 :$src)>;
def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>;
// Required for RET of f32 / f64 values.
def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
// Required for CALL which return f32 / f64 values.
def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
// Used to conv. i64 to f64 since there isn't a SSE version.
def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
// (shl x, 1) ==> (add x, x)
def : Pat<(shl R8 :$src1, (i8 1)), (ADD8rr R8 :$src1, R8 :$src1)>;
def : Pat<(shl R16:$src1, (i8 1)), (ADD16rr R16:$src1, R16:$src1)>;
def : Pat<(shl R32:$src1, (i8 1)), (ADD32rr R32:$src1, R32:$src1)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
def : Pat<(or (srl R32:$src1, CL:$amt),
(shl R32:$src2, (sub 32, CL:$amt))),
(SHRD32rrCL R32:$src1, R32:$src2)>;
def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
(shl R32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHRD32mrCL addr:$dst, R32:$src2)>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
def : Pat<(or (shl R32:$src1, CL:$amt),
(srl R32:$src2, (sub 32, CL:$amt))),
(SHLD32rrCL R32:$src1, R32:$src2)>;
def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
(srl R32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHLD32mrCL addr:$dst, R32:$src2)>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
def : Pat<(or (srl R16:$src1, CL:$amt),
(shl R16:$src2, (sub 16, CL:$amt))),
(SHRD16rrCL R16:$src1, R16:$src2)>;
def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
(shl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHRD16mrCL addr:$dst, R16:$src2)>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
def : Pat<(or (shl R16:$src1, CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))),
(SHLD16rrCL R16:$src1, R16:$src2)>;
def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHLD16mrCL addr:$dst, R16:$src2)>;