Newer
Older
// 8-bit atomic read-modify-write pseudo instructions (opcode 0, Pseudo format).
// Each one takes a memory operand $ptr and a GR8 value $val; its selection
// pattern sets $dst from the matching atomic_load_<op>_8 node.
// NOTE(review): no assembly-string argument is visible between the (ins ...)
// list and the pattern list in this view, and no enclosing `let` header is
// visible either -- confirm both against the complete file.
def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
// Atomic read-modify-write pseudos on an i64mem operand whose value is passed
// and returned as two GR32 registers.  The `let` ties $val1 to $dst1 and $val2
// to $dst2, lists EFLAGS/EAX/EBX/ECX/EDX as defined and EAX/EBX/ECX/EDX as
// used, and marks every record as mayLoad, mayStore and
// usesCustomDAGSchedInserter.
// NOTE(review): in this view every def stops after its (ins ...) list and the
// closing brace of the `let` is not visible -- the remaining arguments must be
// taken from the complete file before this block can be edited safely.
let Constraints = "$val1 = $dst1, $val2 = $dst2",
Defs = [EFLAGS, EAX, EBX, ECX, EDX],
Uses = [EAX, EBX, ECX, EDX],
mayLoad = 1, mayStore = 1,
usesCustomDAGSchedInserter = 1 in {
def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
Bill Wendling
committed
// ConstantPool, JumpTable, GlobalTLSAddress, GlobalAddress and ExternalSymbol:
// an i32 X86Wrapper around any of these target leaves selects MOV32ri with the
// same leaf as its immediate operand.
def : Pat<(i32 (X86Wrapper tconstpool:$dst)),
          (MOV32ri tconstpool:$dst)>;
def : Pat<(i32 (X86Wrapper tjumptable:$dst)),
          (MOV32ri tjumptable:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),
          (MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr:$dst)),
          (MOV32ri tglobaladdr:$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)),
          (MOV32ri texternalsym:$dst)>;
Evan Cheng
committed
// Adding a wrapped symbolic leaf to a GR32 selects ADD32ri with the leaf as
// the immediate operand.
def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
          (ADD32ri GR32:$src1, tconstpool:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
          (ADD32ri GR32:$src1, tjumptable:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr:$src2)),
          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
          (ADD32ri GR32:$src1, texternalsym:$src2)>;

// Storing a wrapped global address or external symbol selects MOV32mi.
def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
          (MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
          (MOV32mi addr:$dst, texternalsym:$src)>;
// tailcall stuff
Evan Cheng
committed
// X86tailcall on a register, a global address or an external symbol selects
// the operand-less TAILCALL record in all three cases.
def : Pat<(X86tailcall GR32:$dst),
          (TAILCALL)>;
def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
          (TAILCALL)>;
def : Pat<(X86tailcall (i32 texternalsym:$dst)),
          (TAILCALL)>;

// X86tcret forwards both the callee and the immediate $off: a register callee
// selects TCRETURNri, a symbolic callee selects TCRETURNdi.
def : Pat<(X86tcret GR32:$dst, imm:$off),
          (TCRETURNri GR32:$dst, imm:$off)>;

// The result operand uses the same leaf kind as the matched operand
// (tglobaladdr), consistent with the other global-address patterns in this
// file.
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi tglobaladdr:$dst, imm:$off)>;

def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
          (TCRETURNdi texternalsym:$dst, imm:$off)>;
(CALLpcrel32 texternalsym:$dst)>;
Evan Cheng
committed
// addc selects the plain 32-bit ADD forms: register, load, immediate and
// sign-extended 8-bit immediate.
def : Pat<(addc GR32:$src1, GR32:$src2),
          (ADD32rr GR32:$src1, GR32:$src2)>;
def : Pat<(addc GR32:$src1, (load addr:$src2)),
          (ADD32rm GR32:$src1, addr:$src2)>;
def : Pat<(addc GR32:$src1, imm:$src2),
          (ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;

// subc selects the plain 32-bit SUB forms in the same four operand shapes.
def : Pat<(subc GR32:$src1, GR32:$src2),
          (SUB32rr GR32:$src1, GR32:$src2)>;
def : Pat<(subc GR32:$src1, (load addr:$src2)),
          (SUB32rm GR32:$src1, addr:$src2)>;
def : Pat<(subc GR32:$src1, imm:$src2),
          (SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
Chris Lattner
committed
// Comparisons.

// TEST R,R is smaller than CMP R,0, so a compare of a register against the
// literal 0 (with its implicit EFLAGS result) selects TESTrr with the register
// as both operands.
def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)),
          (TEST8rr GR8:$src1, GR8:$src1)>;
def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)),
          (TEST16rr GR16:$src1, GR16:$src1)>;
def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
          (TEST32rr GR32:$src1, GR32:$src1)>;
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
// Conditional moves with folded loads with operands swapped and conditions
// inverted: when the load is the first X86cmov operand, the register becomes
// the first instruction operand and the CMOVcc*rm record for the opposite
// condition code is selected.

// B -> AE
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
          (CMOVAE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
          (CMOVAE32rm GR32:$src2, addr:$src1)>;
// AE -> B
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
          (CMOVB16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
          (CMOVB32rm GR32:$src2, addr:$src1)>;
// E -> NE
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
          (CMOVNE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
          (CMOVNE32rm GR32:$src2, addr:$src1)>;
// NE -> E
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
          (CMOVE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
          (CMOVE32rm GR32:$src2, addr:$src1)>;
// BE -> A
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
          (CMOVA16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
          (CMOVA32rm GR32:$src2, addr:$src1)>;
// A -> BE
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
          (CMOVBE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
          (CMOVBE32rm GR32:$src2, addr:$src1)>;
// L -> GE
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
          (CMOVGE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
          (CMOVGE32rm GR32:$src2, addr:$src1)>;
// GE -> L
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
          (CMOVL16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
          (CMOVL32rm GR32:$src2, addr:$src1)>;
// LE -> G
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
          (CMOVG16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
          (CMOVG32rm GR32:$src2, addr:$src1)>;
// G -> LE
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
          (CMOVLE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
          (CMOVLE32rm GR32:$src2, addr:$src1)>;
// P -> NP
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
          (CMOVNP16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
          (CMOVNP32rm GR32:$src2, addr:$src1)>;
// NP -> P
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
          (CMOVP16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
          (CMOVP32rm GR32:$src2, addr:$src1)>;
// S -> NS
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
          (CMOVNS16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
          (CMOVNS32rm GR32:$src2, addr:$src1)>;
// NS -> S
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
          (CMOVS16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
          (CMOVS32rm GR32:$src2, addr:$src1)>;
// O -> NO
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
          (CMOVNO16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
          (CMOVNO32rm GR32:$src2, addr:$src1)>;
// NO -> O
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
          (CMOVO16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
          (CMOVO32rm GR32:$src2, addr:$src1)>;
// zextload bool -> zextload byte: an i1 zero-extending load selects the
// byte-sized load for the result width.
def : Pat<(zextloadi8i1 addr:$src),
          (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src),
          (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src),
          (MOVZX32rm8 addr:$src)>;

// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src),
          (MOV8rm addr:$src)>;
Bill Wendling
committed
// Any-extending i1 loads select the zero-extending byte loads; the 16-bit
// result form is restricted to In32BitMode.
def : Pat<(extloadi16i1 addr:$src),
          (MOVZX16rm8 addr:$src)>,
      Requires<[In32BitMode]>;
def : Pat<(extloadi32i1 addr:$src),
          (MOVZX32rm8 addr:$src)>;
Bill Wendling
committed
// Any-extending i8 and i16 loads select the zero-extending loads; the
// i8 -> i16 form is restricted to In32BitMode.
def : Pat<(extloadi16i8 addr:$src),
          (MOVZX16rm8 addr:$src)>,
      Requires<[In32BitMode]>;
def : Pat<(extloadi32i8 addr:$src),
          (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src),
          (MOVZX32rm16 addr:$src)>;
// anyext
Bill Wendling
committed
// anyext from GR8 selects the register zero-extends (In32BitMode only).
def : Pat<(i16 (anyext GR8:$src)),
          (MOVZX16rr8 GR8:$src)>,
      Requires<[In32BitMode]>;
def : Pat<(i32 (anyext GR8:$src)),
          (MOVZX32rr8 GR8:$src)>,
      Requires<[In32BitMode]>;

// anyext from GR16 to i32 inserts the source as the 16-bit subregister of an
// IMPLICIT_DEF i32 value.
def : Pat<(i32 (anyext GR16:$src)),
          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
// (and (i32 load), 255) -> (zextload i8)
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
          (MOVZX32rm8 addr:$src)>;

// (and (i32 load), 65535) -> (zextload i16)
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
          (MOVZX32rm16 addr:$src)>;
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
// Covered for 16- and 32-bit operands, in register and load-op-store forms.
def : Pat<(add GR16:$src1, 128),
          (SUB16ri8 GR16:$src1, -128)>;
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
          (SUB16mi8 addr:$dst, -128)>;

def : Pat<(add GR32:$src1, 128),
          (SUB32ri8 GR32:$src1, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
          (SUB32mi8 addr:$dst, -128)>;
// r & (2^16-1) ==> movz of the 16-bit subregister
def : Pat<(and GR32:$src1, 0xffff),
          (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;

// r & (2^8-1) ==> movz of the 8-bit subregister.  The source goes through
// MOV32to32_ before the subregister extract; In32BitMode only.
def : Pat<(and GR32:$src1, 0xff),
          (MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1),
                                          x86_subreg_8bit)))>,
      Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz, 16-bit operand: the source goes through MOV16to16_,
// its 8-bit subregister is extracted and zero-extended with MOVZX16rr8.
// In32BitMode only.
def : Pat<(and GR16:$src1, 0xff),
          (MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1),
                                          x86_subreg_8bit)))>,
      Requires<[In32BitMode]>;
// sext_inreg patterns: extract the narrow subregister and sign-extend it with
// the matching MOVSX record.  The 8-bit forms go through MOV32to32_ /
// MOV16to16_ first and are restricted to In32BitMode.
def : Pat<(sext_inreg GR32:$src, i16),
          (MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>;
def : Pat<(sext_inreg GR32:$src, i8),
          (MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src),
                                          x86_subreg_8bit)))>,
      Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
          (MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src),
                                          x86_subreg_8bit)))>,
      Requires<[In32BitMode]>;
// trunc patterns: truncation is a plain subregister extract.  The 8-bit
// results go through MOV32to32_ / MOV16to16_ first and are restricted to
// In32BitMode.
def : Pat<(i16 (trunc GR32:$src)),
          (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
def : Pat<(i8 (trunc GR32:$src)),
          (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>,
      Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
          (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>,
      Requires<[In32BitMode]>;
Evan Cheng
committed
// (shl x, 1) ==> (add x, x)
def : Pat<(shl GR8:$src1, (i8 1)),
          (ADD8rr GR8:$src1, GR8:$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)),
          (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)),
          (ADD32rr GR32:$src1, GR32:$src1)>;
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
// (shl x (and y, 31)) ==> (shl x, y)
// A CL shift amount that is explicitly masked with 31 selects the plain *CL
// shift record.  The same rule is repeated for srl and sra, each in register
// and load-op-store form.

// shl
def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
          (SHL8rCL GR8:$src1)>;
def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
          (SHL16rCL GR16:$src1)>;
def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
          (SHL32rCL GR32:$src1)>;
def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SHL8mCL addr:$dst)>;
def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SHL16mCL addr:$dst)>;
def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SHL32mCL addr:$dst)>;

// srl
def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
          (SHR8rCL GR8:$src1)>;
def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
          (SHR16rCL GR16:$src1)>;
def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
          (SHR32rCL GR32:$src1)>;
def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SHR8mCL addr:$dst)>;
def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SHR16mCL addr:$dst)>;
def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SHR32mCL addr:$dst)>;

// sra
def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
          (SAR8rCL GR8:$src1)>;
def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
          (SAR16rCL GR16:$src1)>;
def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
          (SAR32rCL GR32:$src1)>;
def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SAR8mCL addr:$dst)>;
def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SAR16mCL addr:$dst)>;
def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
          (SAR32mCL addr:$dst)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
// Shift amount in CL, register and load-op-store forms.
def : Pat<(or (srl GR32:$src1, CL:$amt),
              (shl GR32:$src2, (sub 32, CL:$amt))),
          (SHRD32rrCL GR32:$src1, GR32:$src2)>;

def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
                     (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
          (SHRD32mrCL addr:$dst, GR32:$src2)>;

// Same, with the shift amount given as a truncation of ECX.
def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
              (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
          (SHRD32rrCL GR32:$src1, GR32:$src2)>;

def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
                     (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
                 addr:$dst),
          (SHRD32mrCL addr:$dst, GR32:$src2)>;

// Immediate shift amounts matched through the shrd fragment; only $amt1 is
// passed on to the instruction.
def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
          (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;

def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
                       GR32:$src2, (i8 imm:$amt2)), addr:$dst),
          (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
// Shift amount in CL, register and load-op-store forms.
def : Pat<(or (shl GR32:$src1, CL:$amt),
              (srl GR32:$src2, (sub 32, CL:$amt))),
          (SHLD32rrCL GR32:$src1, GR32:$src2)>;

def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
                     (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
          (SHLD32mrCL addr:$dst, GR32:$src2)>;

// Same, with the shift amount given as a truncation of ECX.
def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
              (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
          (SHLD32rrCL GR32:$src1, GR32:$src2)>;

def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
                     (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
                 addr:$dst),
          (SHLD32mrCL addr:$dst, GR32:$src2)>;

// Immediate shift amounts matched through the shld fragment; only $amt1 is
// passed on to the instruction.
def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
          (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;

def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
                       GR32:$src2, (i8 imm:$amt2)), addr:$dst),
          (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
// Shift amount in CL, register and load-op-store forms.
def : Pat<(or (srl GR16:$src1, CL:$amt),
              (shl GR16:$src2, (sub 16, CL:$amt))),
          (SHRD16rrCL GR16:$src1, GR16:$src2)>;

def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
                     (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
          (SHRD16mrCL addr:$dst, GR16:$src2)>;

// Same, with the shift amount given as a truncation of CX.
def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
              (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
          (SHRD16rrCL GR16:$src1, GR16:$src2)>;

def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
                     (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
                 addr:$dst),
          (SHRD16mrCL addr:$dst, GR16:$src2)>;

// Immediate shift amounts matched through the shrd fragment; only $amt1 is
// passed on to the instruction.
def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
          (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;

def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
                       GR16:$src2, (i8 imm:$amt2)), addr:$dst),
          (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
// Shift amount in CL, register and load-op-store forms.
def : Pat<(or (shl GR16:$src1, CL:$amt),
              (srl GR16:$src2, (sub 16, CL:$amt))),
          (SHLD16rrCL GR16:$src1, GR16:$src2)>;

def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
                     (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
          (SHLD16mrCL addr:$dst, GR16:$src2)>;

// Same, with the shift amount given as a truncation of CX.
def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
              (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
          (SHLD16rrCL GR16:$src1, GR16:$src2)>;

def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
                     (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
                 addr:$dst),
          (SHLD16mrCL addr:$dst, GR16:$src2)>;

// Immediate shift amounts matched through the shld fragment; only $amt1 is
// passed on to the instruction.
def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
          (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;

def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
                       GR16:$src2, (i8 imm:$amt2)), addr:$dst),
          (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
Bill Wendling
committed
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
//===----------------------------------------------------------------------===//
// Overflow Patterns
//===----------------------------------------------------------------------===//
// X86add_ovf (with its implicit EFLAGS result) selects the ordinary ADD
// records; one pattern per operand shape and width.

// Register-Register Addition with Overflow
def : Pat<(parallel (X86add_ovf GR8:$src1, GR8:$src2),
                    (implicit EFLAGS)),
          (ADD8rr GR8:$src1, GR8:$src2)>;
def : Pat<(parallel (X86add_ovf GR16:$src1, GR16:$src2),
                    (implicit EFLAGS)),
          (ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, GR32:$src2),
                    (implicit EFLAGS)),
          (ADD32rr GR32:$src1, GR32:$src2)>;

// Register-Memory Addition with Overflow
def : Pat<(parallel (X86add_ovf GR8:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (ADD8rm GR8:$src1, addr:$src2)>;
def : Pat<(parallel (X86add_ovf GR16:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (ADD16rm GR16:$src1, addr:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (ADD32rm GR32:$src1, addr:$src2)>;

// Register-Integer Addition with Overflow
def : Pat<(parallel (X86add_ovf GR8:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (ADD8ri GR8:$src1, imm:$src2)>;
def : Pat<(parallel (X86add_ovf GR16:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(parallel (X86add_ovf GR16:$src1, i16immSExt8:$src2),
                    (implicit EFLAGS)),
          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, i32immSExt8:$src2),
                    (implicit EFLAGS)),
          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;

// Memory-Register Addition with Overflow
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR8:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD8mr addr:$dst, GR8:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR16:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD16mr addr:$dst, GR16:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR32:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD32mr addr:$dst, GR32:$src2)>;

// Memory-Integer Addition with Overflow
def : Pat<(parallel (store (X86add_ovf (loadi8 addr:$dst), imm:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD8mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86add_ovf (loadi16 addr:$dst), imm:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD16mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86add_ovf (loadi32 addr:$dst), imm:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD32mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i16immSExt8:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD16mi8 addr:$dst, i16immSExt8:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i32immSExt8:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (ADD32mi8 addr:$dst, i32immSExt8:$src2)>;
// X86sub_ovf (with its implicit EFLAGS result) selects the ordinary SUB
// records; one pattern per operand shape and width.

// Register-Register Subtraction with Overflow
def : Pat<(parallel (X86sub_ovf GR8:$src1, GR8:$src2),
                    (implicit EFLAGS)),
          (SUB8rr GR8:$src1, GR8:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, GR16:$src2),
                    (implicit EFLAGS)),
          (SUB16rr GR16:$src1, GR16:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, GR32:$src2),
                    (implicit EFLAGS)),
          (SUB32rr GR32:$src1, GR32:$src2)>;

// Register-Memory Subtraction with Overflow
def : Pat<(parallel (X86sub_ovf GR8:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (SUB8rm GR8:$src1, addr:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (SUB16rm GR16:$src1, addr:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (SUB32rm GR32:$src1, addr:$src2)>;

// Register-Integer Subtraction with Overflow
def : Pat<(parallel (X86sub_ovf GR8:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (SUB8ri GR8:$src1, imm:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (SUB16ri GR16:$src1, imm:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, i16immSExt8:$src2),
                    (implicit EFLAGS)),
          (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, i32immSExt8:$src2),
                    (implicit EFLAGS)),
          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;

// Memory-Register Subtraction with Overflow
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR8:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB8mr addr:$dst, GR8:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR16:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB16mr addr:$dst, GR16:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR32:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB32mr addr:$dst, GR32:$src2)>;

// Memory-Integer Subtraction with Overflow
def : Pat<(parallel (store (X86sub_ovf (loadi8 addr:$dst), imm:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB8mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (loadi16 addr:$dst), imm:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB16mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (loadi32 addr:$dst), imm:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB32mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i16immSExt8:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB16mi8 addr:$dst, i16immSExt8:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i32immSExt8:$src2),
                           addr:$dst),
                    (implicit EFLAGS)),
          (SUB32mi8 addr:$dst, i32immSExt8:$src2)>;
// X86smul_ovf (with its implicit EFLAGS result) selects the IMUL records for
// 16- and 32-bit operands.

// Register-Register Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf GR16:$src1, GR16:$src2),
                    (implicit EFLAGS)),
          (IMUL16rr GR16:$src1, GR16:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, GR32:$src2),
                    (implicit EFLAGS)),
          (IMUL32rr GR32:$src1, GR32:$src2)>;

// Register-Memory Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf GR16:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (IMUL16rm GR16:$src1, addr:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, (load addr:$src2)),
                    (implicit EFLAGS)),
          (IMUL32rm GR32:$src1, addr:$src2)>;

// Register-Integer Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf GR16:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (IMUL16rri GR16:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, imm:$src2),
                    (implicit EFLAGS)),
          (IMUL32rri GR32:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf GR16:$src1, i16immSExt8:$src2),
                    (implicit EFLAGS)),
          (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, i32immSExt8:$src2),
                    (implicit EFLAGS)),
          (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;

// Memory-Integer Signed Integer Multiply with Overflow
// NOTE(review): the first two source patterns below are textually identical
// (untyped load, plain imm); presumably the operand width is inferred from
// the result instruction -- confirm.
def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2),
                    (implicit EFLAGS)),
          (IMUL16rmi addr:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2),
                    (implicit EFLAGS)),
          (IMUL32rmi addr:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf (load addr:$src1), i16immSExt8:$src2),
                    (implicit EFLAGS)),
          (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86smul_ovf (load addr:$src1), i32immSExt8:$src2),
                    (implicit EFLAGS)),
          (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
// Optimize multiply with overflow by 2: select an add of the register to
// itself.  AddedComplexity = 2 is set on both patterns.
let AddedComplexity = 2 in {
  def : Pat<(parallel (X86smul_ovf GR16:$src1, 2),
                      (implicit EFLAGS)),
            (ADD16rr GR16:$src1, GR16:$src1)>;

  def : Pat<(parallel (X86smul_ovf GR32:$src1, 2),
                      (implicit EFLAGS)),
            (ADD32rr GR32:$src1, GR32:$src1)>;
}
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
include "X86InstrFPStack.td"
//===----------------------------------------------------------------------===//
// X86-64 Support
//===----------------------------------------------------------------------===//
include "X86Instr64bit.td"
//===----------------------------------------------------------------------===//
Evan Cheng
committed
// XMM Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
Evan Cheng
committed
include "X86InstrSSE.td"
//===----------------------------------------------------------------------===//
Evan Cheng
committed
// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
//===----------------------------------------------------------------------===//
Evan Cheng
committed
include "X86InstrMMX.td"