Newer
Older
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\tcmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
Andrew Lenharth
committed
}
let Constraints = "$val = $dst", Defs = [EFLAGS] in {
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
"lock\n\txadd{l}\t{$val, $ptr|$ptr, $val}",
[(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
Andrew Lenharth
committed
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
"lock\n\txadd{w}\t{$val, $ptr|$ptr, $val}",
[(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
Andrew Lenharth
committed
TB, OpSize, LOCK;
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
"lock\n\txadd{b}\t{$val, $ptr|$ptr, $val}",
[(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
Andrew Lenharth
committed
TB, LOCK;
}
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
[(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
[(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
[(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
let Constraints = "$val1 = $dst1, $val2 = $dst2",
Defs = [EFLAGS, EAX, EBX, ECX, EDX],
Uses = [EAX, EBX, ECX, EDX],
mayLoad = 1, mayStore = 1,
usesCustomDAGSchedInserter = 1 in {
def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
Bill Wendling
committed
// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
Evan Cheng
committed
def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
(ADD32ri GR32:$src1, tconstpool:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
(ADD32ri GR32:$src1, tjumptable:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
(ADD32ri GR32:$src1, texternalsym:$src2)>;
def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV32mi addr:$dst, texternalsym:$src)>;
// tailcall stuff
Evan Cheng
committed
def : Pat<(X86tailcall GR32:$dst),
(TAILCALL)>;
def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
(TAILCALL)>;
def : Pat<(X86tailcall (i32 texternalsym:$dst)),
(TAILCALL)>;
def : Pat<(X86tcret GR32:$dst, imm:$off),
(TCRETURNri GR32:$dst, imm:$off)>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>;
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>;
(CALLpcrel32 texternalsym:$dst)>;
Evan Cheng
committed
def : Pat<(addc GR32:$src1, GR32:$src2),
(ADD32rr GR32:$src1, GR32:$src2)>;
def : Pat<(addc GR32:$src1, (load addr:$src2)),
(ADD32rm GR32:$src1, addr:$src2)>;
def : Pat<(addc GR32:$src1, imm:$src2),
(ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(subc GR32:$src1, GR32:$src2),
(SUB32rr GR32:$src1, GR32:$src2)>;
def : Pat<(subc GR32:$src1, (load addr:$src2)),
(SUB32rm GR32:$src1, addr:$src2)>;
def : Pat<(subc GR32:$src1, imm:$src2),
(SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
Chris Lattner
committed
// Comparisons.
// TEST R,R is smaller than CMP R,0
def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)),
Chris Lattner
committed
(TEST8rr GR8:$src1, GR8:$src1)>;
def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)),
Chris Lattner
committed
(TEST16rr GR16:$src1, GR16:$src1)>;
def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
Chris Lattner
committed
(TEST32rr GR32:$src1, GR32:$src1)>;
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
// Conditional moves with folded loads with operands swapped and conditions
// inverted.
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
(CMOVAE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
(CMOVAE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
(CMOVB16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
(CMOVB32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
(CMOVNE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
(CMOVNE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
(CMOVE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
(CMOVE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
(CMOVA16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
(CMOVA32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
(CMOVBE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
(CMOVBE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
(CMOVGE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
(CMOVGE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
(CMOVL16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
(CMOVL32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
(CMOVG16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
(CMOVG32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
(CMOVLE16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
(CMOVLE32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
(CMOVNP16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
(CMOVNP32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
(CMOVP16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
(CMOVP32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
(CMOVNS16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
(CMOVNS32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
(CMOVS16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
(CMOVS32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
(CMOVNO16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
(CMOVNO32rm GR32:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
(CMOVO16rm GR16:$src2, addr:$src1)>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
(CMOVO32rm GR32:$src2, addr:$src1)>;
// zextload bool -> zextload byte
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
Bill Wendling
committed
def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
Requires<[In32BitMode]>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
Bill Wendling
committed
def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
Requires<[In32BitMode]>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
// anyext
Bill Wendling
committed
def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext GR16:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
// (and (i32 load), 255) -> (zextload i8)
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
(MOVZX32rm8 addr:$src)>;
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
(MOVZX32rm16 addr:$src)>;
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
def : Pat<(add GR16:$src1, 128),
(SUB16ri8 GR16:$src1, -128)>;
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
(SUB16mi8 addr:$dst, -128)>;
def : Pat<(add GR32:$src1, 128),
(SUB32ri8 GR32:$src1, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
(SUB32mi8 addr:$dst, -128)>;
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1),
x86_subreg_8bit)))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
(MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1),
x86_subreg_8bit)))>,
Requires<[In32BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
(MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>;
def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src),
x86_subreg_8bit)))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src),
x86_subreg_8bit)))>,
Requires<[In32BitMode]>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
(i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
def : Pat<(i8 (trunc GR32:$src)),
(i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
(i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>,
Requires<[In32BitMode]>;
Evan Cheng
committed
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
// (shl x (and y, 31)) ==> (shl x, y)
def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
(SHL8rCL GR8:$src1)>;
def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
(SHL16rCL GR16:$src1)>;
def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
(SHL32rCL GR32:$src1)>;
def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHL8mCL addr:$dst)>;
def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHL16mCL addr:$dst)>;
def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHL32mCL addr:$dst)>;
def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
(SHR8rCL GR8:$src1)>;
def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
(SHR16rCL GR16:$src1)>;
def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
(SHR32rCL GR32:$src1)>;
def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHR8mCL addr:$dst)>;
def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHR16mCL addr:$dst)>;
def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SHR32mCL addr:$dst)>;
def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
(SAR8rCL GR8:$src1)>;
def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
(SAR16rCL GR16:$src1)>;
def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
(SAR32rCL GR32:$src1)>;
def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SAR8mCL addr:$dst)>;
def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SAR16mCL addr:$dst)>;
def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
Evan Cheng
committed
def : Pat<(or (srl GR32:$src1, CL:$amt),
(shl GR32:$src2, (sub 32, CL:$amt))),
(SHRD32rrCL GR32:$src1, GR32:$src2)>;
def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
Evan Cheng
committed
(shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHRD32mrCL addr:$dst, GR32:$src2)>;
def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
(shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
(SHRD32rrCL GR32:$src1, GR32:$src2)>;
def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
(shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
addr:$dst),
(SHRD32mrCL addr:$dst, GR32:$src2)>;
def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
(SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
GR32:$src2, (i8 imm:$amt2)), addr:$dst),
(SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
Evan Cheng
committed
def : Pat<(or (shl GR32:$src1, CL:$amt),
(srl GR32:$src2, (sub 32, CL:$amt))),
(SHLD32rrCL GR32:$src1, GR32:$src2)>;
def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
Evan Cheng
committed
(srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHLD32mrCL addr:$dst, GR32:$src2)>;
def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
(srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
(SHLD32rrCL GR32:$src1, GR32:$src2)>;
def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
(srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
addr:$dst),
(SHLD32mrCL addr:$dst, GR32:$src2)>;
def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
(SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
GR32:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
Evan Cheng
committed
def : Pat<(or (srl GR16:$src1, CL:$amt),
(shl GR16:$src2, (sub 16, CL:$amt))),
(SHRD16rrCL GR16:$src1, GR16:$src2)>;
def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
Evan Cheng
committed
(shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHRD16mrCL addr:$dst, GR16:$src2)>;
def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
(shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
(SHRD16rrCL GR16:$src1, GR16:$src2)>;
def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
(shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
addr:$dst),
(SHRD16mrCL addr:$dst, GR16:$src2)>;
def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
(SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
GR16:$src2, (i8 imm:$amt2)), addr:$dst),
(SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
Evan Cheng
committed
def : Pat<(or (shl GR16:$src1, CL:$amt),
(srl GR16:$src2, (sub 16, CL:$amt))),
(SHLD16rrCL GR16:$src1, GR16:$src2)>;
def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
Evan Cheng
committed
(srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHLD16mrCL addr:$dst, GR16:$src2)>;
def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
(srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
(SHLD16rrCL GR16:$src1, GR16:$src2)>;
def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
(srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
addr:$dst),
(SHLD16mrCL addr:$dst, GR16:$src2)>;
def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
(SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
GR16:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
Bill Wendling
committed
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
//===----------------------------------------------------------------------===//
// Overflow Patterns
//===----------------------------------------------------------------------===//
// Register-Register Addition with Overflow
def : Pat<(parallel (X86add_ovf GR8:$src1, GR8:$src2),
(implicit EFLAGS)),
(ADD8rr GR8:$src1, GR8:$src2)>;
// Register-Register Addition with Overflow
def : Pat<(parallel (X86add_ovf GR16:$src1, GR16:$src2),
(implicit EFLAGS)),
(ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, GR32:$src2),
(implicit EFLAGS)),
(ADD32rr GR32:$src1, GR32:$src2)>;
// Register-Memory Addition with Overflow
def : Pat<(parallel (X86add_ovf GR8:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(ADD8rm GR8:$src1, addr:$src2)>;
def : Pat<(parallel (X86add_ovf GR16:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(ADD16rm GR16:$src1, addr:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(ADD32rm GR32:$src1, addr:$src2)>;
// Register-Integer Addition with Overflow
def : Pat<(parallel (X86add_ovf GR8:$src1, imm:$src2),
(implicit EFLAGS)),
(ADD8ri GR8:$src1, imm:$src2)>;
// Register-Integer Addition with Overflow
def : Pat<(parallel (X86add_ovf GR16:$src1, imm:$src2),
(implicit EFLAGS)),
(ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, imm:$src2),
(implicit EFLAGS)),
(ADD32ri GR32:$src1, imm:$src2)>;
def : Pat<(parallel (X86add_ovf GR16:$src1, i16immSExt8:$src2),
(implicit EFLAGS)),
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86add_ovf GR32:$src1, i32immSExt8:$src2),
(implicit EFLAGS)),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
// Memory-Register Addition with Overflow
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR8:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD8mr addr:$dst, GR8:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR16:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD16mr addr:$dst, GR16:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR32:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD32mr addr:$dst, GR32:$src2)>;
def : Pat<(parallel (store (X86add_ovf (loadi8 addr:$dst), imm:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD8mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86add_ovf (loadi16 addr:$dst), imm:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD16mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86add_ovf (loadi32 addr:$dst), imm:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD32mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i16immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD16mi8 addr:$dst, i16immSExt8:$src2)>;
def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i32immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)),
(ADD32mi8 addr:$dst, i32immSExt8:$src2)>;
// Register-Register Subtraction with Overflow
def : Pat<(parallel (X86sub_ovf GR8:$src1, GR8:$src2),
(implicit EFLAGS)),
(SUB8rr GR8:$src1, GR8:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, GR16:$src2),
(implicit EFLAGS)),
(SUB16rr GR16:$src1, GR16:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, GR32:$src2),
(implicit EFLAGS)),
(SUB32rr GR32:$src1, GR32:$src2)>;
// Register-Memory Subtraction with Overflow
def : Pat<(parallel (X86sub_ovf GR8:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(SUB8rm GR8:$src1, addr:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(SUB16rm GR16:$src1, addr:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(SUB32rm GR32:$src1, addr:$src2)>;
// Register-Integer Subtraction with Overflow
def : Pat<(parallel (X86sub_ovf GR8:$src1, imm:$src2),
(implicit EFLAGS)),
(SUB8ri GR8:$src1, imm:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, imm:$src2),
(implicit EFLAGS)),
(SUB16ri GR16:$src1, imm:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, imm:$src2),
(implicit EFLAGS)),
(SUB32ri GR32:$src1, imm:$src2)>;
def : Pat<(parallel (X86sub_ovf GR16:$src1, i16immSExt8:$src2),
(implicit EFLAGS)),
(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86sub_ovf GR32:$src1, i32immSExt8:$src2),
(implicit EFLAGS)),
(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
// Memory-Register Subtraction with Overflow
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR8:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB8mr addr:$dst, GR8:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR16:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB16mr addr:$dst, GR16:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR32:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB32mr addr:$dst, GR32:$src2)>;
// Memory-Integer Subtraction with Overflow
def : Pat<(parallel (store (X86sub_ovf (loadi8 addr:$dst), imm:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB8mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (loadi16 addr:$dst), imm:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB16mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (loadi32 addr:$dst), imm:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB32mi addr:$dst, imm:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i16immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB16mi8 addr:$dst, i16immSExt8:$src2)>;
def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i32immSExt8:$src2),
addr:$dst),
(implicit EFLAGS)),
(SUB32mi8 addr:$dst, i32immSExt8:$src2)>;
// Register-Register Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf GR16:$src1, GR16:$src2),
(implicit EFLAGS)),
(IMUL16rr GR16:$src1, GR16:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, GR32:$src2),
(implicit EFLAGS)),
(IMUL32rr GR32:$src1, GR32:$src2)>;
// Register-Memory Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf GR16:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(IMUL16rm GR16:$src1, addr:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, (load addr:$src2)),
(implicit EFLAGS)),
(IMUL32rm GR32:$src1, addr:$src2)>;
// Register-Integer Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf GR16:$src1, imm:$src2),
(implicit EFLAGS)),
(IMUL16rri GR16:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, imm:$src2),
(implicit EFLAGS)),
(IMUL32rri GR32:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf GR16:$src1, i16immSExt8:$src2),
(implicit EFLAGS)),
(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, i32immSExt8:$src2),
(implicit EFLAGS)),
(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
// Memory-Integer Signed Integer Multiply with Overflow
def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2),
(implicit EFLAGS)),
(IMUL16rmi addr:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2),
(implicit EFLAGS)),
(IMUL32rmi addr:$src1, imm:$src2)>;
def : Pat<(parallel (X86smul_ovf (load addr:$src1), i16immSExt8:$src2),
(implicit EFLAGS)),
(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
def : Pat<(parallel (X86smul_ovf (load addr:$src1), i32immSExt8:$src2),
(implicit EFLAGS)),
(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
// Optimize multiple with overflow by 2.
let AddedComplexity = 2 in {
def : Pat<(parallel (X86smul_ovf GR16:$src1, 2),
(implicit EFLAGS)),
(ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(parallel (X86smul_ovf GR32:$src1, 2),
(implicit EFLAGS)),
(ADD32rr GR32:$src1, GR32:$src1)>;
}
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
include "X86InstrFPStack.td"
//===----------------------------------------------------------------------===//
// X86-64 Support
//===----------------------------------------------------------------------===//
include "X86Instr64bit.td"
//===----------------------------------------------------------------------===//
Evan Cheng
committed
// XMM Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
Evan Cheng
committed
include "X86InstrSSE.td"
//===----------------------------------------------------------------------===//
Evan Cheng
committed
// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
//===----------------------------------------------------------------------===//
Evan Cheng
committed
include "X86InstrMMX.td"