Skip to content
Snippets Groups Projects
Commit 37521aa8 authored by Nadav Rotem's avatar Nadav Rotem
Browse files

The PMOVZXWD family of functions had patterns extends narrow vector types to wide vector types.

It had patterns for zext-loading and extending. This commit adds patterns for loading a wide type, performing a bitcast,
and extending. This is an odd pattern, but it is commonly used when writing code with intrinsics.

rdar://11897677

llvm-svn: 163995
parent db908594
No related branches found
No related tags found
No related merge requests found
...@@ -5576,31 +5576,43 @@ let Predicates = [HasAVX] in { ...@@ -5576,31 +5576,43 @@ let Predicates = [HasAVX] in {
(VPMOVSXBWrm addr:$src)>; (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(VPMOVSXBWrm addr:$src)>; (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
(VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>; (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>; (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
(VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>; (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>; (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
(VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>; (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>; (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
(VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>; (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>; (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
(VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>; (VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>; (VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
(VPMOVZXDQrm addr:$src)>;
} }
let Predicates = [UseSSE41] in { let Predicates = [UseSSE41] in {
...@@ -5609,31 +5621,43 @@ let Predicates = [UseSSE41] in { ...@@ -5609,31 +5621,43 @@ let Predicates = [UseSSE41] in {
(PMOVSXBWrm addr:$src)>; (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>; (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
(PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>; (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>; (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
(PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>; (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>; (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
(PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>; (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>; (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
(PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>; (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>; (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
(PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>; (PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>; (PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
(PMOVZXDQrm addr:$src)>;
} }
let Predicates = [HasAVX2] in { let Predicates = [HasAVX2] in {
......
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
; rdar://11897677
;CHECK: intrin_pmov
;CHECK: pmovzxbw (%rsi), %xmm0
;CHECK-NEXT: movdqu
;CHECK-NEXT: ret
define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
%1 = bitcast i8* %src to <2 x i64>*
%2 = load <2 x i64>* %1, align 16
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
%5 = bitcast i16* %dest to i8*
%6 = bitcast <8 x i16> %4 to <16 x i8>
tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind
ret void
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment