Skip to content
Snippets Groups Projects
Commit 81eb193f authored by Bruno Cardoso Lopes's avatar Bruno Cardoso Lopes
Browse files

Match VPERMIL masks more strictly and update the target-specific mask generation to always catch the weird cases.

llvm-svn: 136453
parent 795f5585
No related branches found
No related tags found
No related merge requests found
......@@ -3465,11 +3465,14 @@ static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
return false;
 
// The mask on the high lane should be the same as the low. Actually,
// they can differ if any of the corresponding index in a lane is undef.
// they can differ if any of the corresponding index in a lane is undef
// and the other stays in range.
int LaneSize = NumElts/NumLanes;
for (int i = 0; i < LaneSize; ++i) {
int HighElt = i+LaneSize;
if (Mask[i] < 0 || Mask[HighElt] < 0)
if (Mask[i] < 0 && (isUndefOrInRange(Mask[HighElt], LaneSize, NumElts)))
continue;
if (Mask[HighElt] < 0 && (isUndefOrInRange(Mask[i], 0, LaneSize)))
continue;
if (Mask[HighElt]-Mask[i] != LaneSize)
return false;
......@@ -3486,13 +3489,20 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
 
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits()/128;
int LaneSize = NumElts/NumLanes;
 
// Although the mask is the same for both lanes, walk every lane so that an
// element which is undef in the first half but valid in the second still
// contributes its bits. This covers pathological cases such as:
// shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
unsigned Mask = 0;
for (int i = 0; i < NumElts/NumLanes /* lane size */; ++i) {
int MaskElt = SVOp->getMaskElt(i);
if (MaskElt < 0)
continue;
Mask |= MaskElt << (i*2);
for (int l = 0; l < NumLanes; ++l) {
for (int i = 0; i < LaneSize; ++i) {
int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
if (MaskElt < 0)
continue;
Mask |= MaskElt << (i*2);
}
}
 
return Mask;
......
......@@ -27,3 +27,13 @@ entry:
%shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
ret <4 x i64> %shuffle
}
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
; target specific mask was correctly generated.
; CHECK: vpermilps $-100
; funcA: shuffle one <8 x float> input against an undef second operand.
; Mask index 8 selects element 0 of that undef operand, so result elements
; 0 and 5 are undefined; the remaining indices (3,1,2 and 4,5,6) each stay
; inside their own 4-element half of %a.
; NOTE(review): the expected immediate -100 is 0x9C as a signed byte, which
; appears to be the two-bit-per-element encoding of selectors <0,3,1,2>
; (undef encoded as 0) -- confirm against the immediate generator.
define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
; Single shuffle under test; the second operand is undef, so only %a supplies
; defined elements.
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 3, i32 1, i32 2, i32 4, i32 8, i32 5, i32 6>
ret <8 x float> %shuffle
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment