Skip to content
Snippets Groups Projects
Commit 27fed8e5 authored by Simon Pilgrim's avatar Simon Pilgrim
Browse files

[X86][AVX] Fixed v16i16/v32i8 ADD/SUB costs on AVX1 subtargets

Add explicit v16i16/v32i8 ADD/SUB costs, matching the existing costs of v4i64/v8i32; these entries were previously missing from the cost table.

This has side effects on the loop vectorizer (LV) max-bandwidth tests: AVX1 now prefers 128-bit vectors, whereas AVX2 still prefers 256-bit vectors.

llvm-svn: 286832
parent e784395c
No related branches found
No related tags found
No related merge requests found
...@@ -526,6 +526,10 @@ int X86TTIImpl::getArithmeticInstrCost( ...@@ -526,6 +526,10 @@ int X86TTIImpl::getArithmeticInstrCost(
// Two ops + 1 extract + 1 insert = 4. // Two ops + 1 extract + 1 insert = 4.
{ ISD::MUL, MVT::v16i16, 4 }, { ISD::MUL, MVT::v16i16, 4 },
{ ISD::MUL, MVT::v8i32, 4 }, { ISD::MUL, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v32i8, 4 },
{ ISD::ADD, MVT::v32i8, 4 },
{ ISD::SUB, MVT::v16i16, 4 },
{ ISD::ADD, MVT::v16i16, 4 },
{ ISD::SUB, MVT::v8i32, 4 }, { ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 }, { ISD::ADD, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v4i64, 4 }, { ISD::SUB, MVT::v4i64, 4 },
......
...@@ -57,13 +57,13 @@ define i32 @add(i32 %arg) { ...@@ -57,13 +57,13 @@ define i32 @add(i32 %arg) {
%G = add <8 x i16> undef, undef %G = add <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %H = add ; SSSE3: cost of 2 {{.*}} %H = add
; SSE42: cost of 2 {{.*}} %H = add ; SSE42: cost of 2 {{.*}} %H = add
; AVX: cost of 2 {{.*}} %H = add ; AVX: cost of 4 {{.*}} %H = add
; AVX2: cost of 1 {{.*}} %H = add ; AVX2: cost of 1 {{.*}} %H = add
; AVX512: cost of 1 {{.*}} %H = add ; AVX512: cost of 1 {{.*}} %H = add
%H = add <16 x i16> undef, undef %H = add <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %I = add ; SSSE3: cost of 4 {{.*}} %I = add
; SSE42: cost of 4 {{.*}} %I = add ; SSE42: cost of 4 {{.*}} %I = add
; AVX: cost of 4 {{.*}} %I = add ; AVX: cost of 8 {{.*}} %I = add
; AVX2: cost of 2 {{.*}} %I = add ; AVX2: cost of 2 {{.*}} %I = add
; AVX512F: cost of 2 {{.*}} %I = add ; AVX512F: cost of 2 {{.*}} %I = add
; AVX512BW: cost of 1 {{.*}} %I = add ; AVX512BW: cost of 1 {{.*}} %I = add
...@@ -77,13 +77,13 @@ define i32 @add(i32 %arg) { ...@@ -77,13 +77,13 @@ define i32 @add(i32 %arg) {
%J = add <16 x i8> undef, undef %J = add <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %K = add ; SSSE3: cost of 2 {{.*}} %K = add
; SSE42: cost of 2 {{.*}} %K = add ; SSE42: cost of 2 {{.*}} %K = add
; AVX: cost of 2 {{.*}} %K = add ; AVX: cost of 4 {{.*}} %K = add
; AVX2: cost of 1 {{.*}} %K = add ; AVX2: cost of 1 {{.*}} %K = add
; AVX512: cost of 1 {{.*}} %K = add ; AVX512: cost of 1 {{.*}} %K = add
%K = add <32 x i8> undef, undef %K = add <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %L = add ; SSSE3: cost of 4 {{.*}} %L = add
; SSE42: cost of 4 {{.*}} %L = add ; SSE42: cost of 4 {{.*}} %L = add
; AVX: cost of 4 {{.*}} %L = add ; AVX: cost of 8 {{.*}} %L = add
; AVX2: cost of 2 {{.*}} %L = add ; AVX2: cost of 2 {{.*}} %L = add
; AVX512F: cost of 2 {{.*}} %L = add ; AVX512F: cost of 2 {{.*}} %L = add
; AVX512BW: cost of 1 {{.*}} %L = add ; AVX512BW: cost of 1 {{.*}} %L = add
...@@ -140,13 +140,13 @@ define i32 @sub(i32 %arg) { ...@@ -140,13 +140,13 @@ define i32 @sub(i32 %arg) {
%G = sub <8 x i16> undef, undef %G = sub <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %H = sub ; SSSE3: cost of 2 {{.*}} %H = sub
; SSE42: cost of 2 {{.*}} %H = sub ; SSE42: cost of 2 {{.*}} %H = sub
; AVX: cost of 2 {{.*}} %H = sub ; AVX: cost of 4 {{.*}} %H = sub
; AVX2: cost of 1 {{.*}} %H = sub ; AVX2: cost of 1 {{.*}} %H = sub
; AVX512: cost of 1 {{.*}} %H = sub ; AVX512: cost of 1 {{.*}} %H = sub
%H = sub <16 x i16> undef, undef %H = sub <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %I = sub ; SSSE3: cost of 4 {{.*}} %I = sub
; SSE42: cost of 4 {{.*}} %I = sub ; SSE42: cost of 4 {{.*}} %I = sub
; AVX: cost of 4 {{.*}} %I = sub ; AVX: cost of 8 {{.*}} %I = sub
; AVX2: cost of 2 {{.*}} %I = sub ; AVX2: cost of 2 {{.*}} %I = sub
; AVX512F: cost of 2 {{.*}} %I = sub ; AVX512F: cost of 2 {{.*}} %I = sub
; AVX512BW: cost of 1 {{.*}} %I = sub ; AVX512BW: cost of 1 {{.*}} %I = sub
...@@ -160,13 +160,13 @@ define i32 @sub(i32 %arg) { ...@@ -160,13 +160,13 @@ define i32 @sub(i32 %arg) {
%J = sub <16 x i8> undef, undef %J = sub <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %K = sub ; SSSE3: cost of 2 {{.*}} %K = sub
; SSE42: cost of 2 {{.*}} %K = sub ; SSE42: cost of 2 {{.*}} %K = sub
; AVX: cost of 2 {{.*}} %K = sub ; AVX: cost of 4 {{.*}} %K = sub
; AVX2: cost of 1 {{.*}} %K = sub ; AVX2: cost of 1 {{.*}} %K = sub
; AVX512: cost of 1 {{.*}} %K = sub ; AVX512: cost of 1 {{.*}} %K = sub
%K = sub <32 x i8> undef, undef %K = sub <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %L = sub ; SSSE3: cost of 4 {{.*}} %L = sub
; SSE42: cost of 4 {{.*}} %L = sub ; SSE42: cost of 4 {{.*}} %L = sub
; AVX: cost of 4 {{.*}} %L = sub ; AVX: cost of 8 {{.*}} %L = sub
; AVX2: cost of 2 {{.*}} %L = sub ; AVX2: cost of 2 {{.*}} %L = sub
; AVX512F: cost of 2 {{.*}} %L = sub ; AVX512F: cost of 2 {{.*}} %L = sub
; AVX512BW: cost of 1 {{.*}} %L = sub ; AVX512BW: cost of 1 {{.*}} %L = sub
......
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s ; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-AVX1
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=core-avx2 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-AVX2
; REQUIRES: asserts ; REQUIRES: asserts
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
...@@ -16,7 +17,8 @@ target triple = "x86_64-unknown-linux-gnu" ...@@ -16,7 +17,8 @@ target triple = "x86_64-unknown-linux-gnu"
; -vectorizer-maximize-bandwidth is indicated. ; -vectorizer-maximize-bandwidth is indicated.
; ;
; CHECK-label: foo ; CHECK-label: foo
; CHECK: LV: Selecting VF: 32. ; CHECK-AVX1: LV: Selecting VF: 16.
; CHECK-AVX2: LV: Selecting VF: 32.
define void @foo() { define void @foo() {
entry: entry:
br label %for.body br label %for.body
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment