Commit d954f2d7 authored by Steinar Midtskogen
Browse files

Disable unsupported SIMD optimisations for CLPF for 32 bit VS targets

VS compiling for 32 bit targets does not support vector types in
structs as arguments, which makes the v256 type of the intrinsics hard
to support, so optimizations for this target are disabled.

Change-Id: I675394cf1aed0cb18a48f21216470867031b30ce
parent fb1425fa
......@@ -840,18 +840,28 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CLPF") eq "yes") {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift, int size";
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift, int size";
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
}
}
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size";
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size";
specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/;
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/;
}
}
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
......
......@@ -25,7 +25,10 @@
#if HAVE_NEON
#include "simd/v256_intrinsics_arm.h"
#elif HAVE_SSE2
// VS compiling for 32 bit targets does not support vector types in
// structs as arguments, which makes the v256 type of the intrinsics
// hard to support, so optimizations for this target are disabled.
#elif HAVE_SSE2 && (defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__))
#include "simd/v256_intrinsics_x86.h"
#else
#include "simd/v256_intrinsics.h"
......
......@@ -213,6 +213,10 @@ TEST_P(ClpfHbdSpeedTest, TestSpeed) {
using std::tr1::make_tuple;
// VS compiling for 32 bit targets does not support vector types in
// structs as arguments, which makes the v256 type of the intrinsics
// hard to support, so optimizations for this target are disabled.
#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
// Test all supported architectures and block sizes
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
......@@ -294,7 +298,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4)));
#endif
#endif
#endif // CONFIG_AOM_HIGHBITDEPTH
// Test speed for all supported architectures
#if HAVE_SSE2
......@@ -349,6 +353,7 @@ INSTANTIATE_TEST_CASE_P(NEON, ClpfHbdSpeedTest,
&aom_clpf_block_hbd_c, 8,
8)));
#endif
#endif
#endif // CONFIG_AOM_HIGHBITDEPTH
#endif // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
} // namespace
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment