Commit ec8093ab authored by Yaowu Xu
Browse files

aom_dsp: remove x86inc.asm distinction

Manually cherrypicked from libvpx/master:
1b833d63d9c82270e4ea588541d14e9111c64c79
a4f3751b

Change-Id: I3b82e54a3173ac1458a13f33fd36094fec066f1c
parent f062f987
......@@ -65,18 +65,14 @@ DSP_SRCS-yes += daalaboolwriter.c
DSP_SRCS-yes += daalaboolwriter.h
endif
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/aom_subpixel_8t_ssse3.asm
endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/highbd_intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
endif # CONFIG_USE_X86INC
endif # CONFIG_AOM_HIGHBITDEPTH
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
......@@ -116,9 +112,7 @@ ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/aom_high_subpixel_8t_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/aom_high_subpixel_bilinear_sse2.asm
endif
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/aom_convolve_copy_sse2.asm
endif
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/aom_convolve_copy_neon_asm$(ASM)
......@@ -209,10 +203,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
endif
endif
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h
DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c
......@@ -227,12 +219,10 @@ DSP_SRCS-yes += inv_txfm.h
DSP_SRCS-yes += inv_txfm.c
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.c
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/inv_wht_sse2.asm
ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3_x86_64.asm
endif # ARCH_X86_64
endif # CONFIG_USE_X86INC
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/save_reg_neon$(ASM)
......@@ -284,11 +274,9 @@ ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
endif
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
endif
endif
# avg
DSP_SRCS-yes += avg.c
......@@ -296,10 +284,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/avg_intrin_sse2.c
DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm
endif
endif
endif # CONFIG_AV1_ENCODER
......@@ -321,7 +307,6 @@ DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm
DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE) += x86/sad_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
......@@ -332,7 +317,6 @@ ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm
endif # CONFIG_AOM_HIGHBITDEPTH
endif # CONFIG_USE_X86INC
endif # CONFIG_ENCODERS
......@@ -363,17 +347,13 @@ ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm
endif # ARCH_X86_64
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/subpel_variance_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3
endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subpel_variance_impl_sse2.asm
endif # CONFIG_USE_X86INC
endif # CONFIG_AOM_HIGHBITDEPTH
ifeq ($(CONFIG_MOTION_VAR),yes)
......
This diff is collapsed.
......@@ -250,7 +250,6 @@ unsigned int aom_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
return *sse;
}
#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in
// highbd_subpel_variance_impl_sse2.asm
......@@ -557,7 +556,6 @@ FNS(sse2);
#undef FNS
#undef FN
#endif // CONFIG_USE_X86INC
void aom_highbd_upsampled_pred_sse2(uint16_t *pred, int width, int height,
const uint8_t *ref8, const int ref_stride) {
......
......@@ -23,33 +23,41 @@ SECTION .text
movd m4, [ref2q+%3]
movd m7, [ref3q+%3]
movd m5, [ref4q+%3]
punpckldq m0, [srcq +%4]
punpckldq m6, [ref1q+%5]
punpckldq m4, [ref2q+%5]
punpckldq m7, [ref3q+%5]
punpckldq m5, [ref4q+%5]
movd m1, [srcq +%4]
movd m2, [ref1q+%5]
punpckldq m0, m1
punpckldq m6, m2
movd m1, [ref2q+%5]
movd m2, [ref3q+%5]
movd m3, [ref4q+%5]
punpckldq m4, m1
punpckldq m7, m2
punpckldq m5, m3
movlhps m0, m0
movlhps m6, m4
movlhps m7, m5
psadbw m6, m0
psadbw m4, m0
psadbw m7, m0
psadbw m5, m0
punpckldq m6, m4
punpckldq m7, m5
%else
movd m1, [ref1q+%3]
movd m5, [ref1q+%5]
movd m2, [ref2q+%3]
movd m4, [ref2q+%5]
punpckldq m1, m5
punpckldq m2, m4
movd m3, [ref3q+%3]
movd m5, [ref3q+%5]
punpckldq m3, m5
movd m4, [ref4q+%3]
punpckldq m0, [srcq +%4]
punpckldq m1, [ref1q+%5]
punpckldq m2, [ref2q+%5]
punpckldq m3, [ref3q+%5]
punpckldq m4, [ref4q+%5]
movd m5, [ref4q+%5]
punpckldq m4, m5
movd m5, [srcq +%4]
punpckldq m0, m5
movlhps m0, m0
movlhps m1, m2
movlhps m3, m4
psadbw m1, m0
psadbw m2, m0
psadbw m3, m0
psadbw m4, m0
punpckldq m1, m2
punpckldq m3, m4
paddd m6, m1
paddd m7, m3
%endif
......@@ -170,10 +178,16 @@ SECTION .text
PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6
%endmacro
; PROCESS_128x2x4 first, off_{first,second}_{src,ref}, advance_at_end
;
; Thin wrapper that expands to two PROCESS_64x2x4 invocations.
;   %1      "first" flag; handed unchanged to the first PROCESS_64x2x4 only
;   %2, %3  src / ref offsets used by the first invocation, which also
;           receives the same offsets plus 64 as its second offset pair
;   %4, %5  src / ref offsets used by the second invocation, likewise paired
;           with the same offsets plus 64
;   %6      optional "advance_at_end" value (defaults to 0 when omitted);
;           forwarded only to the second invocation
; NOTE(review): presumably each PROCESS_64x2x4 covers two 64-byte spans, so
; one expansion handles two 128-byte rows for all four references - confirm
; against the PROCESS_64x2x4 definition.
%macro PROCESS_128x2x4 5-6 0
; First half: caller's "first" flag, offsets %2/%3 and %2+64/%3+64.
; No sixth argument is passed on this invocation.
PROCESS_64x2x4 %1, %2, %3, %2 + 64, %3 + 64
; Second half: "first" is hard-coded to 0, offsets %4/%5 and %4+64/%5+64;
; the caller's advance_at_end (%6) is applied here.
PROCESS_64x2x4 0, %4, %5, %4 + 64, %5 + 64, %6
%endmacro
; void aom_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
%macro SADNXN4D 2
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
......@@ -195,7 +209,7 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
%endrep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
%if mmsize == 16
%if %1 > 4
pslldq m5, 4
pslldq m7, 4
por m4, m5
......@@ -210,8 +224,10 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
RET
%else
movifnidn r4, r4mp
movq [r4+0], m6
movq [r4+8], m7
pshufd m6, m6, 0x08
pshufd m7, m7, 0x08
movq [r4+0], m6
movq [r4+8], m7
RET
%endif
%endmacro
......@@ -228,7 +244,5 @@ SADNXN4D 16, 8
SADNXN4D 8, 16
SADNXN4D 8, 8
SADNXN4D 8, 4
INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4
This diff is collapsed.
......@@ -335,7 +335,6 @@ unsigned int aom_mse16x16_sse2(const uint8_t *src, int src_stride,
return *sse;
}
#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in subpel_variance.asm
#define DECL(w, opt) \
......@@ -344,11 +343,11 @@ unsigned int aom_mse16x16_sse2(const uint8_t *src, int src_stride,
const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \
void *unused0, void *unused)
#define DECLS(opt1, opt2) \
DECL(4, opt2); \
DECL(4, opt1); \
DECL(8, opt1); \
DECL(16, opt1)
DECLS(sse2, sse);
DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL
......@@ -397,10 +396,10 @@ DECLS(ssse3, ssse3);
FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \
FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \
FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \
FN(4, 8, 4, 2, 3, opt2, (int32_t), (int32_t)); \
FN(4, 4, 4, 2, 2, opt2, (int32_t), (int32_t))
FN(4, 8, 4, 2, 3, opt1, (int32_t), (int32_t)); \
FN(4, 4, 4, 2, 2, opt1, (int32_t), (int32_t))
FNS(sse2, sse);
FNS(sse2, sse2);
FNS(ssse3, ssse3);
#undef FNS
......@@ -414,11 +413,11 @@ FNS(ssse3, ssse3);
ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \
void *unused)
#define DECLS(opt1, opt2) \
DECL(4, opt2); \
DECL(4, opt1); \
DECL(8, opt1); \
DECL(16, opt1)
DECLS(sse2, sse);
DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECL
#undef DECLS
......@@ -468,15 +467,14 @@ DECLS(ssse3, ssse3);
FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \
FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \
FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \
FN(4, 8, 4, 2, 3, opt2, (uint32_t), (int32_t)); \
FN(4, 4, 4, 2, 2, opt2, (uint32_t), (int32_t))
FN(4, 8, 4, 2, 3, opt1, (uint32_t), (int32_t)); \
FN(4, 4, 4, 2, 2, opt1, (uint32_t), (int32_t))
FNS(sse2, sse);
FNS(ssse3, ssse3);
#undef FNS
#undef FN
#endif // CONFIG_USE_X86INC
void aom_upsampled_pred_sse2(uint8_t *pred, int width, int height,
const uint8_t *ref, const int ref_stride) {
......@@ -683,7 +681,6 @@ void aom_comp_avg_upsampled_pred_sse2(uint8_t *comp_pred, const uint8_t *pred,
p0 = _mm_packus_epi16(p0, zero);
*(int *)comp_pred = _mm_cvtsi128_si32(p0);
comp_pred += 4;
pred += 4;
ref += 4 * 8;
......
......@@ -677,18 +677,8 @@ INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
//------------------------------------------------------------------------------
// x86 functions
#if HAVE_SSE
#if CONFIG_USE_X86INC
const SadMxNx4Param x4d_sse_tests[] = {
make_tuple(4, 8, &aom_sad4x8x4d_sse, -1),
make_tuple(4, 4, &aom_sad4x4x4d_sse, -1),
};
INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSE
#if HAVE_SSE2
#if CONFIG_USE_X86INC
const SadMxNParam sse2_tests[] = {
make_tuple(64, 64, &aom_sad64x64_sse2, -1),
make_tuple(64, 32, &aom_sad64x32_sse2, -1),
......@@ -805,6 +795,8 @@ const SadMxNx4Param x4d_sse2_tests[] = {
make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1),
make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1),
make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1),
make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1),
#if CONFIG_AOM_HIGHBITDEPTH
make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8),
make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8),
......@@ -848,7 +840,6 @@ const SadMxNx4Param x4d_sse2_tests[] = {
#endif // CONFIG_AOM_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSE2
#if HAVE_SSE3
......
......@@ -987,8 +987,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 4, &aom_sub_pixel_variance8x16_sse2, 0),
make_tuple(3, 3, &aom_sub_pixel_variance8x8_sse2, 0),
make_tuple(3, 2, &aom_sub_pixel_variance8x4_sse2, 0),
make_tuple(2, 3, &aom_sub_pixel_variance4x8_sse, 0),
make_tuple(2, 2, &aom_sub_pixel_variance4x4_sse, 0)));
make_tuple(2, 3, &aom_sub_pixel_variance4x8_sse2, 0),
make_tuple(2, 2, &aom_sub_pixel_variance4x4_sse2, 0)));
INSTANTIATE_TEST_CASE_P(
SSE2, AvxSubpelAvgVarianceTest,
......@@ -1004,8 +1004,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_sse2, 0),
make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0),
make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0),
make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_sse, 0),
make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_sse, 0)));
make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0)));
#endif // CONFIG_USE_X86INC
#if CONFIG_AOM_HIGHBITDEPTH
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment