Commit 6d0ed3ed authored by Yaowu Xu

Remove CONFIG_PARALLEL_DEBLOCKING

The experiment is fully adopted now.

Change-Id: I27906d2af4c746ce55aa17f64d1c0ef281e23ab2
parent e4cf4fa4
...@@ -86,12 +86,6 @@ set(AOM_DSP_COMMON_INTRIN_AVX2 ...@@ -86,12 +86,6 @@ set(AOM_DSP_COMMON_INTRIN_AVX2
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_common_avx2.h" "${AOM_ROOT}/aom_dsp/x86/inv_txfm_common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h") "${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_AVX2
${AOM_DSP_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/aom_dsp/x86/loopfilter_avx2.c")
endif ()
if (NOT CONFIG_EXT_PARTITION) if (NOT CONFIG_EXT_PARTITION)
set(AOM_DSP_COMMON_ASM_NEON set(AOM_DSP_COMMON_ASM_NEON
"${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon_asm.asm" "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon_asm.asm"
...@@ -113,14 +107,6 @@ set(AOM_DSP_COMMON_ASM_NEON ...@@ -113,14 +107,6 @@ set(AOM_DSP_COMMON_ASM_NEON
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon_asm.asm" "${AOM_ROOT}/aom_dsp/arm/intrapred_neon_asm.asm"
"${AOM_ROOT}/aom_dsp/arm/save_reg_neon.asm") "${AOM_ROOT}/aom_dsp/arm/save_reg_neon.asm")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_ASM_NEON
${AOM_DSP_COMMON_ASM_NEON}
"${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_mb_neon.asm")
endif ()
if (NOT CONFIG_EXT_PARTITION) if (NOT CONFIG_EXT_PARTITION)
set(AOM_DSP_COMMON_INTRIN_NEON set(AOM_DSP_COMMON_INTRIN_NEON
...@@ -140,11 +126,6 @@ set(AOM_DSP_COMMON_INTRIN_NEON ...@@ -140,11 +126,6 @@ set(AOM_DSP_COMMON_INTRIN_NEON
"${AOM_ROOT}/aom_dsp/arm/subtract_neon.c" "${AOM_ROOT}/aom_dsp/arm/subtract_neon.c"
"${AOM_ROOT}/aom_dsp/arm/variance_neon.c") "${AOM_ROOT}/aom_dsp/arm/variance_neon.c")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_NEON
${AOM_DSP_COMMON_INTRIN_NEON}
"${AOM_ROOT}/aom_dsp/arm/loopfilter_neon.c")
endif ()
if ("${AOM_TARGET_CPU}" STREQUAL "arm64") if ("${AOM_TARGET_CPU}" STREQUAL "arm64")
if (NOT CONFIG_EXT_PARTITION) if (NOT CONFIG_EXT_PARTITION)
...@@ -168,13 +149,6 @@ if ("${AOM_TARGET_CPU}" STREQUAL "arm64") ...@@ -168,13 +149,6 @@ if ("${AOM_TARGET_CPU}" STREQUAL "arm64")
"${AOM_ROOT}/aom_dsp/arm/idct8x8_add_neon.c" "${AOM_ROOT}/aom_dsp/arm/idct8x8_add_neon.c"
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c") "${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_NEON
${AOM_DSP_COMMON_INTRIN_NEON}
"${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.c"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.c"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.c")
endif ()
endif () endif ()
set(AOM_DSP_COMMON_INTRIN_DSPR2 set(AOM_DSP_COMMON_INTRIN_DSPR2
...@@ -196,18 +170,6 @@ set(AOM_DSP_COMMON_INTRIN_DSPR2 ...@@ -196,18 +170,6 @@ set(AOM_DSP_COMMON_INTRIN_DSPR2
"${AOM_ROOT}/aom_dsp/mips/intrapred8_dspr2.c" "${AOM_ROOT}/aom_dsp/mips/intrapred8_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h") "${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_DSPR2
${AOM_DSP_COMMON_INTRIN_DSPR2}
"${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.h"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_macros_dspr2.h"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_masks_dspr2.h"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_vert_dspr2.c")
endif ()
set(AOM_DSP_COMMON_INTRIN_MSA set(AOM_DSP_COMMON_INTRIN_MSA
"${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_horiz_msa.c" "${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_horiz_msa.c"
"${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_msa.c" "${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_msa.c"
...@@ -230,15 +192,6 @@ set(AOM_DSP_COMMON_INTRIN_MSA ...@@ -230,15 +192,6 @@ set(AOM_DSP_COMMON_INTRIN_MSA
"${AOM_ROOT}/aom_dsp/mips/macros_msa.h" "${AOM_ROOT}/aom_dsp/mips/macros_msa.h"
"${AOM_ROOT}/aom_dsp/mips/txfm_macros_msa.h") "${AOM_ROOT}/aom_dsp/mips/txfm_macros_msa.h")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_MSA
${AOM_DSP_COMMON_INTRIN_MSA}
"${AOM_ROOT}/aom_dsp/mips/loopfilter_16_msa.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_4_msa.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_8_msa.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_msa.h")
endif ()
set(AOM_DSP_COMMON_ASM_SSE2 set(AOM_DSP_COMMON_ASM_SSE2
${AOM_DSP_COMMON_ASM_SSE2} ${AOM_DSP_COMMON_ASM_SSE2}
"${AOM_ROOT}/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm" "${AOM_ROOT}/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm"
......
...@@ -432,130 +432,77 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") { ...@@ -432,130 +432,77 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
# Loopfilter # Loopfilter
# #
add_proto qw/void aom_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_vertical_16 sse2/;
specialize qw/aom_lpf_vertical_16 sse2/;
} else {
specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_16_neon_asm=aom_lpf_vertical_16_neon;
}
add_proto qw/void aom_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_16_dual_neon_asm=aom_lpf_vertical_16_dual_neon;
}
add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_vertical_6 sse2/;
specialize qw/aom_lpf_vertical_6 sse2/;
}
add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_vertical_8 sse2/;
specialize qw/aom_lpf_vertical_8 sse2/;
} else {
specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
}
add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_8_dual_neon_asm=aom_lpf_vertical_8_dual_neon;
}
add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_vertical_4 sse2/;
specialize qw/aom_lpf_vertical_4 sse2/;
} else {
specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
}
add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
}
add_proto qw/void aom_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_horizontal_16 sse2/;
specialize qw/aom_lpf_horizontal_16 sse2/;
} else {
specialize qw/aom_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_16_neon_asm=aom_lpf_horizontal_16_neon;
}
add_proto qw/void aom_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_horizontal_16_dual sse2/;
specialize qw/aom_lpf_horizontal_16_dual sse2/;
} else {
specialize qw/aom_lpf_horizontal_16_dual sse2 avx2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_16_dual_neon_asm=aom_lpf_horizontal_16_dual_neon;
}
add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_horizontal_6 sse2/;
specialize qw/aom_lpf_horizontal_6 sse2/;
}
add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_horizontal_8 sse2/;
specialize qw/aom_lpf_horizontal_8 sse2/;
} else {
specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
}
add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_8_dual_neon_asm=aom_lpf_horizontal_8_dual_neon;
}
add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") { specialize qw/aom_lpf_horizontal_4 sse2/;
specialize qw/aom_lpf_horizontal_4 sse2/;
} else {
specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
}
add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
}
add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16 sse2/; specialize qw/aom_highbd_lpf_vertical_16 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/; specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_8 sse2/; specialize qw/aom_highbd_lpf_vertical_8 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/; specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_4 sse2/; specialize qw/aom_highbd_lpf_vertical_4 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/; specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_16 sse2/; specialize qw/aom_highbd_lpf_horizontal_16 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_16_dual sse2 avx2/; specialize qw/aom_highbd_lpf_horizontal_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_8 sse2/; specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/; specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_4 sse2/; specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/; specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
# Helper functions. # Helper functions.
add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit"; add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit";
......
...@@ -36,7 +36,7 @@ static INLINE int16_t signed_char_clamp_high(int t, int bd) { ...@@ -36,7 +36,7 @@ static INLINE int16_t signed_char_clamp_high(int t, int bd) {
default: return (int16_t)clamp(t, -128, 128 - 1); default: return (int16_t)clamp(t, -128, 128 - 1);
} }
} }
#if CONFIG_PARALLEL_DEBLOCKING
// should we apply any filter at all: 11111111 yes, 00000000 no // should we apply any filter at all: 11111111 yes, 00000000 no
static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1, static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
uint8_t p0, uint8_t q0, uint8_t q1) { uint8_t p0, uint8_t q0, uint8_t q1) {
...@@ -46,7 +46,7 @@ static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1, ...@@ -46,7 +46,7 @@ static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
return ~mask; return ~mask;
} }
#endif // CONFIG_PARALLEL_DEBLOCKING
static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3, static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0,
uint8_t q1, uint8_t q2, uint8_t q3) { uint8_t q1, uint8_t q2, uint8_t q3) {
...@@ -156,25 +156,14 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, ...@@ -156,25 +156,14 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit, const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) { const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions. // of 8 bit simd instructions.
for (i = 0; i < count; ++i) { for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask =
filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
#else // CONFIG_PARALLEL_DEBLOCKING
const uint8_t p1 = s[-2 * p], p0 = s[-p]; const uint8_t p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p]; const uint8_t q0 = s[0 * p], q1 = s[1 * p];
const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1); const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
#endif // !CONFIG_PARALLEL_DEBLOCKING
filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p); filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s; ++s;
} }
...@@ -185,35 +174,20 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, ...@@ -185,35 +174,20 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
aom_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); aom_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1); aom_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1);
#else
aom_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
#endif
} }
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions. // of 8 bit simd instructions.
for (i = 0; i < count; ++i) { for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask =
filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
#else // CONFIG_PARALLEL_DEBLOCKING
const uint8_t p1 = s[-2], p0 = s[-1]; const uint8_t p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1]; const uint8_t q0 = s[0], q1 = s[1];
const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1); const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
#endif // !CONFIG_PARALLEL_DEBLOCKING
filter4(mask, *thresh, s - 2, s - 1, s, s + 1); filter4(mask, *thresh, s - 2, s - 1, s, s + 1);
s += pitch; s += pitch;
} }
...@@ -224,11 +198,7 @@ void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, ...@@ -224,11 +198,7 @@ void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1); aom_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
#else
aom_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
#endif
} }
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
...@@ -274,11 +244,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat, ...@@ -274,11 +244,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions. // of 8 bit simd instructions.
...@@ -299,11 +265,7 @@ void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit, ...@@ -299,11 +265,7 @@ void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions. // of 8 bit simd instructions.
...@@ -325,22 +287,14 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, ...@@ -325,22 +287,14 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1); aom_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1);
#else
aom_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
#endif
} }
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else
int count = 8;
#endif
for (i = 0; i < count; ++i) { for (i = 0; i < count; ++i) {
const uint8_t p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
...@@ -357,11 +311,7 @@ void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, ...@@ -357,11 +311,7 @@ void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else
int count = 8;
#endif
for (i = 0; i < count; ++i) { for (i = 0; i < count; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
...@@ -380,11 +330,7 @@ void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, ...@@ -380,11 +330,7 @@ void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1); aom_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
#else
aom_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
#endif
} }
#if PARALLEL_DEBLOCKING_13_TAP #if PARALLEL_DEBLOCKING_13_TAP
...@@ -497,11 +443,7 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, ...@@ -497,11 +443,7 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
const uint8_t *thresh, int count) { const uint8_t *thresh, int count) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING
int step = 4; int step = 4;
#else
int step = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions. // of 8 bit simd instructions.
...@@ -581,23 +523,14 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, ...@@ -581,23 +523,14 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
#if CONFIG_PARALLEL_DEBLOCKING
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4); mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
#else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
#endif
} }
void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
#if CONFIG_PARALLEL_DEBLOCKING
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
#else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
#endif
} }
#if CONFIG_PARALLEL_DEBLOCKING
// Should we apply any filter at all: 11111111 yes, 00000000 no ? // Should we apply any filter at all: 11111111 yes, 00000000 no ?
static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit, static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
uint16_t p1, uint16_t p0, uint16_t q0, uint16_t p1, uint16_t p0, uint16_t q0,
...@@ -610,7 +543,6 @@ static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit, ...@@ -610,7 +543,6 @@ static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1; mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
return ~mask; return ~mask;
} }
#endif // CONFIG_PARALLEL_DEBLOCKING