Commit 6d0ed3ed authored by Yaowu Xu

Remove CONFIG_PARALLEL_DEBLOCKING

The experiment is fully adopted now.

Change-Id: I27906d2af4c746ce55aa17f64d1c0ef281e23ab2
parent e4cf4fa4
......@@ -86,12 +86,6 @@ set(AOM_DSP_COMMON_INTRIN_AVX2
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_AVX2
${AOM_DSP_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/aom_dsp/x86/loopfilter_avx2.c")
endif ()
if (NOT CONFIG_EXT_PARTITION)
set(AOM_DSP_COMMON_ASM_NEON
"${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon_asm.asm"
......@@ -113,14 +107,6 @@ set(AOM_DSP_COMMON_ASM_NEON
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon_asm.asm"
"${AOM_ROOT}/aom_dsp/arm/save_reg_neon.asm")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_ASM_NEON
${AOM_DSP_COMMON_ASM_NEON}
"${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_mb_neon.asm")
endif ()
if (NOT CONFIG_EXT_PARTITION)
set(AOM_DSP_COMMON_INTRIN_NEON
......@@ -140,11 +126,6 @@ set(AOM_DSP_COMMON_INTRIN_NEON
"${AOM_ROOT}/aom_dsp/arm/subtract_neon.c"
"${AOM_ROOT}/aom_dsp/arm/variance_neon.c")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_NEON
${AOM_DSP_COMMON_INTRIN_NEON}
"${AOM_ROOT}/aom_dsp/arm/loopfilter_neon.c")
endif ()
if ("${AOM_TARGET_CPU}" STREQUAL "arm64")
if (NOT CONFIG_EXT_PARTITION)
......@@ -168,13 +149,6 @@ if ("${AOM_TARGET_CPU}" STREQUAL "arm64")
"${AOM_ROOT}/aom_dsp/arm/idct8x8_add_neon.c"
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_NEON
${AOM_DSP_COMMON_INTRIN_NEON}
"${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.c"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.c"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.c")
endif ()
endif ()
set(AOM_DSP_COMMON_INTRIN_DSPR2
......@@ -196,18 +170,6 @@ set(AOM_DSP_COMMON_INTRIN_DSPR2
"${AOM_ROOT}/aom_dsp/mips/intrapred8_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_DSPR2
${AOM_DSP_COMMON_INTRIN_DSPR2}
"${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.h"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_macros_dspr2.h"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_masks_dspr2.h"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_vert_dspr2.c")
endif ()
set(AOM_DSP_COMMON_INTRIN_MSA
"${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_horiz_msa.c"
"${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_msa.c"
......@@ -230,15 +192,6 @@ set(AOM_DSP_COMMON_INTRIN_MSA
"${AOM_ROOT}/aom_dsp/mips/macros_msa.h"
"${AOM_ROOT}/aom_dsp/mips/txfm_macros_msa.h")
if (NOT CONFIG_PARALLEL_DEBLOCKING)
set(AOM_DSP_COMMON_INTRIN_MSA
${AOM_DSP_COMMON_INTRIN_MSA}
"${AOM_ROOT}/aom_dsp/mips/loopfilter_16_msa.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_4_msa.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_8_msa.c"
"${AOM_ROOT}/aom_dsp/mips/loopfilter_msa.h")
endif ()
set(AOM_DSP_COMMON_ASM_SSE2
${AOM_DSP_COMMON_ASM_SSE2}
"${AOM_ROOT}/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm"
......
......@@ -432,130 +432,77 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
# Loopfilter
#
add_proto qw/void aom_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_vertical_16 sse2/;
} else {
specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_16_neon_asm=aom_lpf_vertical_16_neon;
}
specialize qw/aom_lpf_vertical_16 sse2/;
add_proto qw/void aom_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_16_dual_neon_asm=aom_lpf_vertical_16_dual_neon;
}
add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_vertical_6 sse2/;
}
specialize qw/aom_lpf_vertical_6 sse2/;
add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_vertical_8 sse2/;
} else {
specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
}
specialize qw/aom_lpf_vertical_8 sse2/;
add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_8_dual_neon_asm=aom_lpf_vertical_8_dual_neon;
}
add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_vertical_4 sse2/;
} else {
specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
}
specialize qw/aom_lpf_vertical_4 sse2/;
add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
}
add_proto qw/void aom_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_16 sse2/;
} else {
specialize qw/aom_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_16_neon_asm=aom_lpf_horizontal_16_neon;
}
specialize qw/aom_lpf_horizontal_16 sse2/;
add_proto qw/void aom_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_16_dual sse2/;
} else {
specialize qw/aom_lpf_horizontal_16_dual sse2 avx2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_16_dual_neon_asm=aom_lpf_horizontal_16_dual_neon;
}
specialize qw/aom_lpf_horizontal_16_dual sse2/;
add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_6 sse2/;
}
specialize qw/aom_lpf_horizontal_6 sse2/;
add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_8 sse2/;
} else {
specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
}
specialize qw/aom_lpf_horizontal_8 sse2/;
add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_8_dual_neon_asm=aom_lpf_horizontal_8_dual_neon;
}
add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_4 sse2/;
} else {
specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
}
specialize qw/aom_lpf_horizontal_4 sse2/;
add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
}
add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_8 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_8 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_4 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_4 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_16 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_16 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
# Helper functions.
add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit";
......
......@@ -36,7 +36,7 @@ static INLINE int16_t signed_char_clamp_high(int t, int bd) {
default: return (int16_t)clamp(t, -128, 128 - 1);
}
}
#if CONFIG_PARALLEL_DEBLOCKING
// should we apply any filter at all: 11111111 yes, 00000000 no
static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
uint8_t p0, uint8_t q0, uint8_t q1) {
......@@ -46,7 +46,7 @@ static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
return ~mask;
}
#endif // CONFIG_PARALLEL_DEBLOCKING
static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0,
uint8_t q1, uint8_t q2, uint8_t q3) {
......@@ -156,25 +156,14 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask =
filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
#else // CONFIG_PARALLEL_DEBLOCKING
const uint8_t p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p];
const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
#endif // !CONFIG_PARALLEL_DEBLOCKING
filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
......@@ -185,35 +174,20 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1);
#else
aom_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
#endif
}
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask =
filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
#else // CONFIG_PARALLEL_DEBLOCKING
const uint8_t p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1];
const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
#endif // !CONFIG_PARALLEL_DEBLOCKING
filter4(mask, *thresh, s - 2, s - 1, s, s + 1);
s += pitch;
}
......@@ -224,11 +198,7 @@ void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
#else
aom_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
......@@ -274,11 +244,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
......@@ -299,11 +265,7 @@ void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
......@@ -325,22 +287,14 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1);
#else
aom_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
for (i = 0; i < count; ++i) {
const uint8_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
......@@ -357,11 +311,7 @@ void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
for (i = 0; i < count; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
......@@ -380,11 +330,7 @@ void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
#if CONFIG_PARALLEL_DEBLOCKING
aom_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
#else
aom_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
#endif
}
#if PARALLEL_DEBLOCKING_13_TAP
......@@ -497,11 +443,7 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh, int count) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int step = 4;
#else
int step = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
......@@ -581,23 +523,14 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
#if CONFIG_PARALLEL_DEBLOCKING
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
#else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
#endif
}
void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
#if CONFIG_PARALLEL_DEBLOCKING
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
#else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
#endif
}
#if CONFIG_PARALLEL_DEBLOCKING
// Should we apply any filter at all: 11111111 yes, 00000000 no ?
static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
uint16_t p1, uint16_t p0, uint16_t q0,
......@@ -610,7 +543,6 @@ static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
return ~mask;
}
#endif // CONFIG_PARALLEL_DEBLOCKING
// Should we apply any filter at all: 11111111 yes, 00000000 no ?
static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit,
......@@ -739,34 +671,17 @@ void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint16_t p3 = s[-4 * p];
const uint16_t p2 = s[-3 * p];
const uint16_t p1 = s[-2 * p];
const uint16_t p0 = s[-p];
const uint16_t q0 = s[0 * p];
const uint16_t q1 = s[1 * p];
const uint16_t q2 = s[2 * p];
const uint16_t q3 = s[3 * p];
const int8_t mask =
highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
#else // CONFIG_PARALLEL_DEBLOCKING
const uint16_t p1 = s[-2 * p];
const uint16_t p0 = s[-p];
const uint16_t q0 = s[0 * p];
const uint16_t q1 = s[1 * p];
const int8_t mask =
highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);
#endif // !CONFIG_PARALLEL_DEBLOCKING
highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd);
++s;
}
......@@ -777,37 +692,22 @@ void aom_highbd_lpf_horizontal_4_dual_c(
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
#if CONFIG_PARALLEL_DEBLOCKING
aom_highbd_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1, bd);
#else
aom_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd);
#endif
}
void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < count; ++i) {
#if !CONFIG_PARALLEL_DEBLOCKING
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask =
highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
#else // CONFIG_PARALLEL_DEBLOCKING
const uint16_t p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1];
const int8_t mask =
highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);
#endif // !CONFIG_PARALLEL_DEBLOCKING
highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd);
s += pitch;
}
......@@ -818,13 +718,8 @@ void aom_highbd_lpf_vertical_4_dual_c(
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
#if CONFIG_PARALLEL_DEBLOCKING
aom_highbd_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
bd);
#else
aom_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1,
bd);
#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
......@@ -871,11 +766,7 @@ void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
......@@ -898,11 +789,7 @@ void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
......@@ -925,11 +812,7 @@ void aom_highbd_lpf_horizontal_8_dual_c(
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
#if CONFIG_PARALLEL_DEBLOCKING
aom_highbd_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1, bd);
#else
aom_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd);
#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
......@@ -937,11 +820,7 @@ void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
for (i = 0; i < count; ++i) {
const uint16_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
......@@ -960,11 +839,7 @@ void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int count = 4;
#else
int count = 8;
#endif
for (i = 0; i < count; ++i) {
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
......@@ -984,13 +859,8 @@ void aom_highbd_lpf_vertical_8_dual_c(
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
#if CONFIG_PARALLEL_DEBLOCKING
aom_highbd_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
bd);
#else
aom_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1,
bd);
#endif
}
#if PARALLEL_DEBLOCKING_13_TAP
......@@ -1129,11 +999,7 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
const uint8_t *thresh, int count,
int bd) {
int i;
#if CONFIG_PARALLEL_DEBLOCKING
int step = 4;
#else
int step = 8;
#endif
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
......@@ -1183,11 +1049,7 @@ void aom_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh, int bd) {
#if CONFIG_PARALLEL_DEBLOCKING
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
#else
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
#endif
}
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
......@@ -1232,20 +1094,12 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
void aom_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {