diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index c4e8bd1347f64933dbf3144527eb5bf865a700f1..b115ea5ab6a788250d6f1a3299fe0e1525c4c70e 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl @@ -516,11 +516,11 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/; - add_proto qw/void aom_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; - specialize qw/aom_highbd_lpf_horizontal_edge_8 sse2/; + add_proto qw/void aom_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/aom_highbd_lpf_horizontal_16 sse2/; - add_proto qw/void aom_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; - specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2 avx2/; + add_proto qw/void aom_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/aom_highbd_lpf_horizontal_16_dual sse2 avx2/; add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/aom_highbd_lpf_horizontal_8 sse2/; diff --git a/aom_dsp/loopfilter.c b/aom_dsp/loopfilter.c index c21441926a2f99163744bd1c39183211ab26ba36..c8fa3f4119c6f5eea9c44b02430feb74902cacde 100644 --- a/aom_dsp/loopfilter.c +++ b/aom_dsp/loopfilter.c @@ -1258,14 +1258,13 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, } } -void aom_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int bd) { +void aom_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int bd) { highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); } -void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, +void aom_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { diff --git a/aom_dsp/x86/highbd_loopfilter_avx2.c b/aom_dsp/x86/highbd_loopfilter_avx2.c index 8eede8829c79df32814bc7d0fadf3d18edf435d3..b904e09df6acace65b1fab25ad75b8cd00eb1877 100644 --- a/aom_dsp/x86/highbd_loopfilter_avx2.c +++ b/aom_dsp/x86/highbd_loopfilter_avx2.c @@ -204,11 +204,11 @@ static INLINE void highbd_filter4(__m256i *p, __m256i *q, const __m256i *mask, #endif // #if !CONFIG_PARALLEL_DEBLOCKING #if CONFIG_PARALLEL_DEBLOCKING -void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int p, +void aom_highbd_lpf_horizontal_16_dual_avx2(uint16_t *s, int p, const uint8_t *blt, const uint8_t *lt, const uint8_t *thr, int bd) { - aom_highbd_lpf_horizontal_edge_16_sse2(s, p, blt, lt, thr, bd); + aom_highbd_lpf_horizontal_16_dual_sse2(s, p, blt, lt, thr, bd); } void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int p, @@ -249,7 +249,7 @@ void aom_highbd_lpf_vertical_8_dual_avx2( limit1, thresh1, bd); } #else -void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch, +void aom_highbd_lpf_horizontal_16_dual_avx2(uint16_t *s, int pitch, const uint8_t *blt, const uint8_t *lt, const uint8_t *thr, int bd) { @@ -415,7 +415,7 @@ void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int p, highbd_transpose16x16(s - 8, p, t_dst, 16); // Loop filtering - aom_highbd_lpf_horizontal_edge_16_avx2(t_dst + 8 * 16, 16, blimit, limit, + aom_highbd_lpf_horizontal_16_dual_avx2(t_dst + 8 * 16, 16, blimit, limit, thresh, bd); // Transpose back diff --git a/aom_dsp/x86/highbd_loopfilter_sse2.c b/aom_dsp/x86/highbd_loopfilter_sse2.c index d2ab76ea4f553ab5e3d474f85d4a4298febd1752..df76ec6b033fe668d641c716210f3ced30217889 100644 --- a/aom_dsp/x86/highbd_loopfilter_sse2.c +++ b/aom_dsp/x86/highbd_loopfilter_sse2.c @@ -383,10 +383,10 @@ static INLINE void highbd_lpf_horz_edge_8_8p(uint16_t *s, int pitch, highbd_lpf_horz_edge_8_internal(s, pitch, blt, lt, thr, bd, EIGHT_PIXELS); } -void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, int bd) { +void aom_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { #if CONFIG_PARALLEL_DEBLOCKING highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd); #else @@ -394,7 +394,7 @@ void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, #endif } -void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p, +void aom_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, int bd) { @@ -979,8 +979,8 @@ void aom_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, highbd_transpose(src, p, dst, 8, 2); // Loop filtering - aom_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh, - bd); + aom_highbd_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh, + bd); src[0] = t_dst; src[1] = t_dst + 8 * 8; dst[0] = s - 8; @@ -1003,7 +1003,7 @@ void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p, #if CONFIG_PARALLEL_DEBLOCKING highbd_lpf_horz_edge_8_8p(t_dst + 8 * 16, 16, blimit, limit, thresh, bd); #else - aom_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, + aom_highbd_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh, bd); #endif // Transpose back diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c index 3da134b8be4f6f4b41fab6d144d62853df602c8d..f5b9e4a70334a05d2831c74910850dfeecc05d17 100644 --- a/av1/common/av1_loopfilter.c +++ b/av1/common/av1_loopfilter.c @@ -830,12 +830,12 @@ static void highbd_filter_selectively_horiz( if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - aom_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + aom_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); count = 2; } else { - aom_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); + aom_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { @@ -2483,11 +2483,11 @@ static void av1_filter_block_plane_horz( if (cm->use_highbitdepth) #if CONFIG_DEBLOCK_13TAP // TODO(olah): Remove _c once SIMD for 13-tap is available - aom_highbd_lpf_horizontal_edge_16_c( + aom_highbd_lpf_horizontal_16_dual_c( CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim, params.hev_thr, cm->bit_depth); #else - aom_highbd_lpf_horizontal_edge_16( + aom_highbd_lpf_horizontal_16_dual( CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim, params.hev_thr, cm->bit_depth); #endif diff --git a/test/lpf_test.cc b/test/lpf_test.cc index f7a0ef50274b87d4bf7c1e56b999ca9335713a62..385a9463c228d327f925cae37632f4cf187a4a35 100644 --- a/test/lpf_test.cc +++ b/test/lpf_test.cc @@ -494,12 +494,12 @@ const loop8_param_t kHbdLoop8Test6[] = { #if !CONFIG_DEBLOCK_13TAP // Despite the name the following funcition is doing 15-tap filtering // which is changed to 13-tap and not yet implemented in SIMD - make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2, - &aom_highbd_lpf_horizontal_edge_8_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_16_sse2, + &aom_highbd_lpf_horizontal_16_c, 8), #endif #if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet - make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2, - &aom_highbd_lpf_horizontal_edge_16_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2, + &aom_highbd_lpf_horizontal_16_dual_c, 8), #endif make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8), #if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet @@ -514,12 +514,12 @@ const loop8_param_t kHbdLoop8Test6[] = { #if !CONFIG_DEBLOCK_13TAP // Despite the name the following funcition is doing 15-tap filtering // which is changed to 13-tap and not yet implemented in SIMD - make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2, - &aom_highbd_lpf_horizontal_edge_8_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_16_sse2, + &aom_highbd_lpf_horizontal_16_c, 10), #endif #if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet - make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2, - &aom_highbd_lpf_horizontal_edge_16_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2, + &aom_highbd_lpf_horizontal_16_dual_c, 10), #endif make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10), #if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet @@ -534,12 +534,12 @@ const loop8_param_t kHbdLoop8Test6[] = { #if !CONFIG_DEBLOCK_13TAP // Despite the name the following funcition is doing 15-tap filtering // which is changed to 13-tap and not yet implemented in SIMD - make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2, - &aom_highbd_lpf_horizontal_edge_8_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_16_sse2, + &aom_highbd_lpf_horizontal_16_c, 12), #endif #if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet - make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2, - &aom_highbd_lpf_horizontal_edge_16_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2, + &aom_highbd_lpf_horizontal_16_dual_c, 12), make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c, 12), make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2, @@ -587,12 +587,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param, #if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet const loop8_param_t kHbdLoop8Test6Avx2[] = { - make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2, - &aom_highbd_lpf_horizontal_edge_16_c, 8), - make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2, - &aom_highbd_lpf_horizontal_edge_16_c, 10), - make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2, - &aom_highbd_lpf_horizontal_edge_16_c, 12), + make_tuple(&aom_highbd_lpf_horizontal_16_dual_avx2, + &aom_highbd_lpf_horizontal_16_dual_c, 8), + make_tuple(&aom_highbd_lpf_horizontal_16_dual_avx2, + &aom_highbd_lpf_horizontal_16_dual_c, 10), + make_tuple(&aom_highbd_lpf_horizontal_16_dual_avx2, + &aom_highbd_lpf_horizontal_16_dual_c, 12), make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2, &aom_highbd_lpf_vertical_16_dual_c, 8), make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,