Commit 5978212b authored by Steinar Midtskogen
Browse files

Add experiment CONFIG_CDEF_SINGLEPASS: Make CDEF single pass

Low latency, cpu-used=0:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.3162 | -0.6719 | -0.6535 |   0.0089 | -0.3890 | -0.1515 |    -0.6682

High latency, cpu-used=0:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0293 | -0.3556 | -0.5505 |   0.0684 | -0.0862 |  0.0513 |    -0.2765

Low latency, cpu-used=4:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.2248 | -0.7764 | -0.6630 |  -0.2109 | -0.3240 | -0.2532 |    -0.6980

High latency, cpu-used=4:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.1118 | -0.5841 | -0.7406 |  -0.0463 | -0.2442 | -0.1064 |    -0.4187

Change-Id: I9ca8399c8f45489541a66f535fb3d771eb1d59ab
parent 660064a9
...@@ -236,13 +236,15 @@ endif () ...@@ -236,13 +236,15 @@ endif ()
if (CONFIG_CDEF) if (CONFIG_CDEF)
set(AOM_AV1_COMMON_SOURCES set(AOM_AV1_COMMON_SOURCES
${AOM_AV1_COMMON_SOURCES} ${AOM_AV1_COMMON_SOURCES}
"${AOM_ROOT}/av1/common/clpf.c" if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_simd.h" "${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
endif ()
"${AOM_ROOT}/av1/common/cdef.c" "${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h" "${AOM_ROOT}/av1/common/cdef.h"
"${AOM_ROOT}/av1/common/cdef_block.c" "${AOM_ROOT}/av1/common/cdef_block.c"
"${AOM_ROOT}/av1/common/cdef_block.h" "${AOM_ROOT}/av1/common/cdef_block.h")
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
set(AOM_AV1_ENCODER_SOURCES set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES} ${AOM_AV1_ENCODER_SOURCES}
...@@ -250,22 +252,34 @@ if (CONFIG_CDEF) ...@@ -250,22 +252,34 @@ if (CONFIG_CDEF)
set(AOM_AV1_COMMON_INTRIN_SSE2 set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2} ${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/clpf_sse2.c" if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_sse2.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_sse2.c") "${AOM_ROOT}/av1/common/cdef_block_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3 set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3} ${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/clpf_ssse3.c" if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_ssse3.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_ssse3.c") "${AOM_ROOT}/av1/common/cdef_block_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1 set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1} ${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/clpf_sse4.c" if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_sse4.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_sse4.c") "${AOM_ROOT}/av1/common/cdef_block_sse4.c")
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/cdef_block_avx2.c")
set(AOM_AV1_COMMON_INTRIN_NEON set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON} ${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/clpf_neon.c" if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_neon.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_neon.c") "${AOM_ROOT}/av1/common/cdef_block_neon.c")
endif () endif ()
......
...@@ -90,12 +90,16 @@ AV1_COMMON_SRCS-yes += common/warped_motion.h ...@@ -90,12 +90,16 @@ AV1_COMMON_SRCS-yes += common/warped_motion.h
AV1_COMMON_SRCS-yes += common/warped_motion.c AV1_COMMON_SRCS-yes += common/warped_motion.c
endif endif
ifeq ($(CONFIG_CDEF),yes) ifeq ($(CONFIG_CDEF),yes)
ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/cdef_block_avx2.c
else
AV1_COMMON_SRCS-yes += common/clpf.c AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h AV1_COMMON_SRCS-yes += common/clpf_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
endif
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
......
...@@ -520,18 +520,22 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { ...@@ -520,18 +520,22 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# Deringing Functions # Deringing Functions
if (aom_config("CONFIG_CDEF") eq "yes") { if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift"; add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping"; if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping"; add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
} else {
add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max";
}
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h"; add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h"; add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
...@@ -539,20 +543,28 @@ if (aom_config("CONFIG_CDEF") eq "yes") { ...@@ -539,20 +543,28 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
# structs as arguments, which makes the v256 type of the intrinsics # structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled. # hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) { if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/; if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/; specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/; specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/; specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/; specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/; } else {
specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/; specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
}
} }
} }
......
...@@ -260,14 +260,21 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, ...@@ -260,14 +260,21 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
uv_sec_strength == 0) || uv_sec_strength == 0) ||
(cdef_count = sb_compute_cdef_list( (cdef_count = sb_compute_cdef_list(
cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist,
get_filter_skip(level) || get_filter_skip(uv_level))) == 0) { #if CONFIG_CDEF_SINGLEPASS
(level & 1) || (uv_level & 1))) == 0)
#else
get_filter_skip(level) || get_filter_skip(uv_level))) == 0)
#endif
{
cdef_left = 0; cdef_left = 0;
continue; continue;
} }
curr_row_cdef[fbc] = 1; curr_row_cdef[fbc] = 1;
for (pli = 0; pli < nplanes; pli++) { for (pli = 0; pli < nplanes; pli++) {
#if !CONFIG_CDEF_SINGLEPASS
uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]; uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE];
#endif
int coffset; int coffset;
int rend, cend; int rend, cend;
int pri_damping = cm->cdef_pri_damping; int pri_damping = cm->cdef_pri_damping;
...@@ -386,15 +393,28 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, ...@@ -386,15 +393,28 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
cdef_filter_fb( cdef_filter_fb(
#if CONFIG_CDEF_SINGLEPASS
NULL,
&CONVERT_TO_SHORTPTR(
#else
(uint8_t *)&CONVERT_TO_SHORTPTR( (uint8_t *)&CONVERT_TO_SHORTPTR(
#endif
xd->plane[pli] xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride * .dst.buf)[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])], (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
#if CONFIG_CDEF_SINGLEPASS
xd->plane[pli].dst.stride,
#else
xd->plane[pli].dst.stride, dst, xd->plane[pli].dst.stride, dst,
#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli], &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level, ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
#else
sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1); sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
#endif
} else { } else {
#endif #endif
cdef_filter_fb( cdef_filter_fb(
...@@ -402,10 +422,18 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, ...@@ -402,10 +422,18 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
.dst.buf[xd->plane[pli].dst.stride * .dst.buf[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])], (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
#if CONFIG_CDEF_SINGLEPASS
NULL, xd->plane[pli].dst.stride,
#else
xd->plane[pli].dst.stride, dst, xd->plane[pli].dst.stride, dst,
#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli], &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level, ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
#else
sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0); sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
#endif
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
} }
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "./cdef.h" #include "./cdef.h"
/* Generated from gen_filter_tables.c. */ /* Generated from gen_filter_tables.c. */
#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
const int cdef_directions[8][3] = { const int cdef_directions[8][3] = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 }, { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 }, { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
...@@ -31,6 +32,18 @@ const int cdef_directions[8][3] = { ...@@ -31,6 +32,18 @@ const int cdef_directions[8][3] = {
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 }, { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 } { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
}; };
#else
const int cdef_directions[8][2] = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
{ 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
};
#endif
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on. /* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
The search minimizes the weighted variance along all the lines in a The search minimizes the weighted variance along all the lines in a
...@@ -110,6 +123,94 @@ int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, ...@@ -110,6 +123,94 @@ int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
return best_dir; return best_dir;
} }
#if CONFIG_CDEF_SINGLEPASS
#if CDEF_FULL
const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
#else
const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
#endif
/* Smooth in the direction detected. */
#if CDEF_CAP
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
const uint16_t *in, int pri_strength, int sec_strength,
int dir, int pri_damping, int sec_damping, int bsize,
UNUSED int max_unused)
#else
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
const uint16_t *in, int pri_strength, int sec_strength,
int dir, int pri_damping, int sec_damping, int bsize,
int max)
#endif
{
int i, j, k;
const int s = CDEF_BSTRIDE;
const int *pri_taps = cdef_pri_taps[pri_strength & 1];
const int *sec_taps = cdef_sec_taps[pri_strength & 1];
for (i = 0; i < 4 << (bsize == BLOCK_8X8); i++) {
for (j = 0; j < 4 << (bsize == BLOCK_8X8); j++) {
int16_t sum = 0;
int16_t y;
int16_t x = in[i * s + j];
#if CDEF_CAP
int max = x;
int min = x;
#endif
#if CDEF_FULL
for (k = 0; k < 3; k++)
#else
for (k = 0; k < 2; k++)
#endif
{
int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
#if CDEF_CAP
if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
min = AOMMIN(p0, min);
min = AOMMIN(p1, min);
#endif
#if CDEF_FULL
if (k == 2) continue;
#endif
int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
#if CDEF_CAP
if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
min = AOMMIN(s0, min);
min = AOMMIN(s1, min);
min = AOMMIN(s2, min);
min = AOMMIN(s3, min);
#endif
sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
}
#if CDEF_CAP
y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
#else
y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
#endif
if (dst8)
dst8[i * dstride + j] = (uint8_t)y;
else
dst16[i * dstride + j] = (uint16_t)y;
}
}
}
#else
/* Smooth in the direction detected. */ /* Smooth in the direction detected. */
void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in, void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir, int damping) { int threshold, int dir, int damping) {
...@@ -167,6 +268,7 @@ void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in, ...@@ -167,6 +268,7 @@ void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
} }
} }
} }
#endif
/* Compute the primary filter strength for an 8x8 block based on the /* Compute the primary filter strength for an 8x8 block based on the
directional variance difference. A high variance difference means directional variance difference. A high variance difference means
...@@ -180,6 +282,7 @@ static INLINE int adjust_strength(int strength, int32_t var) { ...@@ -180,6 +282,7 @@ static INLINE int adjust_strength(int strength, int32_t var) {
return var ? (strength * (4 + i) + 8) >> 4 : 0; return var ? (strength * (4 + i) + 8) >> 4 : 0;
} }
#if !CONFIG_CDEF_SINGLEPASS
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
int sstride) { int sstride) {
int i, j; int i, j;
...@@ -303,25 +406,56 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, ...@@ -303,25 +406,56 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
cdef_list *dlist, int cdef_count, int level, cdef_list *dlist, int cdef_count, int level,
int sec_strength, int sec_damping, int pri_damping, int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd) { int coeff_shift, int skip_dering, int hbd) {
#else
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift) {
#endif
int bi; int bi;
int bx; int bx;
int by; int by;
int bsize, bsizex, bsizey; int bsize, bsizex, bsizey;
#if CONFIG_CDEF_SINGLEPASS
int pri_strength = (level >> 1) << coeff_shift;
int filter_skip = level & 1;
if (!pri_strength && !sec_strength && filter_skip) {
pri_strength = 19 << coeff_shift;
sec_strength = 7 << coeff_shift;
}
#else
int threshold = (level >> 1) << coeff_shift; int threshold = (level >> 1) << coeff_shift;
int filter_skip = get_filter_skip(level); int filter_skip = get_filter_skip(level);
if (level == 1) threshold = 31 << coeff_shift; if (level == 1) threshold = 31 << coeff_shift;
cdef_direction_func cdef_direction[] = { cdef_direction_4x4, cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
cdef_direction_8x8 }; cdef_direction_8x8 };
#endif
sec_damping += coeff_shift - (pli != AOM_PLANE_Y); sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
pri_damping += coeff_shift - (pli != AOM_PLANE_Y); pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
bsize = bsize =
ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8); ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
bsizex = 3 - xdec; bsizex = 3 - xdec;
bsizey = 3 - ydec; bsizey = 3 - ydec;
#if CONFIG_CDEF_SINGLEPASS
if (!skip_dering) { if (dirinit && pri_strength == 0 && sec_strength == 0)
#else
if (!skip_dering)
#endif
{
#if CONFIG_CDEF_SINGLEPASS
// If we're here, both primary and secondary strengths are 0, and
// we still haven't written anything to y[] yet, so we just copy
// the input to y[]. This is necessary only for av1_cdef_search()
// and only av1_cdef_search() sets dirinit.
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
#else
if (pli == 0) { if (pli == 0) {
if (!dirinit || !*dirinit) { if (!dirinit || !*dirinit) {
for (bi = 0; bi < cdef_count; bi++) { for (bi = 0; bi < cdef_count; bi++) {
...@@ -394,12 +528,56 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, ...@@ -394,12 +528,56 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
for (bi = 0; bi < cdef_count; bi++) { for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by; by = dlist[bi].by;
bx = dlist[bi].bx; bx = dlist[bi].bx;
#endif
int iy, ix; int iy, ix;
// TODO(stemidts/jmvalin): SIMD optimisations // TODO(stemidts/jmvalin): SIMD optimisations
for (iy = 0; iy < 1 << bsizey; iy++) for (iy = 0; iy < 1 << bsizey; iy++)
for (ix = 0; ix < 1 << bsizex; ix++) for (ix = 0; ix < 1 << bsizex; ix++)
#if CONFIG_CDEF_SINGLEPASS
dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#else
y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] = y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#endif
in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix]; in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
} }
#if CONFIG_CDEF_SINGLEPASS
return;
#endif
} }
#if CONFIG_CDEF_SINGLEPASS
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
CDEF_BSTRIDE, &var[by][bx], coeff_shift);
}
if (dirinit) *dirinit = 1;
}
}
assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
for (bi = 0; bi < cdef_count; bi++) {
int t = !filter_skip && dlist[bi].skip ? 0 : pri_strength;
int s = !filter_skip && dlist[bi].skip ? 0 : sec_strength;
by = dlist[bi].by;
bx = dlist[bi].bx;
if (dst8)
cdef_filter_block(
&dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, dstride,
&in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
(pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
else
cdef_filter_block(
NULL, &dst16[dirinit ? bi << (bsizex + bsizey)
: (by << bsizey) * dstride + (bx << bsizex)],
dirinit ? 1 << bsizex : dstride,
&in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
(pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
}
#endif
} }