Commit 5978212b authored by Steinar Midtskogen

Add experiment CONFIG_CDEF_SINGLEPASS: Make CDEF single pass

Low latency, cpu-used=0:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.3162 | -0.6719 | -0.6535 |   0.0089 | -0.3890 | -0.1515 |    -0.6682

High latency, cpu-used=0:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0293 | -0.3556 | -0.5505 |   0.0684 | -0.0862 |  0.0513 |    -0.2765

Low latency, cpu-used=4:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.2248 | -0.7764 | -0.6630 |  -0.2109 | -0.3240 | -0.2532 |    -0.6980

High latency, cpu-used=4:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.1118 | -0.5841 | -0.7406 |  -0.0463 | -0.2442 | -0.1064 |    -0.4187

Change-Id: I9ca8399c8f45489541a66f535fb3d771eb1d59ab
parent 660064a9
......@@ -236,13 +236,15 @@ endif ()
if (CONFIG_CDEF)
set(AOM_AV1_COMMON_SOURCES
${AOM_AV1_COMMON_SOURCES}
"${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
endif ()
"${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h"
"${AOM_ROOT}/av1/common/cdef_block.c"
"${AOM_ROOT}/av1/common/cdef_block.h"
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
"${AOM_ROOT}/av1/common/cdef_block.h")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
......@@ -250,22 +252,34 @@ if (CONFIG_CDEF)
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/clpf_sse2.c"
if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_sse2.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/clpf_ssse3.c"
if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_ssse3.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/clpf_sse4.c"
if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_sse4.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_sse4.c")
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/cdef_block_avx2.c")
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/clpf_neon.c"
if (!CONFIG_CDEF_SINGLEPASS)
"${AOM_ROOT}/av1/common/clpf_neon.c"
endif ()
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
endif ()
......
......@@ -90,12 +90,16 @@ AV1_COMMON_SRCS-yes += common/warped_motion.h
AV1_COMMON_SRCS-yes += common/warped_motion.c
endif
ifeq ($(CONFIG_CDEF),yes)
ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/cdef_block_avx2.c
else
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
endif
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
......
......@@ -520,18 +520,22 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# Deringing Functions
if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
} else {
add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max";
}
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
......@@ -539,20 +543,28 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
} else {
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
}
}
}
......
......@@ -260,14 +260,21 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
uv_sec_strength == 0) ||
(cdef_count = sb_compute_cdef_list(
cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist,
get_filter_skip(level) || get_filter_skip(uv_level))) == 0) {
#if CONFIG_CDEF_SINGLEPASS
(level & 1) || (uv_level & 1))) == 0)
#else
get_filter_skip(level) || get_filter_skip(uv_level))) == 0)
#endif
{
cdef_left = 0;
continue;
}
curr_row_cdef[fbc] = 1;
for (pli = 0; pli < nplanes; pli++) {
#if !CONFIG_CDEF_SINGLEPASS
uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE];
#endif
int coffset;
int rend, cend;
int pri_damping = cm->cdef_pri_damping;
......@@ -386,15 +393,28 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
cdef_filter_fb(
#if CONFIG_CDEF_SINGLEPASS
NULL,
&CONVERT_TO_SHORTPTR(
#else
(uint8_t *)&CONVERT_TO_SHORTPTR(
#endif
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
#if CONFIG_CDEF_SINGLEPASS
xd->plane[pli].dst.stride,
#else
xd->plane[pli].dst.stride, dst,
#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
#else
sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
#endif
} else {
#endif
cdef_filter_fb(
......@@ -402,10 +422,18 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
.dst.buf[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
#if CONFIG_CDEF_SINGLEPASS
NULL, xd->plane[pli].dst.stride,
#else
xd->plane[pli].dst.stride, dst,
#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
#else
sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
#endif
#if CONFIG_HIGHBITDEPTH
}
......
......@@ -21,6 +21,7 @@
#include "./cdef.h"
/* Generated from gen_filter_tables.c. */
#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
const int cdef_directions[8][3] = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
......@@ -31,6 +32,18 @@ const int cdef_directions[8][3] = {
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
};
#else
/* Direction offsets for the two-tap filter configuration. Row d holds the
   offsets, relative to the current pixel in a buffer whose rows are
   CDEF_BSTRIDE elements apart, of the first and second sample along
   direction d. The filter adds an offset to reach the sample on one side of
   the pixel and subtracts it to reach the sample on the opposite side. */
const int cdef_directions[8][2] = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
{ 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
};
#endif
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
The search minimizes the weighted variance along all the lines in a
......@@ -110,6 +123,94 @@ int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
return best_dir;
}
#if CONFIG_CDEF_SINGLEPASS
/* Filter tap weights used by cdef_filter_block_c(). The first index is the
   low bit of the primary strength (used for both tables); the second index
   is the tap position along the direction, 0 being the sample closest to the
   pixel. CDEF_FULL provides three primary taps, otherwise two; there are
   always two secondary taps. */
#if CDEF_FULL
const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
#else
const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
#endif
/* Smooth in the direction detected.
 *
 * Filters one square block: 8x8 when bsize == BLOCK_8X8, otherwise 4x4.
 * For every pixel the primary taps are applied to the samples on both sides
 * of the pixel along `dir`, and the secondary taps to the samples on both
 * sides along directions (dir + 2) & 7 and (dir + 6) & 7. Each difference to
 * the centre pixel is passed through constrain() with the matching strength
 * and damping before it is weighted and accumulated.
 *
 * dst8, dst16:  output. The result is stored to dst8 when it is non-NULL,
 *               otherwise to dst16. Output rows are dstride elements apart.
 * in:           input pixels with row stride CDEF_BSTRIDE. Samples are read
 *               at both positive and negative direction offsets around each
 *               pixel, so `in` has to point inside a buffer that has readable
 *               samples around the block.
 * pri_strength, sec_strength, pri_damping, sec_damping:
 *               forwarded to constrain(); the low bit of pri_strength also
 *               selects which row of the tap tables is used.
 * dir:          row of cdef_directions used for the primary taps.
 * max:          upper clamp bound when CDEF_CAP is 0 (result clamped to
 *               [0, max]). When CDEF_CAP is 1 the parameter is unused and the
 *               result is clamped to the smallest/largest value among the
 *               pixel and the samples read for it.
 */
#if CDEF_CAP
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
const uint16_t *in, int pri_strength, int sec_strength,
int dir, int pri_damping, int sec_damping, int bsize,
UNUSED int max_unused)
#else
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
const uint16_t *in, int pri_strength, int sec_strength,
int dir, int pri_damping, int sec_damping, int bsize,
int max)
#endif
{
int i, j, k;
const int s = CDEF_BSTRIDE;
// Both tap tables are indexed by the low bit of the primary strength.
const int *pri_taps = cdef_pri_taps[pri_strength & 1];
const int *sec_taps = cdef_sec_taps[pri_strength & 1];
// 4 << 1 == 8 rows/columns for BLOCK_8X8, 4 for any other bsize.
for (i = 0; i < 4 << (bsize == BLOCK_8X8); i++) {
for (j = 0; j < 4 << (bsize == BLOCK_8X8); j++) {
int16_t sum = 0;
int16_t y;
int16_t x = in[i * s + j];
#if CDEF_CAP
// Running bounds of the values seen for this pixel, seeded with the pixel
// itself; this local `max` takes the place of the unused parameter.
int max = x;
int min = x;
#endif
#if CDEF_FULL
for (k = 0; k < 3; k++)
#else
for (k = 0; k < 2; k++)
#endif
{
// Primary taps: the k-th sample on each side of the pixel along `dir`.
int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
#if CDEF_CAP
// Samples equal to CDEF_VERY_LARGE are kept out of the maximum
// (presumably they mark unavailable pixels -- TODO confirm). No matching
// check is made for the minimum.
if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
min = AOMMIN(p0, min);
min = AOMMIN(p1, min);
#endif
#if CDEF_FULL
// The secondary tap table only has two entries, so the third distance
// contributes primary taps only.
if (k == 2) continue;
#endif
// Secondary taps: the k-th sample on each side along the two directions
// (dir + 2) & 7 and (dir + 6) & 7.
int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
#if CDEF_CAP
if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
min = AOMMIN(s0, min);
min = AOMMIN(s1, min);
min = AOMMIN(s2, min);
min = AOMMIN(s3, min);
#endif
sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
}
// Add sum / 16 rounded to nearest; the (sum < 0) term makes ties round away
// from zero for negative sums as they do for positive ones (this relies on
// an arithmetic right shift of negative values).
#if CDEF_CAP
y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
#else
y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
#endif
// Exactly one destination is written: dst8 if provided, else dst16.
if (dst8)
dst8[i * dstride + j] = (uint8_t)y;
else
dst16[i * dstride + j] = (uint16_t)y;
}
}
}
#else
/* Smooth in the direction detected. */
void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir, int damping) {
......@@ -167,6 +268,7 @@ void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
}
}
}
#endif
/* Compute the primary filter strength for an 8x8 block based on the
directional variance difference. A high variance difference means
......@@ -180,6 +282,7 @@ static INLINE int adjust_strength(int strength, int32_t var) {
return var ? (strength * (4 + i) + 8) >> 4 : 0;
}
#if !CONFIG_CDEF_SINGLEPASS
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
int sstride) {
int i, j;
......@@ -303,25 +406,56 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd) {
#else
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift) {
#endif
int bi;
int bx;
int by;
int bsize, bsizex, bsizey;
#if CONFIG_CDEF_SINGLEPASS
int pri_strength = (level >> 1) << coeff_shift;
int filter_skip = level & 1;
if (!pri_strength && !sec_strength && filter_skip) {
pri_strength = 19 << coeff_shift;
sec_strength = 7 << coeff_shift;
}
#else
int threshold = (level >> 1) << coeff_shift;
int filter_skip = get_filter_skip(level);
if (level == 1) threshold = 31 << coeff_shift;
cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
cdef_direction_8x8 };
#endif
sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
bsize =
ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
bsizex = 3 - xdec;
bsizey = 3 - ydec;
if (!skip_dering) {
#if CONFIG_CDEF_SINGLEPASS
if (dirinit && pri_strength == 0 && sec_strength == 0)
#else
if (!skip_dering)
#endif
{
#if CONFIG_CDEF_SINGLEPASS
// If we're here, both primary and secondary strengths are 0, and
// we still haven't written anything to y[] yet, so we just copy
// the input to y[]. This is necessary only for av1_cdef_search()
// and only av1_cdef_search() sets dirinit.
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
#else
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < cdef_count; bi++) {
......@@ -394,12 +528,56 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
#endif
int iy, ix;
// TODO(stemidts/jmvalin): SIMD optimisations
for (iy = 0; iy < 1 << bsizey; iy++)
for (ix = 0; ix < 1 << bsizex; ix++)
#if CONFIG_CDEF_SINGLEPASS
dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#else
y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
#endif
in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
}
#if CONFIG_CDEF_SINGLEPASS
return;
#endif
}
#if CONFIG_CDEF_SINGLEPASS
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
CDEF_BSTRIDE, &var[by][bx], coeff_shift);
}
if (dirinit) *dirinit = 1;
}
}
assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
for (bi = 0; bi < cdef_count; bi++) {
int t = !filter_skip && dlist[bi].skip ? 0 : pri_strength;
int s = !filter_skip && dlist[bi].skip ? 0 : sec_strength;
by = dlist[bi].by;
bx = dlist[bi].bx;
if (dst8)
cdef_filter_block(
&dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, dstride,
&in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
(pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
else
cdef_filter_block(
NULL, &dst16[dirinit ? bi << (bsizex + bsizey)
: (by << bsizey) * dstride + (bx << bsizex)],
dirinit ? 1 << bsizex : dstride,
&in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
(pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
}
#endif
}
......@@ -17,6 +17,9 @@
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8)
#if CONFIG_CDEF_SINGLEPASS
#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
#endif
/* We need to buffer three vertical lines. */
#define CDEF_VBORDER (3)
......@@ -28,7 +31,24 @@
#define CDEF_VERY_LARGE (30000)
#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER))
#if CONFIG_CDEF_SINGLEPASS
// Filter configuration
#define CDEF_CAP 1 // 1 = Cap change to largest diff
#define CDEF_FULL 0 // 1 = 7x7 filter, 0 = 5x5 filter
#if CDEF_FULL
extern const int cdef_pri_taps[2][3];
extern const int cdef_sec_taps[2][2];
extern const int cdef_directions[8][3];
#else
extern const int cdef_pri_taps[2][2];
extern const int cdef_sec_taps[2][2];
extern const int cdef_directions[8][2];
#endif
#else // CONFIG_CDEF_SINGLEPASS
extern const int cdef_directions[8][3];
#endif
typedef struct {
uint8_t by;
......@@ -36,12 +56,30 @@ typedef struct {
uint8_t skip;
} cdef_list;
#if CONFIG_CDEF_SINGLEPASS
typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
int dstride, const uint16_t *in,
int pri_strength, int sec_strength,
int dir, int pri_damping,
int sec_damping, int bsize, int max);
void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
cdef_list *dlist, int cdef_count, int bsize);
#else
typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, int dir,
int damping);
int get_filter_skip(int level);
#endif
#if CONFIG_CDEF_SINGLEPASS
/* Single-pass CDEF: filters the cdef_count blocks listed in dlist for plane
 * pli by calling cdef_filter_block() once per block.
 *
 * dst8, dst16: output. When dst8 is non-NULL each filtered block is written
 *              to dst8, otherwise to dst16. Blocks are placed at their
 *              (by, bx) position using dstride, except when dirinit is
 *              non-NULL: then dst16 receives the blocks packed back to back
 *              in dlist order.
 * in:          input pixels with row stride CDEF_BSTRIDE.
 * xdec, ydec:  select the block size; blocks are 1 << (3 - xdec) wide and
 *              1 << (3 - ydec) high. The definition asserts that the result
 *              is BLOCK_8X8 or BLOCK_4X4.
 * dir, var:    per-block direction and variance. For plane 0 they are filled
 *              in with cdef_find_dir() unless dirinit is non-NULL and
 *              *dirinit is already set; *dirinit is set to 1 afterwards.
 * dirinit:     may be NULL. When it is non-NULL and both strengths are zero,
 *              the input blocks are copied to dst16 unfiltered and the
 *              function returns.
 * level:       bit 0 is the filter-skip flag; level >> 1, scaled by
 *              coeff_shift, is the primary strength. Blocks whose dlist
 *              entry has skip set are filtered with zero strengths unless
 *              the filter-skip flag is set.
 * pri_damping, sec_damping: each is offset internally by
 *              coeff_shift - (pli != AOM_PLANE_Y).
 */
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift);
#else
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
......@@ -49,3 +87,4 @@ void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd);
#endif
#endif
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_avx2
#include "./cdef_block_simd.h"
This diff is collapsed.
......@@ -3011,8 +3011,12 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CDEF
static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
int i;
#if CONFIG_CDEF_SINGLEPASS
cm->cdef_pri_damping = cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
#else
cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
#endif
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
......
......@@ -3454,8 +3454,13 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CDEF
static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
int i;
#if CONFIG_CDEF_SINGLEPASS
aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
#else
aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
#endif
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
......
......@@ -4206,7 +4206,7 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
cm->cdef_strengths[0] = 0;
cm->nb_cdef_strengths = 1;
} else {
// Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
// Find CDEF parameters
av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
cpi->oxcf.speed > 0);
......
......@@ -68,11 +68,16 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
#if !CONFIG_CDEF_SINGLEPASS
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
#endif
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id0 = 0;
int best_id1 = 0;
#if CONFIG_CDEF_SINGLEPASS
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
#endif
memset(tot_mse, 0, sizeof(tot_mse));
for (i = 0; i < sb_count; i++) {
int gi;
......@@ -305,7 +310,11 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
#if CONFIG_CDEF_SINGLEPASS
int pri_damping = 3 + (cm->base_qindex >> 6);
#else
int pri_damping = 6;
#endif
int sec_damping = 3 + (cm->base_qindex >> 6);
int i;
int nb_strengths;
......@@ -414,6 +423,17 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int xsize = (nhb << mi_wide_l2[pli]) +
CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
sec_strength = gi % CDEF_SEC_STRENGTHS;
#if CONFIG_CDEF_SINGLEPASS
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
dir, &dirinit, var, pli, dlist, cdef_count, threshold,
sec_strength + (sec_strength == 3), pri_damping,
sec_damping, coeff_shift);
#else
if (sec_strength == 0)
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
......@@ -425,6 +445,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
pli, dlist, cdef_count, threshold,
sec_strength + (sec_strength == 3), sec_damping,
pri_damping, coeff_shift, sec_strength != 0, 1);
#endif
curr_mse = compute_cdef_dist(
ref_coeff[pli] +
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
......
......@@ -114,6 +114,7 @@ set(CONFIG_AOM_QM 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_BGSPRITE 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CB4X4 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CDEF 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CDEF_SINGLEPASS 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CFL 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CHROMA_2X2 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CHROMA_SUB8X8 1 CACHE NUMBER "AV1 experiment flag.")
......
......@@ -244,6 +244,7 @@ HAVE_LIST="
EXPERIMENT_LIST="