Commit 8322ff04 authored by Steinar Midtskogen's avatar Steinar Midtskogen Committed by Sebastien Alaiwan

Remove CDEF_SINGLEPASS defines

The experiment has been adopted and enabled by default for a while, and
the alternative code path has not been maintained for a long time; that
unmaintained path is now removed.

Change-Id: Iaf22f2969b45b71b2bf67707e131ab4c439b7fa6
parent 27b5136f
......@@ -279,30 +279,6 @@ set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
# Legacy (two-pass) CDEF build path: when the single-pass experiment is
# disabled, compile the CLPF (constrained low-pass filter) sources and
# their per-ISA SIMD instantiations instead of the single-pass kernels.
if (NOT CONFIG_CDEF_SINGLEPASS)
# Generic C implementation plus the shared SIMD template headers.
set(AOM_AV1_COMMON_SOURCES
${AOM_AV1_COMMON_SOURCES}
"${AOM_ROOT}/av1/common/clpf.c"
"${AOM_ROOT}/av1/common/clpf_simd.h"
"${AOM_ROOT}/av1/common/cdef_block_simd.h")
# Per-ISA instantiations of the CLPF SIMD template.
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/clpf_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/clpf_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/clpf_sse4.c")
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/clpf_neon.c")
endif ()
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
......
......@@ -94,16 +94,7 @@ AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/intra_edge_sse4.c
endif
AV1_COMMON_SRCS-yes += common/warped_motion.h
AV1_COMMON_SRCS-yes += common/warped_motion.c
# Select the CDEF implementation sources.  With the single-pass
# experiment enabled only the AVX2 kernel needs adding here (the other
# ISA variants of cdef_block are listed unconditionally below);
# otherwise build the legacy CLPF filter and its per-ISA SIMD variants.
ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/cdef_block_avx2.c
else
# Legacy two-pass path: generic C, shared SIMD template header, and the
# per-ISA instantiations of the CLPF kernels.
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
endif
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
......
......@@ -499,20 +499,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# Deringing Functions
add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
# CDEF RTCD prototypes.  When the single-pass experiment is disabled,
# declare the legacy two-pass entry points: CLPF block filters (regular
# and high bit depth), the per-direction dering kernels, and fixed-size
# 16bit->8bit / 16bit->16bit copy helpers.  Otherwise only the combined
# single-pass filter kernel is needed.
if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
} else {
add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
}
add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
......@@ -521,28 +508,10 @@ add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
# Register the per-ISA specializations for the CDEF functions.  The
# single-pass path has AVX2 kernels; the legacy two-pass path has none.
# Fix: the legacy branch listed "specialize cdef_find_dir" twice; the
# redundant second occurrence is removed.
if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
  specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
  specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
  specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
  specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
} else {
  specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
  specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
  specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
  specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
  specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
  specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
  specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
  specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
  specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
  specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
  specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
  specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
  specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
}
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
}
# WARPED_MOTION / GLOBAL_MOTION functions
......
......@@ -281,9 +281,6 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
curr_row_cdef[fbc] = 1;
for (int pli = 0; pli < nplanes; pli++) {
#if !CONFIG_CDEF_SINGLEPASS
DECLARE_ALIGNED(16, uint16_t, dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]);
#endif
int coffset;
int rend, cend;
int pri_damping = cm->cdef_pri_damping;
......@@ -399,27 +396,16 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
cdef_filter_fb(
#if CONFIG_CDEF_SINGLEPASS
NULL,
&CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
#else
(uint8_t *)&CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
#endif
[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
#if CONFIG_CDEF_SINGLEPASS
&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride,
#else
xd->plane[pli].dst.stride, dst,
#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
#else
sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
#endif
} else {
#endif
cdef_filter_fb(
......@@ -427,18 +413,10 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
.dst.buf[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
#if CONFIG_CDEF_SINGLEPASS
NULL, xd->plane[pli].dst.stride,
#else
xd->plane[pli].dst.stride, dst,
#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
#else
sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
#endif
#if CONFIG_HIGHBITDEPTH
}
......
This diff is collapsed.
......@@ -17,9 +17,7 @@
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
#define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8)
#if CONFIG_CDEF_SINGLEPASS
#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
#endif
/* We need to buffer three vertical lines. */
#define CDEF_VBORDER (3)
......@@ -33,7 +31,6 @@
#define CDEF_INBUF_SIZE \
(CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER))
#if CONFIG_CDEF_SINGLEPASS
// Filter configuration
#define CDEF_CAP 1 // 1 = Cap change to largest diff
#define CDEF_FULL 0 // 1 = 7x7 filter, 0 = 5x5 filter
......@@ -48,17 +45,12 @@ extern const int cdef_sec_taps[2][2];
DECLARE_ALIGNED(16, extern const int, cdef_directions[8][2]);
#endif
#else // CONFIG_CDEF_SINGLEPASS
DECLARE_ALIGNED(16, extern const int, cdef_directions[8][3]);
#endif
typedef struct {
uint8_t by;
uint8_t bx;
uint8_t skip;
} cdef_list;
#if CONFIG_CDEF_SINGLEPASS
typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
int dstride, const uint16_t *in,
int pri_strength, int sec_strength,
......@@ -67,26 +59,11 @@ typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
int coeff_shift);
void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
cdef_list *dlist, int cdef_count, int bsize);
#else
typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, int dir,
int damping);
#endif
#if CONFIG_CDEF_SINGLEPASS
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift);
#else
void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int sec_damping, int pri_damping,
int coeff_shift, int skip_dering, int hbd);
#endif
#endif
......@@ -221,7 +221,6 @@ SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
}
#if CONFIG_CDEF_SINGLEPASS
// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
unsigned int adjdamp) {
......@@ -1081,153 +1080,6 @@ void SIMD_FUNC(cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride,
}
}
#else
// Second pass of the legacy two-pass CDEF: directional smoothing of a
// 4x4 block of 16-bit samples.  'in' points into the padded input buffer
// (row stride CDEF_BSTRIDE); filtered rows are written to 'y' (row
// stride ystride).  'dir' indexes cdef_directions to obtain the two tap
// offsets o1 (weight 4) and o2 (weight 1); 'damping', reduced by
// msb(threshold), controls the constrain16() falloff.  Two rows are
// packed into each 128-bit vector, so the loop runs twice.
void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
                                   int threshold, int dir, int damping) {
  int i;
  v128 p0, p1, sum, row, res;
  int o1 = cdef_directions[dir][0];
  int o2 = cdef_directions[dir][1];
  // Pre-adjust damping so constrain16's shift accounts for the
  // magnitude of the threshold (skip when threshold is 0).
  if (threshold) damping -= get_msb(threshold);
  for (i = 0; i < 4; i += 2) {
    sum = v128_zero();
    row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
                        v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
    p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
    p0 = constrain16(p0, row, threshold, damping);
    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
    p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
    p1 = constrain16(p1, row, threshold, damping);
    // sum += 4 * (p0 + p1)
    sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
    p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
    p0 = constrain16(p0, row, threshold, damping);
    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
    p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
    p1 = constrain16(p1, row, threshold, damping);
    // sum += 1 * (p0 + p1)
    sum = v128_add_16(sum, v128_add_16(p0, p1));
    // res = row + ((sum + 8) >> 4)
    res = v128_add_16(sum, v128_dup_16(8));
    res = v128_shr_n_s16(res, 4);
    res = v128_add_16(row, res);
    // The high half of the vector holds row i, the low half row i+1.
    v64_store_aligned(&y[i * ystride], v128_high_v64(res));
    v64_store_aligned(&y[(i + 1) * ystride], v128_low_v64(res));
  }
}
// Second pass of the legacy two-pass CDEF for an 8x8 block.  Same scheme
// as cdef_direction_4x4, but one full row per 128-bit vector and three
// tap offsets from cdef_directions: o1 (weight 3), o2 (weight 2) and
// o3 (weight 1), applied symmetrically at +offset and -offset.
void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
                                   int threshold, int dir, int damping) {
  int i;
  v128 sum, p0, p1, row, res;
  int o1 = cdef_directions[dir][0];
  int o2 = cdef_directions[dir][1];
  int o3 = cdef_directions[dir][2];
  // Pre-adjust damping for the threshold magnitude (skip when 0).
  if (threshold) damping -= get_msb(threshold);
  for (i = 0; i < 8; i++) {
    sum = v128_zero();
    row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
    p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
    p0 = constrain16(p0, row, threshold, damping);
    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
    p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
    p1 = constrain16(p1, row, threshold, damping);
    // sum += 3 * (p0 + p1), computed as (p0+p1) + ((p0+p1) << 1)
    p0 = v128_add_16(p0, p1);
    p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
    sum = v128_add_16(sum, p0);
    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
    p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
    p0 = constrain16(p0, row, threshold, damping);
    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
    p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
    p1 = constrain16(p1, row, threshold, damping);
    // sum += 2 * (p0 + p1)
    p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
    sum = v128_add_16(sum, p0);
    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
    p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
    p0 = constrain16(p0, row, threshold, damping);
    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
    p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
    p1 = constrain16(p1, row, threshold, damping);
    // sum += (p0 + p1)
    p0 = v128_add_16(p0, p1);
    sum = v128_add_16(sum, p0);
    // res = row + ((sum + 8) >> 4)
    res = v128_add_16(sum, v128_dup_16(8));
    res = v128_shr_n_s16(res, 4);
    res = v128_add_16(row, res);
    v128_store_unaligned(&y[i * ystride], res);
  }
}
// Narrow an 8x8 block of 16-bit samples to 8-bit, one row per iteration.
// v128_pack_s16_u8 packs with saturation; NOTE(review): samples are
// presumably already in 8-bit range here -- confirm against callers.
void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
                                       const uint16_t *src, int sstride) {
  int i;
  for (i = 0; i < 8; i++) {
    v128 row = v128_load_unaligned(&src[i * sstride]);
    row = v128_pack_s16_u8(row, row);
    v64_store_unaligned(&dst[i * dstride], v128_low_v64(row));
  }
}
// Narrow a 4x4 block of 16-bit samples to 8-bit; each packed row fits in
// 32 bits, stored with u32_store_unaligned.
void SIMD_FUNC(copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride,
                                       const uint16_t *src, int sstride) {
  int i;
  for (i = 0; i < 4; i++) {
    v128 row = v128_load_unaligned(&src[i * sstride]);
    row = v128_pack_s16_u8(row, row);
    u32_store_unaligned(&dst[i * dstride], v128_low_u32(row));
  }
}
// Straight copy of an 8x8 block of 16-bit samples: one unaligned
// 128-bit load/store per row.
void SIMD_FUNC(copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride,
                                        const uint16_t *src, int sstride) {
  int i;
  for (i = 0; i < 8; i++) {
    v128 row = v128_load_unaligned(&src[i * sstride]);
    v128_store_unaligned(&dst[i * dstride], row);
  }
}
// Straight copy of a 4x4 block of 16-bit samples: one unaligned 64-bit
// load/store per row.
void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
                                        const uint16_t *src, int sstride) {
  int i;
  for (i = 0; i < 4; i++) {
    v64 row = v64_load_unaligned(&src[i * sstride]);
    v64_store_unaligned(&dst[i * dstride], row);
  }
}
#endif
void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
const uint8_t *src, int sstride, int v,
int h) {
......
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "./av1_rtcd.h"
#include "./cdef.h"
#include "aom/aom_image.h"
#include "aom_dsp/aom_dsp_common.h"
// Weighted, constrained sum of the differences between the centre sample
// X and its eight neighbours A..H, sign-corrected and rounded to give
// the CLPF correction.  s is the filter strength; dmp is the damping
// passed through to constrain().
static int clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
                       int H, int s, unsigned int dmp) {
  static const int taps[8] = { 1, 3, 1, 3, 3, 1, 3, 1 };
  const int nbr[8] = { A, B, C, D, E, F, G, H };
  int delta = 0;
  int i;
  for (i = 0; i < 8; i++) delta += taps[i] * constrain(nbr[i] - X, s, dmp);
  // Subtract 1 before the shift when delta is negative so the division
  // by 16 rounds towards zero.
  return (8 + delta - (delta < 0)) >> 4;
}
// Horizontal-only variant of clpf_sample(): four taps A..D with weights
// 1,3,3,1, sign-corrected and rounded (division by 8).
static int clpf_hsample(int X, int A, int B, int C, int D, int s,
                        unsigned int dmp) {
  static const int taps[4] = { 1, 3, 3, 1 };
  const int nbr[4] = { A, B, C, D };
  int delta = 0;
  int i;
  for (i = 0; i < 4; i++) delta += taps[i] * constrain(nbr[i] - X, s, dmp);
  return (4 + delta - (delta < 0)) >> 3;
}
// Apply the full 8-tap constrained low-pass filter to a sizex*sizey
// block: 16-bit samples in (stride sstride), filtered 8-bit samples out
// (stride dstride).  The (row +/- 2, col +/- 2) reads assume src has at
// least 2 samples of valid border on every side -- TODO confirm callers.
void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride,
                      int sstride, int sizex, int sizey, unsigned int strength,
                      unsigned int damping) {
  int row, col;
  for (row = 0; row < sizey; row++) {
    for (col = 0; col < sizex; col++) {
      // Address the centre sample and its eight taps relative to p.
      const uint16_t *p = src + row * sstride + col;
      const int X = p[0];
      const int A = p[-2 * sstride];
      const int B = p[-1 * sstride];
      const int C = p[-2];
      const int D = p[-1];
      const int E = p[1];
      const int F = p[2];
      const int G = p[1 * sstride];
      const int H = p[2 * sstride];
      dst[row * dstride + col] =
          X + clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
    }
  }
}
// High-bit-depth twin of aom_clpf_block_c(): identical filtering, but
// the output buffer is 16-bit instead of 8-bit.
void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
                          int sstride, int sizex, int sizey,
                          unsigned int strength, unsigned int damping) {
  int row, col;
  for (row = 0; row < sizey; row++) {
    for (col = 0; col < sizex; col++) {
      // Address the centre sample and its eight taps relative to p.
      const uint16_t *p = src + row * sstride + col;
      const int X = p[0];
      const int A = p[-2 * sstride];
      const int B = p[-1 * sstride];
      const int C = p[-2];
      const int D = p[-1];
      const int E = p[1];
      const int F = p[2];
      const int G = p[1 * sstride];
      const int H = p[2 * sstride];
      dst[row * dstride + col] =
          X + clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
    }
  }
}
// Vertically restricted CLPF: only the four horizontal neighbours
// (col +/- 1, col +/- 2) feed the filter, so no rows above or below are
// read.  16-bit samples in, filtered 8-bit samples out.
void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride,
                       int sstride, int sizex, int sizey,
                       unsigned int strength, unsigned int damping) {
  int row, col;
  for (row = 0; row < sizey; row++) {
    const uint16_t *line = src + row * sstride;
    uint8_t *out = dst + row * dstride;
    for (col = 0; col < sizex; col++) {
      const int X = line[col];
      out[col] = X + clpf_hsample(X, line[col - 2], line[col - 1],
                                  line[col + 1], line[col + 2], strength,
                                  damping);
    }
  }
}
// High-bit-depth twin of aom_clpf_hblock_c(): identical horizontal-only
// filtering with a 16-bit output buffer.
void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
                           int sstride, int sizex, int sizey,
                           unsigned int strength, unsigned int damping) {
  int row, col;
  for (row = 0; row < sizey; row++) {
    const uint16_t *line = src + row * sstride;
    uint16_t *out = dst + row * dstride;
    for (col = 0; col < sizex; col++) {
      const int X = line[col];
      out[col] = X + clpf_hsample(X, line[col - 2], line[col - 1],
                                  line[col + 1], line[col + 2], strength,
                                  damping);
    }
  }
}
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
#include "./clpf_simd.h"
This diff is collapsed.
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
#include "./clpf_simd.h"
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
#include "./clpf_simd.h"
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
#include "./clpf_simd.h"
......@@ -1034,12 +1034,7 @@ static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_INTRABC
if (cm->allow_intrabc && NO_FILTER_FOR_IBC) return;
#endif // CONFIG_INTRABC
#if CONFIG_CDEF_SINGLEPASS
cm->cdef_pri_damping = cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
#else
cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
#endif
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (int i = 0; i < cm->nb_cdef_strengths; i++) {
......
......@@ -2501,13 +2501,8 @@ static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
if (cm->allow_intrabc && NO_FILTER_FOR_IBC) return;
#endif // CONFIG_INTRABC
int i;
#if CONFIG_CDEF_SINGLEPASS
aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
#else
aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
#endif
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
......
......@@ -68,16 +68,11 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
#if !CONFIG_CDEF_SINGLEPASS
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
#endif
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id0 = 0;
int best_id1 = 0;
#if CONFIG_CDEF_SINGLEPASS
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
#endif
memset(tot_mse, 0, sizeof(tot_mse));
for (i = 0; i < sb_count; i++) {
int gi;
......@@ -314,11 +309,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
#if CONFIG_CDEF_SINGLEPASS
int pri_damping = 3 + (cm->base_qindex >> 6);
#else
int pri_damping = 6;
#endif
int sec_damping = 3 + (cm->base_qindex >> 6);
int i;
int nb_strengths;
......@@ -449,7 +440,6 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int xsize = (nhb << mi_wide_l2[pli]) +
CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
sec_strength = gi % CDEF_SEC_STRENGTHS;
#if CONFIG_CDEF_SINGLEPASS
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
......@@ -459,19 +449,6 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
dir, &dirinit, var, pli, dlist, cdef_count, threshold,
sec_strength + (sec_strength == 3), pri_damping,
sec_damping, coeff_shift);
#else
if (sec_strength == 0)
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
cdef_filter_fb(sec_strength ? NULL : (uint8_t *)in, CDEF_BSTRIDE,
tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var,
pli, dlist, cdef_count, threshold,
sec_strength + (sec_strength == 3), sec_damping,
pri_damping, coeff_shift, sec_strength != 0, 1);
#endif
curr_mse = compute_cdef_dist(
ref_coeff[pli] +
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
......
......@@ -110,7 +110,6 @@ set(CONFIG_ADD_4BYTES_OBUSIZE 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_AMVR 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_AOM_QM 1 CACHE NUMBER "AV1 experiment flag.")