Commit 4305e6be authored by Steinar Midtskogen

CLPF: Add quality dependent damping in the constrain function

PSNR YCbCr:  -0.17%     -0.03%     -0.40%
APSNR YCbCr: -0.17%     -0.02%     -0.39%
PSNRHVS:     -0.06%
SSIM:        -0.17%
MSSSIM:      -0.07%
CIEDE2000:   -0.12%

Change-Id: I69a4b6a4e18c22c3930069396540a6fee45cb30d
parent ee4b3a80
...@@ -854,8 +854,8 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/; ...@@ -854,8 +854,8 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CDEF") eq "yes") { if (aom_config("CONFIG_CDEF") eq "yes") {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd"; add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd"; add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd"; add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in # VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics # structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled. # hard to support, so optimizations for this target are disabled.
...@@ -866,8 +866,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") { ...@@ -866,8 +866,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
} }
} }
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd"; add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd"; add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd"; add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in # VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics # structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled. # hard to support, so optimizations for this target are disabled.
......
...@@ -16,25 +16,25 @@ ...@@ -16,25 +16,25 @@
int sign(int i) { return i < 0 ? -1 : 1; } int sign(int i) { return i < 0 ? -1 : 1; }
int constrain(int x, int s, unsigned int bitdepth) { int constrain(int x, int s, unsigned int damping) {
return sign(x) * return sign(x) *
AOMMAX(0, abs(x) - AOMMAX(0, abs(x) - s + (abs(x) >> (bitdepth - 3 - AOMMAX(0, abs(x) - AOMMAX(0, abs(x) - s +
get_msb(s))))); (abs(x) >> (damping - get_msb(s)))));
} }
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G, int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
int H, int s, unsigned int bd) { int H, int s, unsigned int dmp) {
int delta = 1 * constrain(A - X, s, bd) + 3 * constrain(B - X, s, bd) + int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
1 * constrain(C - X, s, bd) + 3 * constrain(D - X, s, bd) + 1 * constrain(C - X, s, dmp) + 3 * constrain(D - X, s, dmp) +
3 * constrain(E - X, s, bd) + 1 * constrain(F - X, s, bd) + 3 * constrain(E - X, s, dmp) + 1 * constrain(F - X, s, dmp) +
3 * constrain(G - X, s, bd) + 1 * constrain(H - X, s, bd); 3 * constrain(G - X, s, dmp) + 1 * constrain(H - X, s, dmp);
return (8 + delta - (delta < 0)) >> 4; return (8 + delta - (delta < 0)) >> 4;
} }
void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride, void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey, int dstride, int x0, int y0, int sizex, int sizey,
unsigned int strength, BOUNDARY_TYPE bt, unsigned int strength, BOUNDARY_TYPE bt,
unsigned int bitdepth) { unsigned int damping) {
int x, y; int x, y;
const int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2; const int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2;
const int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY) * 2; const int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY) * 2;
...@@ -53,7 +53,7 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -53,7 +53,7 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
const int G = src[AOMMIN(ymax, y + 1) * sstride + x]; const int G = src[AOMMIN(ymax, y + 1) * sstride + x];
const int H = src[AOMMIN(ymax, y + 2) * sstride + x]; const int H = src[AOMMIN(ymax, y + 2) * sstride + x];
const int delta = const int delta =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, bitdepth); av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
dst[y * dstride + x] = X + delta; dst[y * dstride + x] = X + delta;
} }
} }
...@@ -64,7 +64,7 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -64,7 +64,7 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride, void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey, int dstride, int x0, int y0, int sizex, int sizey,
unsigned int strength, BOUNDARY_TYPE bt, unsigned int strength, BOUNDARY_TYPE bt,
unsigned int bitdepth) { unsigned int damping) {
int x, y; int x, y;
const int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2; const int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2;
const int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY) * 2; const int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY) * 2;
...@@ -83,7 +83,7 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride, ...@@ -83,7 +83,7 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
const int G = src[AOMMIN(ymax, y + 1) * sstride + x]; const int G = src[AOMMIN(ymax, y + 1) * sstride + x];
const int H = src[AOMMIN(ymax, y + 2) * sstride + x]; const int H = src[AOMMIN(ymax, y + 2) * sstride + x];
const int delta = const int delta =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, bitdepth); av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
dst[y * dstride + x] = X + delta; dst[y * dstride + x] = X + delta;
} }
} }
...@@ -91,14 +91,13 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride, ...@@ -91,14 +91,13 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
#endif #endif
// Return number of filtered blocks // Return number of filtered blocks
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, void av1_clpf_frame(
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm, const YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *org,
int enable_fb_flag, unsigned int strength, AV1_COMMON *cm, int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane, unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *, int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *, const YV12_BUFFER_CONFIG *, const AV1_COMMON *cm, int, int,
const AV1_COMMON *cm, int, int, int, int, unsigned int, unsigned int, int8_t *, int)) {
unsigned int, unsigned int, int8_t *)) {
/* Constrained low-pass filter (CLPF) */ /* Constrained low-pass filter (CLPF) */
int c, k, l, m, n; int c, k, l, m, n;
const int subx = plane != AOM_PLANE_Y && frame->subsampling_x; const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
...@@ -124,6 +123,11 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, ...@@ -124,6 +123,11 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
? (plane == AOM_PLANE_U ? frame->u_buffer : frame->v_buffer) ? (plane == AOM_PLANE_U ? frame->u_buffer : frame->v_buffer)
: frame->y_buffer; : frame->y_buffer;
uint8_t *dst_buffer; uint8_t *dst_buffer;
// Damping is the filter cut-off log2 point for the constrain function.
// For instance, if the damping is 5, neighbour differences above 32 will
// be ignored and half of the strength will be applied for a difference of 16.
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
// Make buffer space for in-place filtering // Make buffer space for in-place filtering
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
...@@ -169,7 +173,8 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, ...@@ -169,7 +173,8 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength, decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2, fb_size_log2,
cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride + cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride +
xoff / MIN_FB_SIZE))) { xoff / MIN_FB_SIZE,
plane))) {
// Iterate over all smaller blocks inside the filter block // Iterate over all smaller blocks inside the filter block
for (m = 0; m < ((h + bs - 1) >> bslog); m++) { for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
for (n = 0; n < ((w + bs - 1) >> bslog); n++) { for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
...@@ -260,16 +265,16 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, ...@@ -260,16 +265,16 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer), aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer),
CONVERT_TO_SHORTPTR(dst_buffer), sstride, CONVERT_TO_SHORTPTR(dst_buffer), sstride,
dstride, xpos, ypos, sizex, sizey, strength, dstride, xpos, ypos, sizex, sizey, strength,
boundary_type, cm->bit_depth); boundary_type, damping);
} else { } else {
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos, aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, strength, boundary_type, ypos, sizex, sizey, strength, boundary_type,
cm->bit_depth); damping);
} }
#else #else
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos, aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, strength, boundary_type, ypos, sizex, sizey, strength, boundary_type,
cm->bit_depth); damping);
#endif #endif
} }
} }
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2) #define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2)
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G, int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
int H, int b, unsigned int bd); int H, int b, unsigned int dmp);
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm, const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength, int enable_fb_flag, unsigned int strength,
...@@ -27,6 +27,6 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, ...@@ -27,6 +27,6 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *, int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *, const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int, const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *)); unsigned int, unsigned int, int8_t *, int));
#endif #endif
This diff is collapsed.
...@@ -16,11 +16,12 @@ ...@@ -16,11 +16,12 @@
// sign(a - b) * max(0, abs(a - b) - max(0, abs(a - b) - // sign(a - b) * max(0, abs(a - b) - max(0, abs(a - b) -
// strength + (abs(a - b) >> (5 - log2(s))))) // strength + (abs(a - b) >> (5 - log2(s)))))
SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength) { SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength,
unsigned int damping) {
const v128 diff = v128_sub_8(v128_max_u8(a, b), v128_min_u8(a, b)); const v128 diff = v128_sub_8(v128_max_u8(a, b), v128_min_u8(a, b));
const v128 sign = v128_cmpeq_8(v128_min_u8(a, b), a); // -(a <= b) const v128 sign = v128_cmpeq_8(v128_min_u8(a, b), a); // -(a <= b)
const v128 s = v128_ssub_u8(v128_dup_8(strength), const v128 s = v128_ssub_u8(v128_dup_8(strength),
v128_shr_u8(diff, 5 - get_msb(strength))); v128_shr_u8(diff, damping - get_msb(strength)));
return v128_sub_8(v128_xor(sign, v128_ssub_u8(diff, v128_ssub_u8(diff, s))), return v128_sub_8(v128_xor(sign, v128_ssub_u8(diff, v128_ssub_u8(diff, s))),
sign); sign);
} }
...@@ -30,14 +31,15 @@ SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength) { ...@@ -30,14 +31,15 @@ SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength) {
// 3/16 * constrain(e, x, s) + 1/16 * constrain(f, x, s) + // 3/16 * constrain(e, x, s) + 1/16 * constrain(f, x, s) +
// 3/16 * constrain(g, x, s) + 1/16 * constrain(h, x, s) // 3/16 * constrain(g, x, s) + 1/16 * constrain(h, x, s)
SIMD_INLINE v128 calc_delta(v128 x, v128 a, v128 b, v128 c, v128 d, v128 e, SIMD_INLINE v128 calc_delta(v128 x, v128 a, v128 b, v128 c, v128 d, v128 e,
v128 f, v128 g, v128 h, unsigned int s) { v128 f, v128 g, v128 h, unsigned int s,
unsigned int dmp) {
const v128 bdeg = const v128 bdeg =
v128_add_8(v128_add_8(constrain(b, x, s), constrain(d, x, s)), v128_add_8(v128_add_8(constrain(b, x, s, dmp), constrain(d, x, s, dmp)),
v128_add_8(constrain(e, x, s), constrain(g, x, s))); v128_add_8(constrain(e, x, s, dmp), constrain(g, x, s, dmp)));
const v128 delta = const v128 delta = v128_add_8(
v128_add_8(v128_add_8(v128_add_8(constrain(a, x, s), constrain(c, x, s)), v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(c, x, s, dmp)),
v128_add_8(constrain(f, x, s), constrain(h, x, s))), v128_add_8(constrain(f, x, s, dmp), constrain(h, x, s, dmp))),
v128_add_8(v128_add_8(bdeg, bdeg), bdeg)); v128_add_8(v128_add_8(bdeg, bdeg), bdeg));
return v128_add_8( return v128_add_8(
x, v128_shr_s8( x, v128_shr_s8(
v128_add_8(v128_dup_8(8), v128_add_8(v128_dup_8(8),
......
...@@ -2746,7 +2746,8 @@ static int clpf_bit(UNUSED int k, UNUSED int l, ...@@ -2746,7 +2746,8 @@ static int clpf_bit(UNUSED int k, UNUSED int l,
UNUSED const YV12_BUFFER_CONFIG *org, UNUSED const YV12_BUFFER_CONFIG *org,
UNUSED const AV1_COMMON *cm, UNUSED int block_size, UNUSED const AV1_COMMON *cm, UNUSED int block_size,
UNUSED int w, UNUSED int h, UNUSED unsigned int strength, UNUSED int w, UNUSED int h, UNUSED unsigned int strength,
UNUSED unsigned int fb_size_log2, int8_t *bit) { UNUSED unsigned int fb_size_log2, int8_t *bit,
UNUSED int plane) {
return *bit; return *bit;
} }
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride, void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
int ostride, int x0, int y0, int width, int height, int ostride, int x0, int y0, int width, int height,
int *sum0, int *sum1, unsigned int strength, int size, int *sum0, int *sum1, unsigned int strength, int size,
unsigned int bd) { unsigned int dmp) {
int x, y; int x, y;
for (y = y0; y < y0 + size; y++) { for (y = y0; y < y0 + size; y++) {
for (x = x0; x < x0 + size; x++) { for (x = x0; x < x0 + size; x++) {
...@@ -34,7 +34,7 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride, ...@@ -34,7 +34,7 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x]; const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x];
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x]; const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x];
const int delta = const int delta =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, bd); av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, dmp);
const int Y = X + delta; const int Y = X + delta;
*sum0 += (O - X) * (O - X); *sum0 += (O - X) * (O - X);
*sum1 += (O - Y) * (O - Y); *sum1 += (O - Y) * (O - Y);
...@@ -45,7 +45,7 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride, ...@@ -45,7 +45,7 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org, void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0, int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int size, int width, int height, int *sum, int size,
unsigned int bd) { unsigned int dmp) {
int x, y; int x, y;
for (y = y0; y < y0 + size; y++) { for (y = y0; y < y0 + size; y++) {
...@@ -60,9 +60,9 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org, ...@@ -60,9 +60,9 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)]; const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)];
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x]; const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x];
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x]; const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x];
const int delta1 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, bd); const int delta1 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, dmp);
const int delta2 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, bd); const int delta2 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, dmp);
const int delta3 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, bd); const int delta3 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, dmp);
const int F1 = X + delta1; const int F1 = X + delta1;
const int F2 = X + delta2; const int F2 = X + delta2;
const int F3 = X + delta3; const int F3 = X + delta3;
...@@ -79,7 +79,8 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org, ...@@ -79,7 +79,8 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org, void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0, int width, int rstride, int ostride, int x0, int y0, int width,
int height, int *sum0, int *sum1, int height, int *sum0, int *sum1,
unsigned int strength, int size, unsigned int bd) { unsigned int strength, int size, unsigned int bd,
unsigned int dmp) {
const int shift = bd - 8; const int shift = bd - 8;
int x, y; int x, y;
for (y = y0; y < y0 + size; y++) { for (y = y0; y < y0 + size; y++) {
...@@ -95,7 +96,7 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org, ...@@ -95,7 +96,7 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift; const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift;
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift; const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift;
const int delta = av1_clpf_sample(X, A, B, C, D, E, F, G, H, const int delta = av1_clpf_sample(X, A, B, C, D, E, F, G, H,
strength >> shift, bd - shift); strength >> shift, dmp - shift);
const int Y = X + delta; const int Y = X + delta;
*sum0 += (O - X) * (O - X); *sum0 += (O - X) * (O - X);
*sum1 += (O - Y) * (O - Y); *sum1 += (O - Y) * (O - Y);
...@@ -107,7 +108,7 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org, ...@@ -107,7 +108,7 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org, void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0, int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int size, int width, int height, int *sum, int size,
unsigned int bd) { unsigned int bd, unsigned int dmp) {
const int shift = bd - 8; const int shift = bd - 8;
int x, y; int x, y;
...@@ -124,11 +125,11 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org, ...@@ -124,11 +125,11 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift; const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift;
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift; const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift;
const int delta1 = const int delta1 =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, bd - shift); av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, dmp - shift);
const int delta2 = const int delta2 =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, bd - shift); av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, dmp - shift);
const int delta3 = const int delta3 =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, bd - shift); av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, dmp - shift);
const int F1 = X + delta1; const int F1 = X + delta1;
const int F2 = X + delta2; const int F2 = X + delta2;
const int F3 = X + delta3; const int F3 = X + delta3;
...@@ -144,8 +145,10 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org, ...@@ -144,8 +145,10 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength, int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, int8_t *res) { unsigned int fb_size_log2, int8_t *res, int plane) {
int m, n, sum0 = 0, sum1 = 0; int m, n, sum0 = 0, sum1 = 0;
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
for (m = 0; m < h; m++) { for (m = 0; m < h; m++) {
for (n = 0; n < w; n++) { for (n = 0; n < w; n++) {
...@@ -160,18 +163,18 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, ...@@ -160,18 +163,18 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
CONVERT_TO_SHORTPTR(org->y_buffer), rec->y_stride, CONVERT_TO_SHORTPTR(org->y_buffer), rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width, org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength, rec->y_crop_height, &sum0, &sum1, strength,
block_size, cm->bit_depth); block_size, cm->bit_depth, damping);
} else { } else {
aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride, aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width, org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength, rec->y_crop_height, &sum0, &sum1, strength,
block_size, cm->bit_depth); block_size, damping);
} }
#else #else
aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride, aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width, org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength, block_size, rec->y_crop_height, &sum0, &sum1, strength, block_size,
cm->bit_depth); damping);
#endif #endif
} }
} }
...@@ -214,6 +217,9 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, ...@@ -214,6 +217,9 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height; plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride; int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride; int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0; sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0;
if (plane == AOM_PLANE_Y && if (plane == AOM_PLANE_Y &&
fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) { fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
...@@ -270,19 +276,19 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec, ...@@ -270,19 +276,19 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
->mbmi.skip; ->mbmi.skip;
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
aom_clpf_detect_multi_hbd(CONVERT_TO_SHORTPTR(rec_buffer), aom_clpf_detect_multi_hbd(
CONVERT_TO_SHORTPTR(org_buffer), rec_stride, CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
org_stride, xpos, ypos, rec_width, rec_height, rec_stride, org_stride, xpos, ypos, rec_width, rec_height,
sum + skip, block_size, cm->bit_depth); sum + skip, block_size, cm->bit_depth, damping);
} else { } else {
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum + skip, xpos, ypos, rec_width, rec_height, sum + skip,
block_size, cm->bit_depth); block_size, damping);
} }
#else #else
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum + skip, xpos, ypos, rec_width, rec_height, sum + skip,
block_size, cm->bit_depth); block_size, damping);
#endif #endif
filtered |= !skip; filtered |= !skip;
} }
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec, int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength, int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, int8_t *res); unsigned int fb_size_log2, int8_t *res, int plane);
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec, void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
......
...@@ -69,7 +69,7 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org, ...@@ -69,7 +69,7 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0, int rstride, int ostride, int x0, int y0,
int width, int height, int *sum0, int *sum1, int width, int height, int *sum0, int *sum1,
unsigned int strength, int size, unsigned int strength, int size,
unsigned int bd) { unsigned int dmp) {
const int bottom = height - 2 - y0; const int bottom = height - 2 - y0;
const int right = width - 8 - x0; const int right = width - 8 - x0;
ssd128_internal ssd0 = v128_ssd_u8_init(); ssd128_internal ssd0 = v128_ssd_u8_init();
...@@ -78,7 +78,7 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org, ...@@ -78,7 +78,7 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
if (size != 8) { // Fallback to plain C if (size != 8) { // Fallback to plain C
aom_clpf_detect_c(rec, org, rstride, ostride, x0, y0, width, height, sum0, aom_clpf_detect_c(rec, org, rstride, ostride, x0, y0, width, height, sum0,
sum1, strength, size, bd); sum1, strength, size, dmp);
return; return;
} }
...@@ -90,8 +90,8 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org, ...@@ -90,8 +90,8 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
read_two_lines(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o, &r, read_two_lines(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o, &r,
&a, &b, &c, &d, &e, &f, &g, &h); &a, &b, &c, &d, &e, &f, &g, &h);
ssd0 = v128_ssd_u8(ssd0, o, r); ssd0 = v128_ssd_u8(ssd0, o, r);
ssd1 = ssd1 = v128_ssd_u8(ssd1, o,
v128_ssd_u8(ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, strength)); calc_delta(r, a, b, c, d, e, f, g, h, strength, dmp));
rec += rstride * 2; rec += rstride * 2;
org += ostride * 2; org += ostride * 2;
} }
...@@ -102,17 +102,17 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org, ...@@ -102,17 +102,17 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
SIMD_INLINE void calc_delta_multi(v128 r, v128 o, v128 a, v128 b, v128 c, SIMD_INLINE void calc_delta_multi(v128 r, v128 o, v128 a, v128 b, v128 c,
v128 d, v128 e, v128 f, v128 g, v128 h, v128 d, v128 e, v128 f, v128 g, v128 h,
ssd128_internal *ssd1, ssd128_internal *ssd2, ssd128_internal *ssd1, ssd128_internal *ssd2,
ssd128_internal *ssd3) { ssd128_internal *ssd3, unsigned int dmp) {
*ssd1 = v128_ssd_u8(*ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, 1)); *ssd1 = v128_ssd_u8(*ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, 1, dmp));
*ssd2 = v128_ssd_u8(*ssd2, o, calc_delta(r, a, b, c, d, e, f, g, h, 2)); *ssd2 = v128_ssd_u8(*ssd2, o, calc_delta(r, a, b, c, d, e, f, g, h, 2, dmp));
*ssd3 = v128_ssd_u8(*ssd3, o, calc_delta(r, a, b, c, d, e, f, g, h, 4)); *ssd3 = v128_ssd_u8(*ssd3, o, calc_delta(r, a, b, c, d, e, f, g, h, 4, dmp));
} }
// Test multiple filter strengths at once. // Test multiple filter strengths at once.
void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org, void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0, int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int size, int width, int height, int *sum, int size,
unsigned int bd) { unsigned int dmp) {
const int bottom = height - 2 - y0; const int bottom = height - 2 - y0;
const int right = width - 8 - x0; const int right = width - 8 - x0;
ssd128_internal ssd0 = v128_ssd_u8_init(); ssd128_internal ssd0 = v128_ssd_u8_init();
...@@ -123,7 +123,7 @@ void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org, ...@@ -123,7 +123,7 @@ void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org,
if (size != 8) { // Fallback to plain C if (size != 8) { // Fallback to plain C
aom_clpf_detect_multi_c(rec, org, rstride, ostride, x0, y0, width, height, aom_clpf_detect_multi_c(rec, org, rstride, ostride, x0, y0, width, height,
sum, size, bd); sum, size, dmp);
return; return;
} }
...@@ -135,7 +135,7 @@ void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org, ...@@ -135,7 +135,7 @@ void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org,
read_two_lines(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o, &r, read_two_lines(rec, org, rstride, ostride, x0