Commit 4305e6be authored by Steinar Midtskogen

CLPF: Add quality dependent damping in the constrain function

PSNR YCbCr:  -0.17%     -0.03%     -0.40%
APSNR YCbCr: -0.17%     -0.02%     -0.39%
PSNRHVS:     -0.06%
SSIM:        -0.17%
MSSSIM:      -0.07%
CIEDE2000:   -0.12%

Change-Id: I69a4b6a4e18c22c3930069396540a6fee45cb30d
parent ee4b3a80
......@@ -854,8 +854,8 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CDEF") eq "yes") {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
......@@ -866,8 +866,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
}
}
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt, unsigned int bd";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
......
......@@ -16,25 +16,25 @@
/* Sign of i as -1 or +1; zero is treated as positive. */
int sign(int i) {
  if (i < 0) return -1;
  return 1;
}
// Constrains a neighbour difference x for filter strength s:
//   sign(x) * max(0, |x| - max(0, |x| - s + (|x| >> shift)))
// Two variants of the signature and of the shift expression are listed here:
// one takes `bitdepth` and shifts by (bitdepth - 3 - get_msb(s)), the other
// takes `damping` and shifts by (damping - get_msb(s)).
// NOTE(review): presumably the `bitdepth` lines are the ones this change
// replaces (the commit title adds damping) - confirm against the raw diff.
// NOTE(review): the shift count is computed in unsigned arithmetic; callers
// must keep damping >= get_msb(s) and s != 0 - confirm this holds for every
// strength/damping pair that can reach this function.
int constrain(int x, int s, unsigned int bitdepth) {
int constrain(int x, int s, unsigned int damping) {
return sign(x) *
AOMMAX(0, abs(x) - AOMMAX(0, abs(x) - s + (abs(x) >> (bitdepth - 3 -
get_msb(s)))));
AOMMAX(0, abs(x) - AOMMAX(0, abs(x) - s +
(abs(x) >> (damping - get_msb(s)))));
}
// Computes the CLPF correction for sample X from its eight neighbours A..H.
// Each neighbour difference (N - X) is passed through constrain() with
// strength s; A, C, F and H are weighted 1 and B, D, E and G are weighted 3.
// The weighted sum is then rounded and divided by 16 (the `>> 4` below).
// The last parameter is forwarded unchanged to constrain(); both the `bd`
// and the `dmp` spelling of the signature/body are listed here.
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
int H, int s, unsigned int bd) {
int delta = 1 * constrain(A - X, s, bd) + 3 * constrain(B - X, s, bd) +
1 * constrain(C - X, s, bd) + 3 * constrain(D - X, s, bd) +
3 * constrain(E - X, s, bd) + 1 * constrain(F - X, s, bd) +
3 * constrain(G - X, s, bd) + 1 * constrain(H - X, s, bd);
int H, int s, unsigned int dmp) {
int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
1 * constrain(C - X, s, dmp) + 3 * constrain(D - X, s, dmp) +
3 * constrain(E - X, s, dmp) + 1 * constrain(F - X, s, dmp) +
3 * constrain(G - X, s, dmp) + 1 * constrain(H - X, s, dmp);
// Adding 8 rounds to nearest; subtracting (delta < 0) adjusts the rounding
// for negative sums before the arithmetic shift.
return (8 + delta - (delta < 0)) >> 4;
}
void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey,
unsigned int strength, BOUNDARY_TYPE bt,
unsigned int bitdepth) {
unsigned int damping) {
int x, y;
const int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2;
const int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY) * 2;
......@@ -53,7 +53,7 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
const int G = src[AOMMIN(ymax, y + 1) * sstride + x];
const int H = src[AOMMIN(ymax, y + 2) * sstride + x];
const int delta =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, bitdepth);
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
dst[y * dstride + x] = X + delta;
}
}
......@@ -64,7 +64,7 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey,
unsigned int strength, BOUNDARY_TYPE bt,
unsigned int bitdepth) {
unsigned int damping) {
int x, y;
const int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2;
const int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY) * 2;
......@@ -83,7 +83,7 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
const int G = src[AOMMIN(ymax, y + 1) * sstride + x];
const int H = src[AOMMIN(ymax, y + 2) * sstride + x];
const int delta =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, bitdepth);
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
dst[y * dstride + x] = X + delta;
}
}
......@@ -91,14 +91,13 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
#endif
// Return number of filtered blocks
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *)) {
void av1_clpf_frame(
const YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *org,
AV1_COMMON *cm, int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *, const AV1_COMMON *cm, int, int,
int, unsigned int, unsigned int, int8_t *, int)) {
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
......@@ -124,6 +123,11 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
? (plane == AOM_PLANE_U ? frame->u_buffer : frame->v_buffer)
: frame->y_buffer;
uint8_t *dst_buffer;
// Damping is the filter cut-off log2 point for the constrain function.
// For instance, if the damping is 5, neighbour differences above 32 will
// be ignored and half of the strength will be applied for a difference of 16.
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
// Make buffer space for in-place filtering
#if CONFIG_AOM_HIGHBITDEPTH
......@@ -169,7 +173,8 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2,
cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride +
xoff / MIN_FB_SIZE))) {
xoff / MIN_FB_SIZE,
plane))) {
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
......@@ -260,16 +265,16 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer),
CONVERT_TO_SHORTPTR(dst_buffer), sstride,
dstride, xpos, ypos, sizex, sizey, strength,
boundary_type, cm->bit_depth);
boundary_type, damping);
} else {
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, strength, boundary_type,
cm->bit_depth);
damping);
}
#else
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, strength, boundary_type,
cm->bit_depth);
damping);
#endif
}
}
......
......@@ -19,7 +19,7 @@
#define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2)
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
int H, int b, unsigned int bd);
int H, int b, unsigned int dmp);
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
......@@ -27,6 +27,6 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *));
unsigned int, unsigned int, int8_t *, int));
#endif
This diff is collapsed.
......@@ -16,11 +16,12 @@
// Element-wise constrain of the difference a - b (the v128 helpers used are
// the 8-bit `_8`/`_u8` variants):
// sign(a - b) * max(0, abs(a - b) - max(0, abs(a - b) -
// strength + (abs(a - b) >> (damping - log2(strength)))))
// Two variants are listed below: the signature without `damping` pairs with
// the hard-coded `5 - get_msb(strength)` shift, and the signature taking
// `damping` pairs with the `damping - get_msb(strength)` shift.
SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength) {
SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength,
unsigned int damping) {
// Absolute difference computed as max - min to stay in unsigned range.
const v128 diff = v128_sub_8(v128_max_u8(a, b), v128_min_u8(a, b));
const v128 sign = v128_cmpeq_8(v128_min_u8(a, b), a); // -(a <= b)
const v128 s = v128_ssub_u8(v128_dup_8(strength),
v128_shr_u8(diff, 5 - get_msb(strength)));
v128_shr_u8(diff, damping - get_msb(strength)));
// (v ^ sign) - sign negates v in the lanes where sign is all ones.
return v128_sub_8(v128_xor(sign, v128_ssub_u8(diff, v128_ssub_u8(diff, s))),
sign);
}
......@@ -30,14 +31,15 @@ SIMD_INLINE v128 constrain(v128 a, v128 b, unsigned int strength) {
// 3/16 * constrain(e, x, s) + 1/16 * constrain(f, x, s) +
// 3/16 * constrain(g, x, s) + 1/16 * constrain(h, x, s)
SIMD_INLINE v128 calc_delta(v128 x, v128 a, v128 b, v128 c, v128 d, v128 e,
v128 f, v128 g, v128 h, unsigned int s) {
v128 f, v128 g, v128 h, unsigned int s,
unsigned int dmp) {
const v128 bdeg =
v128_add_8(v128_add_8(constrain(b, x, s), constrain(d, x, s)),
v128_add_8(constrain(e, x, s), constrain(g, x, s)));
const v128 delta =
v128_add_8(v128_add_8(v128_add_8(constrain(a, x, s), constrain(c, x, s)),
v128_add_8(constrain(f, x, s), constrain(h, x, s))),
v128_add_8(v128_add_8(bdeg, bdeg), bdeg));
v128_add_8(v128_add_8(constrain(b, x, s, dmp), constrain(d, x, s, dmp)),
v128_add_8(constrain(e, x, s, dmp), constrain(g, x, s, dmp)));
const v128 delta = v128_add_8(
v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(c, x, s, dmp)),
v128_add_8(constrain(f, x, s, dmp), constrain(h, x, s, dmp))),
v128_add_8(v128_add_8(bdeg, bdeg), bdeg));
return v128_add_8(
x, v128_shr_s8(
v128_add_8(v128_dup_8(8),
......
......@@ -2746,7 +2746,8 @@ static int clpf_bit(UNUSED int k, UNUSED int l,
UNUSED const YV12_BUFFER_CONFIG *org,
UNUSED const AV1_COMMON *cm, UNUSED int block_size,
UNUSED int w, UNUSED int h, UNUSED unsigned int strength,
UNUSED unsigned int fb_size_log2, int8_t *bit) {
UNUSED unsigned int fb_size_log2, int8_t *bit,
UNUSED int plane) {
return *bit;
}
......
......@@ -19,7 +19,7 @@
void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
int ostride, int x0, int y0, int width, int height,
int *sum0, int *sum1, unsigned int strength, int size,
unsigned int bd) {
unsigned int dmp) {
int x, y;
for (y = y0; y < y0 + size; y++) {
for (x = x0; x < x0 + size; x++) {
......@@ -34,7 +34,7 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x];
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x];
const int delta =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, bd);
av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, dmp);
const int Y = X + delta;
*sum0 += (O - X) * (O - X);
*sum1 += (O - Y) * (O - Y);
......@@ -45,7 +45,7 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int size,
unsigned int bd) {
unsigned int dmp) {
int x, y;
for (y = y0; y < y0 + size; y++) {
......@@ -60,9 +60,9 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)];
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x];
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x];
const int delta1 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, bd);
const int delta2 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, bd);
const int delta3 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, bd);
const int delta1 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, dmp);
const int delta2 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, dmp);
const int delta3 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, dmp);
const int F1 = X + delta1;
const int F2 = X + delta2;
const int F3 = X + delta3;
......@@ -79,7 +79,8 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0, int width,
int height, int *sum0, int *sum1,
unsigned int strength, int size, unsigned int bd) {
unsigned int strength, int size, unsigned int bd,
unsigned int dmp) {
const int shift = bd - 8;
int x, y;
for (y = y0; y < y0 + size; y++) {
......@@ -95,7 +96,7 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift;
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift;
const int delta = av1_clpf_sample(X, A, B, C, D, E, F, G, H,
strength >> shift, bd - shift);
strength >> shift, dmp - shift);
const int Y = X + delta;
*sum0 += (O - X) * (O - X);
*sum1 += (O - Y) * (O - Y);
......@@ -107,7 +108,7 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int size,
unsigned int bd) {
unsigned int bd, unsigned int dmp) {
const int shift = bd - 8;
int x, y;
......@@ -124,11 +125,11 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift;
const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift;
const int delta1 =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, bd - shift);
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, dmp - shift);
const int delta2 =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, bd - shift);
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, dmp - shift);
const int delta3 =
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, bd - shift);
av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, dmp - shift);
const int F1 = X + delta1;
const int F2 = X + delta2;
const int F3 = X + delta3;
......@@ -144,8 +145,10 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, int8_t *res) {
unsigned int fb_size_log2, int8_t *res, int plane) {
int m, n, sum0 = 0, sum1 = 0;
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
for (m = 0; m < h; m++) {
for (n = 0; n < w; n++) {
......@@ -160,18 +163,18 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
CONVERT_TO_SHORTPTR(org->y_buffer), rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength,
block_size, cm->bit_depth);
block_size, cm->bit_depth, damping);
} else {
aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength,
block_size, cm->bit_depth);
block_size, damping);
}
#else
aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength, block_size,
cm->bit_depth);
damping);
#endif
}
}
......@@ -214,6 +217,9 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0;
if (plane == AOM_PLANE_Y &&
fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
......@@ -270,19 +276,19 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
->mbmi.skip;
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_detect_multi_hbd(CONVERT_TO_SHORTPTR(rec_buffer),
CONVERT_TO_SHORTPTR(org_buffer), rec_stride,
org_stride, xpos, ypos, rec_width, rec_height,
sum + skip, block_size, cm->bit_depth);
aom_clpf_detect_multi_hbd(
CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
rec_stride, org_stride, xpos, ypos, rec_width, rec_height,
sum + skip, block_size, cm->bit_depth, damping);
} else {
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum + skip,
block_size, cm->bit_depth);
block_size, damping);
}
#else
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum + skip,
block_size, cm->bit_depth);
block_size, damping);
#endif
filtered |= !skip;
}
......
......@@ -17,7 +17,7 @@
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, int8_t *res);
unsigned int fb_size_log2, int8_t *res, int plane);
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
......
......@@ -69,7 +69,7 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum0, int *sum1,
unsigned int strength, int size,
unsigned int bd) {
unsigned int dmp) {
const int bottom = height - 2 - y0;
const int right = width - 8 - x0;
ssd128_internal ssd0 = v128_ssd_u8_init();
......@@ -78,7 +78,7 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
if (size != 8) { // Fallback to plain C
aom_clpf_detect_c(rec, org, rstride, ostride, x0, y0, width, height, sum0,
sum1, strength, size, bd);
sum1, strength, size, dmp);
return;
}
......@@ -90,8 +90,8 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
read_two_lines(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o, &r,
&a, &b, &c, &d, &e, &f, &g, &h);
ssd0 = v128_ssd_u8(ssd0, o, r);
ssd1 =
v128_ssd_u8(ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, strength));
ssd1 = v128_ssd_u8(ssd1, o,
calc_delta(r, a, b, c, d, e, f, g, h, strength, dmp));
rec += rstride * 2;
org += ostride * 2;
}
......@@ -102,17 +102,17 @@ void SIMD_FUNC(aom_clpf_detect)(const uint8_t *rec, const uint8_t *org,
// Runs calc_delta() on r with its neighbours a..h for the three filter
// strengths 1, 2 and 4, and folds each result against o into *ssd1, *ssd2
// and *ssd3 respectively via v128_ssd_u8().
// Both forms are listed below: the one without `dmp`, and the one that
// takes `dmp` and forwards it to every calc_delta() call.
SIMD_INLINE void calc_delta_multi(v128 r, v128 o, v128 a, v128 b, v128 c,
v128 d, v128 e, v128 f, v128 g, v128 h,
ssd128_internal *ssd1, ssd128_internal *ssd2,
ssd128_internal *ssd3) {
*ssd1 = v128_ssd_u8(*ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, 1));
*ssd2 = v128_ssd_u8(*ssd2, o, calc_delta(r, a, b, c, d, e, f, g, h, 2));
*ssd3 = v128_ssd_u8(*ssd3, o, calc_delta(r, a, b, c, d, e, f, g, h, 4));
ssd128_internal *ssd3, unsigned int dmp) {
*ssd1 = v128_ssd_u8(*ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, 1, dmp));
*ssd2 = v128_ssd_u8(*ssd2, o, calc_delta(r, a, b, c, d, e, f, g, h, 2, dmp));
*ssd3 = v128_ssd_u8(*ssd3, o, calc_delta(r, a, b, c, d, e, f, g, h, 4, dmp));
}
// Test multiple filter strengths at once.
void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int size,
unsigned int bd) {
unsigned int dmp) {
const int bottom = height - 2 - y0;
const int right = width - 8 - x0;
ssd128_internal ssd0 = v128_ssd_u8_init();
......@@ -123,7 +123,7 @@ void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org,
if (size != 8) { // Fallback to plain C
aom_clpf_detect_multi_c(rec, org, rstride, ostride, x0, y0, width, height,
sum, size, bd);
sum, size, dmp);
return;
}
......@@ -135,7 +135,7 @@ void SIMD_FUNC(aom_clpf_detect_multi)(const uint8_t *rec, const uint8_t *org,
read_two_lines(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o, &r,
&a, &b, &c, &d, &e, &f, &g, &h);
ssd0 = v128_ssd_u8(ssd0, o, r);
calc_delta_multi(r, o, a, b, c, d, e, f, g, h, &ssd1, &ssd2, &ssd3);
calc_delta_multi(r, o, a, b, c, d, e, f, g, h, &ssd1, &ssd2, &ssd3, dmp);
rec += 2 * rstride;
org += 2 * ostride;
}
......@@ -190,7 +190,8 @@ void SIMD_FUNC(aom_clpf_detect_hbd)(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum0, int *sum1,
unsigned int strength, int size,
unsigned int bitdepth) {
unsigned int bitdepth,
unsigned int damping) {
const int shift = bitdepth - 8;
const int bottom = height - 2 - y0;
const int right = width - 8 - x0;
......@@ -200,7 +201,7 @@ void SIMD_FUNC(aom_clpf_detect_hbd)(const uint16_t *rec, const uint16_t *org,
if (size != 8) { // Fallback to plain C
aom_clpf_detect_hbd_c(rec, org, rstride, ostride, x0, y0, width, height,
sum0, sum1, strength, size, bitdepth);
sum0, sum1, strength, size, bitdepth, damping);
return;
}
......@@ -212,8 +213,8 @@ void SIMD_FUNC(aom_clpf_detect_hbd)(const uint16_t *rec, const uint16_t *org,
read_two_lines_hbd(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o,
&r, &a, &b, &c, &d, &e, &f, &g, &h, shift);
ssd0 = v128_ssd_u8(ssd0, o, r);
ssd1 = v128_ssd_u8(
ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h, strength >> shift));
// NOTE(review): the samples are read with `shift` and the strength is
// reduced by `shift`, but `damping` is passed through unadjusted. The plain
// C aom_clpf_detect_hbd_c() passes `dmp - shift` to av1_clpf_sample() for
// the same down-shifted samples, so for bitdepth > 8 the two paths use
// different shift counts in constrain(). Confirm whether this should be
// `damping - shift`.
ssd1 = v128_ssd_u8(ssd1, o, calc_delta(r, a, b, c, d, e, f, g, h,
strength >> shift, damping));
rec += rstride * 2;
org += ostride * 2;
}
......@@ -225,7 +226,8 @@ void SIMD_FUNC(aom_clpf_detect_multi_hbd)(const uint16_t *rec,
const uint16_t *org, int rstride,
int ostride, int x0, int y0,
int width, int height, int *sum,
int size, unsigned int bitdepth) {
int size, unsigned int bitdepth,
unsigned int damping) {
const int bottom = height - 2 - y0;
const int right = width - 8 - x0;
ssd128_internal ssd0 = v128_ssd_u8_init();
......@@ -236,7 +238,7 @@ void SIMD_FUNC(aom_clpf_detect_multi_hbd)(const uint16_t *rec,
if (size != 8) { // Fallback to plain C
aom_clpf_detect_multi_hbd_c(rec, org, rstride, ostride, x0, y0, width,
height, sum, size, bitdepth);
height, sum, size, bitdepth, damping);
return;
}
......@@ -248,7 +250,8 @@ void SIMD_FUNC(aom_clpf_detect_multi_hbd)(const uint16_t *rec,
read_two_lines_hbd(rec, org, rstride, ostride, x0, y0, bottom, right, y, &o,
&r, &a, &b, &c, &d, &e, &f, &g, &h, bitdepth - 8);
ssd0 = v128_ssd_u8(ssd0, o, r);
calc_delta_multi(r, o, a, b, c, d, e, f, g, h, &ssd1, &ssd2, &ssd3);
// NOTE(review): the samples are read with a shift of `bitdepth - 8`, but
// `damping` is forwarded unadjusted. The plain C
// aom_clpf_detect_multi_hbd_c() passes `dmp - shift` (shift = bd - 8) to
// av1_clpf_sample(), so for bitdepth > 8 the SIMD and C paths use different
// damping values. Confirm whether this should be `damping - (bitdepth - 8)`.
calc_delta_multi(r, o, a, b, c, d, e, f, g, h, &ssd1, &ssd2, &ssd3,
damping);
rec += rstride * 2;
org += ostride * 2;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment