Commit ecf9a0c8 authored by Steinar Midtskogen's avatar Steinar Midtskogen Committed by Yaowu Xu

Extend CLPF to chroma.

Objective quality impact (low latency):

PSNR YCbCr:      0.13%     -1.37%     -1.79%
   PSNRHVS:      0.03%
      SSIM:      0.24%
    MSSSIM:      0.10%
 CIEDE2000:     -0.83%

Change-Id: I8ddf0def569286775f0f9d4d4005932766a7fc27
parent 9021d09f
......@@ -590,16 +590,16 @@ if (aom_config("CONFIG_CLPF") eq "yes") {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift, int size";
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift, int size";
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
}
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size";
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size";
specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/;
}
......
......@@ -8,9 +8,10 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include "av1/common/clpf.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_image.h"
#include "aom_dsp/aom_dsp_common.h"
int av1_clpf_maxbits(const AV1_COMMON *cm) {
......@@ -72,21 +73,24 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
#endif
// Return number of filtered blocks
int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
int av1_clpf_frame(
const YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *org,
AV1_COMMON *cm, int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *)) {
const YV12_BUFFER_CONFIG *, const AV1_COMMON *cm, int, int,
int, unsigned int, unsigned int, uint8_t *, int)) {
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
const int bs = MI_SIZE;
const int width = frame->y_crop_width;
const int height = frame->y_crop_height;
const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
const int suby = plane != AOM_PLANE_Y && frame->subsampling_y;
const int bs = (subx || suby) ? 4 : 8;
const int bslog = get_msb(bs);
int width = plane != AOM_PLANE_Y ? frame->uv_crop_width : frame->y_crop_width;
int height =
plane != AOM_PLANE_Y ? frame->uv_crop_height : frame->y_crop_height;
int xpos, ypos;
const int sstride = frame->y_stride;
const int sstride = plane != AOM_PLANE_Y ? frame->uv_stride : frame->y_stride;
int dstride = bs;
const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
......@@ -97,9 +101,11 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
int cache_idx = 0;
const int cache_size = num_fb_hor << (2 * fb_size_log2);
const int cache_blocks = cache_size / (bs * bs);
YV12_BUFFER_CONFIG dst = *frame;
assert(bs == 8); // Optimised code assumes this.
uint8_t *src_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? frame->u_buffer : frame->v_buffer)
: frame->y_buffer;
uint8_t *dst_buffer;
#if CONFIG_AOM_HIGHBITDEPTH
strength <<= (cm->bit_depth - 8);
......@@ -108,10 +114,10 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
// Make buffer space for in-place filtering
#if CONFIG_AOM_HIGHBITDEPTH
CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size << !!cm->use_highbitdepth));
dst.y_buffer = cm->use_highbitdepth ? CONVERT_TO_BYTEPTR(cache) : cache;
dst_buffer = cm->use_highbitdepth ? CONVERT_TO_BYTEPTR(cache) : cache;
#else
CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size));
dst.y_buffer = cache;
dst_buffer = cache;
#endif
CHECK_MEM_ERROR(cm, cache_ptr, aom_malloc(cache_blocks * sizeof(*cache_ptr)));
CHECK_MEM_ERROR(cm, cache_dst, aom_malloc(cache_blocks * sizeof(*cache_dst)));
......@@ -130,7 +136,8 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
ypos = yoff + m * bs;
if (xpos < width && ypos < height) {
allskip &=
cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip;
}
}
......@@ -144,13 +151,14 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
if (!allskip && // Do not filter the block if all is skip encoded
(!enable_fb_flag ||
decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2, blocks + block_index))) {
fb_size_log2, blocks + block_index, plane))) {
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < (h + bs - 1) / bs; m++) {
for (n = 0; n < (w + bs - 1) / bs; n++) {
for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
xpos = xoff + n * bs;
ypos = yoff + m * bs;
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip) { // Not skip block
// Temporary buffering needed if filtering in-place
if (cache_ptr[cache_idx]) {
......@@ -161,50 +169,59 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
for (c = 0; c < bs; c++) {
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
if (bs == 8)
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
}
} else {
for (c = 0; c < bs; c++)
if (bs == 8)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
}
#else
for (c = 0; c < bs; c++)
if (bs == 8)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
#endif
}
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
cache_ptr[cache_idx] = cache + cache_idx * bs * bs * 2;
dst.y_buffer =
dst_buffer =
CONVERT_TO_BYTEPTR(cache_ptr[cache_idx]) - ypos * bs - xpos;
} else {
cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
dst.y_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
dst_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
}
#else
cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
dst.y_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
dst_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
#endif
cache_dst[cache_idx] = frame->y_buffer + ypos * sstride + xpos;
cache_dst[cache_idx] = src_buffer + ypos * sstride + xpos;
if (++cache_idx >= cache_blocks) cache_idx = 0;
// Apply the filter
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(frame->y_buffer),
CONVERT_TO_SHORTPTR(dst.y_buffer), sstride,
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer),
CONVERT_TO_SHORTPTR(dst_buffer), sstride,
dstride, xpos, ypos, bs, bs, width, height,
strength);
} else {
aom_clpf_block(frame->y_buffer, dst.y_buffer, sstride, dstride,
xpos, ypos, bs, bs, width, height, strength);
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, bs, bs, width, height, strength);
}
#else
aom_clpf_block(frame->y_buffer, dst.y_buffer, sstride, dstride,
xpos, ypos, bs, bs, width, height, strength);
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, bs, bs, width, height, strength);
#endif
}
}
......@@ -223,16 +240,25 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
for (c = 0; c < bs; c++) {
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
if (bs == 8)
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
}
} else {
for (c = 0; c < bs; c++)
if (bs == 4)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
}
#else
for (c = 0; c < bs; c++)
if (bs == 4)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
#endif
......
......@@ -20,10 +20,10 @@ int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
unsigned int fb_size_log2, uint8_t *blocks, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *));
unsigned int, unsigned int, uint8_t *, int));
#endif
This diff is collapsed.
......@@ -153,7 +153,9 @@ typedef struct AV1Common {
#if CONFIG_CLPF
int clpf_numblocks;
int clpf_size;
int clpf_strength;
int clpf_strength_y;
int clpf_strength_u;
int clpf_strength_v;
uint8_t *clpf_blocks;
#endif
......
......@@ -29,6 +29,7 @@
#include "av1/common/alloccommon.h"
#if CONFIG_CLPF
#include "aom/aom_image.h"
#include "av1/common/clpf.h"
#endif
#include "av1/common/common.h"
......@@ -2046,8 +2047,10 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CLPF
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
cm->clpf_blocks = 0;
cm->clpf_strength = aom_rb_read_literal(rb, 2);
if (cm->clpf_strength) {
cm->clpf_strength_y = aom_rb_read_literal(rb, 2);
cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
if (cm->clpf_strength_y) {
cm->clpf_size = aom_rb_read_literal(rb, 2);
if (cm->clpf_size) {
int i;
......@@ -2065,7 +2068,8 @@ static int clpf_bit(UNUSED int k, UNUSED int l,
UNUSED const YV12_BUFFER_CONFIG *org,
UNUSED const AV1_COMMON *cm, UNUSED int block_size,
UNUSED int w, UNUSED int h, UNUSED unsigned int strength,
UNUSED unsigned int fb_size_log2, uint8_t *bit) {
UNUSED unsigned int fb_size_log2, uint8_t *bit,
UNUSED int comp) {
return *bit;
}
#endif
......@@ -3928,10 +3932,23 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_CLPF
if (cm->clpf_strength && !cm->skip_loop_filter) {
av1_clpf_frame(&pbi->cur_buf->buf, 0, cm, !!cm->clpf_size,
cm->clpf_strength + (cm->clpf_strength == 3),
4 + cm->clpf_size, cm->clpf_blocks, clpf_bit);
if (!cm->skip_loop_filter) {
const YV12_BUFFER_CONFIG *const frame = &pbi->cur_buf->buf;
if (cm->clpf_strength_y) {
av1_clpf_frame(frame, NULL, cm, !!cm->clpf_size,
cm->clpf_strength_y + (cm->clpf_strength_y == 3),
4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, clpf_bit);
}
if (cm->clpf_strength_u) {
av1_clpf_frame(frame, NULL, cm, 0,
cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4, NULL,
AOM_PLANE_U, NULL);
}
if (cm->clpf_strength_v) {
av1_clpf_frame(frame, NULL, cm, 0,
cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4, NULL,
AOM_PLANE_V, NULL);
}
}
if (cm->clpf_blocks) aom_free(cm->clpf_blocks);
#endif
......
......@@ -2590,8 +2590,10 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CLPF
static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->clpf_strength, 2);
if (cm->clpf_strength) {
aom_wb_write_literal(wb, cm->clpf_strength_y, 2);
aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
if (cm->clpf_strength_y) {
aom_wb_write_literal(wb, cm->clpf_size, 2);
if (cm->clpf_size) {
int i;
......
This diff is collapsed.
......@@ -17,10 +17,10 @@
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, uint8_t *res);
unsigned int fb_size_log2, uint8_t *res, int plane);
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int *best_strength, int *best_bs);
int *best_strength, int *best_bs, int plane);
#endif
This diff is collapsed.
......@@ -16,6 +16,7 @@
#include "av1/common/alloccommon.h"
#if CONFIG_CLPF
#include "aom/aom_image.h"
#include "av1/common/clpf.h"
#include "av1/encoder/clpf_rdo.h"
#endif
......@@ -3422,7 +3423,7 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
#endif
}
#if CONFIG_CLPF
cm->clpf_strength = 0;
cm->clpf_strength_y = cm->clpf_strength_u = cm->clpf_strength_v = 0;
cm->clpf_size = 2;
CHECK_MEM_ERROR(
cm, cm->clpf_blocks,
......@@ -3430,21 +3431,37 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
((cm->frame_to_show->y_crop_height + 31) & ~31) >>
10));
if (!is_lossless_requested(&cpi->oxcf)) {
const YV12_BUFFER_CONFIG *const frame = cm->frame_to_show;
// Find the best strength and block size for the entire frame
int fb_size_log2, strength;
av1_clpf_test_frame(cm->frame_to_show, cpi->Source, cm, &strength,
&fb_size_log2);
int fb_size_log2, strength_y, strength_u, strength_v;
av1_clpf_test_frame(frame, cpi->Source, cm, &strength_y, &fb_size_log2,
AOM_PLANE_Y);
av1_clpf_test_frame(frame, cpi->Source, cm, &strength_u, &fb_size_log2,
AOM_PLANE_U);
av1_clpf_test_frame(frame, cpi->Source, cm, &strength_v, &fb_size_log2,
AOM_PLANE_V);
if (!fb_size_log2) fb_size_log2 = get_msb(MAX_FB_SIZE);
if (strength) {
if (strength_y) {
// Apply the filter using the chosen strength
cm->clpf_strength = strength - (strength == 4);
cm->clpf_strength_y = strength_y - (strength_y == 4);
cm->clpf_size =
fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0;
cm->clpf_numblocks = av1_clpf_frame(
cm->frame_to_show, cpi->Source, cm, !!cm->clpf_size, strength,
4 + cm->clpf_size, cm->clpf_blocks, av1_clpf_decision);
frame, cpi->Source, cm, !!cm->clpf_size, strength_y,
4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, av1_clpf_decision);
}
if (strength_u) {
cm->clpf_strength_u = strength_u - (strength_u == 4);
av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, NULL, AOM_PLANE_U,
NULL);
}
if (strength_v) {
cm->clpf_strength_v = strength_v - (strength_v == 4);
av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, NULL, AOM_PLANE_V,
NULL);
}
}
#endif
......
......@@ -147,6 +147,8 @@ void test_clpf(int w, int h, int depth, int iterations,
<< "strength: " << (1 << strength) << std::endl
<< "xpos: " << xpos << std::endl
<< "ypos: " << ypos << std::endl
<< "w: " << w << std::endl
<< "h: " << h << std::endl
<< "A=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
<< "B=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
<< "C=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment