Commit 80eaf1a1, authored by Yaowu Xu, committed by Gerrit Code Review
Browse files

Merge "Extend CLPF to chroma." into nextgenv2

parents 39b25dfa ecf9a0c8
......@@ -590,16 +590,16 @@ if (aom_config("CONFIG_CLPF") eq "yes") {
# High-bitdepth CLPF prototypes: registered only when CONFIG_AOM_HIGHBITDEPTH
# is "yes". These variants take uint16_t sample pointers, and the detect
# functions carry an extra `int shift` argument.
# NOTE(review): aom_clpf_detect_hbd and aom_clpf_detect_multi_hbd are each
# declared twice below, once without and once with a trailing `int size`
# parameter. This looks like the removed and added lines of a diff shown
# together; confirm which signature is the live one before relying on it.
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
# Block filter: src -> dst for a sizex x sizey block at (x0, y0).
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
# Detect for one strength: results are returned through *sum0 / *sum1.
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift, int size";
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
# Multi-strength detect: results are returned through the *sum array.
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift, int size";
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
}
# 8-bit CLPF prototypes (uint8_t sample pointers). Each is specialized for
# sse2, ssse3, sse4_1 and neon.
# NOTE(review): aom_clpf_detect and aom_clpf_detect_multi are each declared
# twice below, once without and once with a trailing `int size` parameter.
# This looks like the removed and added lines of a diff shown together;
# confirm which signature is the live one.
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size";
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size";
specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/;
}
......
......@@ -8,9 +8,10 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include "av1/common/clpf.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_image.h"
#include "aom_dsp/aom_dsp_common.h"
int av1_clpf_maxbits(const AV1_COMMON *cm) {
......@@ -72,21 +73,24 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
#endif
// Return number of filtered blocks
int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *)) {
int av1_clpf_frame(
const YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *org,
AV1_COMMON *cm, int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *, const AV1_COMMON *cm, int, int,
int, unsigned int, unsigned int, uint8_t *, int)) {
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
const int bs = MI_SIZE;
const int width = frame->y_crop_width;
const int height = frame->y_crop_height;
const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
const int suby = plane != AOM_PLANE_Y && frame->subsampling_y;
const int bs = (subx || suby) ? 4 : 8;
const int bslog = get_msb(bs);
int width = plane != AOM_PLANE_Y ? frame->uv_crop_width : frame->y_crop_width;
int height =
plane != AOM_PLANE_Y ? frame->uv_crop_height : frame->y_crop_height;
int xpos, ypos;
const int sstride = frame->y_stride;
const int sstride = plane != AOM_PLANE_Y ? frame->uv_stride : frame->y_stride;
int dstride = bs;
const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
......@@ -97,9 +101,11 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
int cache_idx = 0;
const int cache_size = num_fb_hor << (2 * fb_size_log2);
const int cache_blocks = cache_size / (bs * bs);
YV12_BUFFER_CONFIG dst = *frame;
assert(bs == 8); // Optimised code assumes this.
uint8_t *src_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? frame->u_buffer : frame->v_buffer)
: frame->y_buffer;
uint8_t *dst_buffer;
#if CONFIG_AOM_HIGHBITDEPTH
strength <<= (cm->bit_depth - 8);
......@@ -108,10 +114,10 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
// Make buffer space for in-place filtering
#if CONFIG_AOM_HIGHBITDEPTH
CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size << !!cm->use_highbitdepth));
dst.y_buffer = cm->use_highbitdepth ? CONVERT_TO_BYTEPTR(cache) : cache;
dst_buffer = cm->use_highbitdepth ? CONVERT_TO_BYTEPTR(cache) : cache;
#else
CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size));
dst.y_buffer = cache;
dst_buffer = cache;
#endif
CHECK_MEM_ERROR(cm, cache_ptr, aom_malloc(cache_blocks * sizeof(*cache_ptr)));
CHECK_MEM_ERROR(cm, cache_dst, aom_malloc(cache_blocks * sizeof(*cache_dst)));
......@@ -130,7 +136,8 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
ypos = yoff + m * bs;
if (xpos < width && ypos < height) {
allskip &=
cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip;
}
}
......@@ -144,13 +151,14 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
if (!allskip && // Do not filter the block if all is skip encoded
(!enable_fb_flag ||
decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2, blocks + block_index))) {
fb_size_log2, blocks + block_index, plane))) {
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < (h + bs - 1) / bs; m++) {
for (n = 0; n < (w + bs - 1) / bs; n++) {
for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
xpos = xoff + n * bs;
ypos = yoff + m * bs;
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip) { // Not skip block
// Temporary buffering needed if filtering in-place
if (cache_ptr[cache_idx]) {
......@@ -161,50 +169,59 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
for (c = 0; c < bs; c++) {
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
if (bs == 8)
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
}
} else {
for (c = 0; c < bs; c++)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
if (bs == 8)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
}
#else
for (c = 0; c < bs; c++)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
if (bs == 8)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
#endif
}
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
cache_ptr[cache_idx] = cache + cache_idx * bs * bs * 2;
dst.y_buffer =
dst_buffer =
CONVERT_TO_BYTEPTR(cache_ptr[cache_idx]) - ypos * bs - xpos;
} else {
cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
dst.y_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
dst_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
}
#else
cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
dst.y_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
dst_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
#endif
cache_dst[cache_idx] = frame->y_buffer + ypos * sstride + xpos;
cache_dst[cache_idx] = src_buffer + ypos * sstride + xpos;
if (++cache_idx >= cache_blocks) cache_idx = 0;
// Apply the filter
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(frame->y_buffer),
CONVERT_TO_SHORTPTR(dst.y_buffer), sstride,
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer),
CONVERT_TO_SHORTPTR(dst_buffer), sstride,
dstride, xpos, ypos, bs, bs, width, height,
strength);
} else {
aom_clpf_block(frame->y_buffer, dst.y_buffer, sstride, dstride,
xpos, ypos, bs, bs, width, height, strength);
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, bs, bs, width, height, strength);
}
#else
aom_clpf_block(frame->y_buffer, dst.y_buffer, sstride, dstride,
xpos, ypos, bs, bs, width, height, strength);
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, bs, bs, width, height, strength);
#endif
}
}
......@@ -223,18 +240,27 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
for (c = 0; c < bs; c++) {
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
if (bs == 8)
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
}
} else {
for (c = 0; c < bs; c++)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
if (bs == 4)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
}
#else
for (c = 0; c < bs; c++)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
if (bs == 4)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
#endif
}
......
......@@ -20,10 +20,10 @@ int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
unsigned int fb_size_log2, uint8_t *blocks, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *));
unsigned int, unsigned int, uint8_t *, int));
#endif
This diff is collapsed.
......@@ -153,7 +153,9 @@ typedef struct AV1Common {
#if CONFIG_CLPF
int clpf_numblocks;
int clpf_size;
int clpf_strength;
int clpf_strength_y;
int clpf_strength_u;
int clpf_strength_v;
uint8_t *clpf_blocks;
#endif
......
......@@ -29,6 +29,7 @@
#include "av1/common/alloccommon.h"
#if CONFIG_CLPF
#include "aom/aom_image.h"
#include "av1/common/clpf.h"
#endif
#include "av1/common/common.h"
......@@ -2046,8 +2047,10 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CLPF
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
cm->clpf_blocks = 0;
cm->clpf_strength = aom_rb_read_literal(rb, 2);
if (cm->clpf_strength) {
cm->clpf_strength_y = aom_rb_read_literal(rb, 2);
cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
if (cm->clpf_strength_y) {
cm->clpf_size = aom_rb_read_literal(rb, 2);
if (cm->clpf_size) {
int i;
......@@ -2065,7 +2068,8 @@ static int clpf_bit(UNUSED int k, UNUSED int l,
UNUSED const YV12_BUFFER_CONFIG *org,
UNUSED const AV1_COMMON *cm, UNUSED int block_size,
UNUSED int w, UNUSED int h, UNUSED unsigned int strength,
UNUSED unsigned int fb_size_log2, uint8_t *bit) {
UNUSED unsigned int fb_size_log2, uint8_t *bit,
UNUSED int comp) {
return *bit;
}
#endif
......@@ -3928,10 +3932,23 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_CLPF
if (cm->clpf_strength && !cm->skip_loop_filter) {
av1_clpf_frame(&pbi->cur_buf->buf, 0, cm, !!cm->clpf_size,
cm->clpf_strength + (cm->clpf_strength == 3),
4 + cm->clpf_size, cm->clpf_blocks, clpf_bit);
if (!cm->skip_loop_filter) {
const YV12_BUFFER_CONFIG *const frame = &pbi->cur_buf->buf;
if (cm->clpf_strength_y) {
av1_clpf_frame(frame, NULL, cm, !!cm->clpf_size,
cm->clpf_strength_y + (cm->clpf_strength_y == 3),
4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, clpf_bit);
}
if (cm->clpf_strength_u) {
av1_clpf_frame(frame, NULL, cm, 0,
cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4, NULL,
AOM_PLANE_U, NULL);
}
if (cm->clpf_strength_v) {
av1_clpf_frame(frame, NULL, cm, 0,
cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4, NULL,
AOM_PLANE_V, NULL);
}
}
if (cm->clpf_blocks) aom_free(cm->clpf_blocks);
#endif
......
......@@ -2590,8 +2590,10 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CLPF
static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->clpf_strength, 2);
if (cm->clpf_strength) {
aom_wb_write_literal(wb, cm->clpf_strength_y, 2);
aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
if (cm->clpf_strength_y) {
aom_wb_write_literal(wb, cm->clpf_size, 2);
if (cm->clpf_size) {
int i;
......
......@@ -11,16 +11,17 @@
#include "av1/common/clpf.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_image.h"
#include "aom/aom_integer.h"
#include "av1/common/quant_common.h"
// Calculate the error of a filtered and unfiltered block
void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
int ostride, int x0, int y0, int width, int height,
int *sum0, int *sum1, unsigned int strength) {
int *sum0, int *sum1, unsigned int strength, int size) {
int x, y;
for (y = y0; y < y0 + 8; y++) {
for (x = x0; x < x0 + 8; x++) {
for (y = y0; y < y0 + size; y++) {
for (x = x0; x < x0 + size; x++) {
int O = org[y * ostride + x];
int X = rec[y * rstride + x];
int A = rec[AOMMAX(0, y - 1) * rstride + x];
......@@ -39,11 +40,11 @@ void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum) {
int width, int height, int *sum, int size) {
int x, y;
for (y = y0; y < y0 + 8; y++) {
for (x = x0; x < x0 + 8; x++) {
for (y = y0; y < y0 + size; y++) {
for (x = x0; x < x0 + size; x++) {
int O = org[y * ostride + x];
int X = rec[y * rstride + x];
int A = rec[AOMMAX(0, y - 1) * rstride + x];
......@@ -71,10 +72,10 @@ void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0, int width,
int height, int *sum0, int *sum1,
unsigned int strength, int shift) {
unsigned int strength, int shift, int size) {
int x, y;
for (y = y0; y < y0 + 8; y++) {
for (x = x0; x < x0 + 8; x++) {
for (y = y0; y < y0 + size; y++) {
for (x = x0; x < x0 + size; x++) {
int O = org[y * ostride + x] >> shift;
int X = rec[y * rstride + x] >> shift;
int A = rec[AOMMAX(0, y - 1) * rstride + x] >> shift;
......@@ -94,11 +95,12 @@ void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
// aom_clpf_detect_multi_c() apart from "rec" and "org".
void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int rstride, int ostride, int x0, int y0,
int width, int height, int *sum, int shift) {
int width, int height, int *sum, int shift,
int size) {
int x, y;
for (y = y0; y < y0 + 8; y++) {
for (x = x0; x < x0 + 8; x++) {
for (y = y0; y < y0 + size; y++) {
for (x = x0; x < x0 + size; x++) {
int O = org[y * ostride + x] >> shift;
int X = rec[y * rstride + x] >> shift;
int A = rec[AOMMAX(0, y - 1) * rstride + x] >> shift;
......@@ -125,31 +127,45 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, uint8_t *res) {
unsigned int fb_size_log2, uint8_t *res, int plane) {
int m, n, sum0 = 0, sum1 = 0;
const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
uint8_t *rec_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? rec->u_buffer : rec->v_buffer)
: rec->y_buffer;
uint8_t *org_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? org->u_buffer : org->v_buffer)
: org->y_buffer;
int rec_width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
int rec_height =
plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
for (m = 0; m < h; m++) {
for (n = 0; n < w; n++) {
int xpos = (l << fb_size_log2) + n * block_size;
int ypos = (k << fb_size_log2) + m * block_size;
const int bs = MAX_MIB_SIZE;
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_detect_hbd(CONVERT_TO_SHORTPTR(rec->y_buffer),
CONVERT_TO_SHORTPTR(org->y_buffer), rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength,
cm->bit_depth - 8);
aom_clpf_detect_hbd(
CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
rec_stride, org_stride, xpos, ypos, rec_width, rec_height, &sum0,
&sum1, strength, cm->bit_depth - 8, block_size);
} else {
aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength);
aom_clpf_detect(rec_buffer, org_buffer, rec_stride, org_stride, xpos,
ypos, rec_width, rec_height, &sum0, &sum1, strength,
block_size);
}
#else
aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, &sum0, &sum1, strength);
aom_clpf_detect(rec_buffer, org_buffer, rec_stride, org_stride, xpos,
ypos, rec_width, rec_height, &sum0, &sum1, strength,
block_size);
#endif
}
}
......@@ -161,6 +177,7 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
// Calculate the square error of all filter settings. Result:
// res[0][0] : unfiltered
// res[0][1-3] : strength=1,2,4, no signals
// (Only for luma:)
// res[1][0] : (bit count, fb size = 128)
// res[1][1-3] : strength=1,2,4, fb size = 128
// res[2][0] : (bit count, fb size = 64)
......@@ -170,12 +187,28 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
unsigned int block_size, unsigned int fb_size_log2, int w,
int h, int64_t res[4][4]) {
int h, int64_t res[4][4], int plane) {
int c, m, n, filtered = 0;
int sum[4];
const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
int bslog = get_msb(block_size);
uint8_t *rec_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? rec->u_buffer : rec->v_buffer)
: rec->y_buffer;
uint8_t *org_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? org->u_buffer : org->v_buffer)
: org->y_buffer;
int rec_width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
int rec_height =
plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
sum[0] = sum[1] = sum[2] = sum[3] = 0;
if (fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
if (plane == AOM_PLANE_Y &&
fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
int w1, h1, w2, h2, i, sum1, sum2, sum3, oldfiltered;
fb_size_log2--;
......@@ -190,16 +223,17 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
oldfiltered = res[i][0];
res[i][0] = 0;
filtered =
clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1, res);
filtered = clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
res, plane);
if (1 << (fb_size_log2 - bslog) < w)
filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
fb_size_log2, w2, h1, res);
fb_size_log2, w2, h1, res, plane);
if (1 << (fb_size_log2 - bslog) < h) {
filtered |= clpf_rdo(y + (1 << fb_size_log2), x, rec, org, cm, block_size,
fb_size_log2, w1, h2, res);
filtered |= clpf_rdo(y + (1 << fb_size_log2), x + (1 << fb_size_log2),
rec, org, cm, block_size, fb_size_log2, w2, h2, res);
fb_size_log2, w1, h2, res, plane);
filtered |=
clpf_rdo(y + (1 << fb_size_log2), x + (1 << fb_size_log2), rec, org,
cm, block_size, fb_size_log2, w2, h2, res, plane);
}
res[i][1] = AOMMIN(sum1 + res[i][0], res[i][1]);
......@@ -213,32 +247,31 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
for (n = 0; n < w; n++) {
int xpos = x + n * block_size;
int ypos = y + m * block_size;
if (!cm->mi_grid_visible[ypos / MAX_MIB_SIZE * cm->mi_stride +
xpos / MAX_MIB_SIZE]
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_detect_multi_hbd(CONVERT_TO_SHORTPTR(rec->y_buffer),
CONVERT_TO_SHORTPTR(org->y_buffer),
rec->y_stride, org->y_stride, xpos, ypos,
rec->y_crop_width, rec->y_crop_height, sum,
cm->bit_depth - 8);
aom_clpf_detect_multi_hbd(
CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum,
cm->bit_depth - 8, block_size);
} else {
aom_clpf_detect_multi(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, sum);
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum,
block_size);
}
#else
aom_clpf_detect_multi(rec->y_buffer, org->y_buffer, rec->y_stride,
org->y_stride, xpos, ypos, rec->y_crop_width,
rec->y_crop_height, sum);
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum,
block_size);
#endif
filtered = 1;
}
}
}
for (c = 0; c < 4; c++) {
for (c = 0; c < (plane == AOM_PLANE_Y ? 4 : 1); c++) {
res[c][0] += sum[0];
res[c][1] += sum[1];
res[c][2] += sum[2];
......@@ -249,30 +282,42 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int *best_strength, int *best_bs) {
int *best_strength, int *best_bs, int plane) {
int c, j, k, l;