Commit 97535038 authored by Steinar Midtskogen, committed by Yaowu Xu

Move CLPF block signals from frame to SB level.

These signals were in the uncompressed frame header (as a temporary
hack), which caused two problems:

* We don't want that header to be duplicated in the slice header
* It was necessary to signal the number of bits to transmit up front

However, the filter size can be 128x128 which is greater than the SB
size, and a decoder wouldn't be able to know whether to read a bit or
not until the final SB of that 128x128 block has been decoded
(depending on whether the 128x128 is all skip or not).  Therefore the
signalling was changed for 128x128 blocks so that every top left SB of
a 128x128 filter block contains a signal regardless of whether the
block is all skip or not.  Also, all the MBs of a 128x128 block are
filtered even if they are skip MBs.  This gives the signal a purpose
even when the 128x128 block is all skip, and it also gives a slight
coding gain as it leaves a way to filter skip blocks, which was
previously forbidden.

Low latency:
PSNR YCbCr:     -0.19%     -0.14%     -0.06%
   PSNRHVS:     -0.15%
      SSIM:     -0.13%
    MSSSIM:     -0.15%
 CIEDE2000:     -0.19%

High latency:
PSNR YCbCr:     -0.03%     -0.01%     -0.09%
   PSNRHVS:      0.04%
      SSIM:      0.00%
    MSSSIM:      0.02%
 CIEDE2000:     -0.02%

Change-Id: I69ba7144d07d388b4f0968f6a53558f480979171
parent 2d5f752a
......@@ -14,14 +14,6 @@
#include "aom/aom_image.h"
#include "aom_dsp/aom_dsp_common.h"
// Returns the width, in bits, of the field used to transmit the number of
// CLPF filter blocks in a frame.
//
// The frame width and height (mi_cols / mi_rows scaled by MI_SIZE) are each
// rounded up to a multiple of the filter block size, 1 << (clpf_size + 4).
// Dividing the padded area by the area of one filter block gives the block
// count, and one plus the index of its most significant set bit is returned.
int av1_clpf_maxbits(const AV1_COMMON *cm) {
  const int fb_log2 = cm->clpf_size + 4;
  const int padded_w = ALIGN_POWER_OF_TWO(cm->mi_cols * MI_SIZE, fb_log2);
  const int padded_h = ALIGN_POWER_OF_TWO(cm->mi_rows * MI_SIZE, fb_log2);
  // Shift by twice the block-size log2 (clpf_size * 2 + 8), i.e. divide the
  // padded area by the filter block area.
  const int num_fb = (padded_w * padded_h) >> (cm->clpf_size * 2 + 8);
  return get_msb(num_fb) + 1;
}
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
int delta = 4 * clamp(A - X, -b, b) + clamp(B - X, -b, b) +
3 * clamp(C - X, -b, b) + 3 * clamp(D - X, -b, b) +
......@@ -73,14 +65,14 @@ void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
#endif
// Return number of filtered blocks
int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *)) {
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *)) {
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
......@@ -95,7 +87,6 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
int dstride = bs;
const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
int block_index = 0;
uint8_t *cache = NULL;
uint8_t **cache_ptr = NULL;
uint8_t **cache_dst = NULL;
......@@ -125,7 +116,7 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
for (k = 0; k < num_fb_ver; k++) {
for (l = 0; l < num_fb_hor; l++) {
int h, w;
int allskip = 1;
int allskip = !(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2);
const int xoff = l << fb_size_log2;
const int yoff = k << fb_size_log2;
for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
......@@ -148,8 +139,11 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
w += !w << fb_size_log2;
if (!allskip && // Do not filter the block if all is skip encoded
(!enable_fb_flag ||
// Only called if fb_flag enabled (luma only)
decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2, blocks + block_index))) {
fb_size_log2,
cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride +
xoff / MIN_FB_SIZE))) {
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
......@@ -160,8 +154,9 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
sizey = AOMMIN(height - ypos, bs);
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip) { // Not skip block
// Temporary buffering needed if filtering in-place
->mbmi.skip ||
(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2)) {
// Temporary buffering needed for in-place filtering
if (cache_ptr[cache_idx]) {
// Copy filtered block back into the frame
#if CONFIG_AOM_HIGHBITDEPTH
......@@ -247,7 +242,6 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
}
}
}
block_index += !allskip; // Count number of blocks filtered
}
}
......@@ -287,6 +281,4 @@ int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
aom_free(cache);
aom_free(cache_ptr);
aom_free(cache_dst);
return block_index;
}
......@@ -13,17 +13,19 @@
#include "av1/common/reconinter.h"
#define MAX_FB_SIZE 128
#define MAX_FB_SIZE_LOG2 7
#define MIN_FB_SIZE_LOG2 5
#define MAX_FB_SIZE (1 << MAX_FB_SIZE_LOG2)
#define MIN_FB_SIZE (1 << MIN_FB_SIZE_LOG2)
int av1_clpf_maxbits(const AV1_COMMON *cm);
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
int av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *));
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *));
#endif
......@@ -246,7 +246,17 @@ typedef enum {
PALETTE_COLORS
} PALETTE_COLOR;
// Definitions for CLPF (constrained low-pass filter) block signalling.
// NOTE(review): the other CLPF sections visible in this change are guarded
// with `#if CONFIG_CLPF`; this one uses `#ifdef` -- confirm that is intended.
#ifdef CONFIG_CLPF
// Value held in the per-block flag buffer (clpf_blocks) for a filter block
// that has no explicit on/off flag; the encoder writes no bit for such an
// entry.
#define CLPF_NOFLAG -1
// Filter block size as signalled in two bits when the luma strength is
// non-zero. The numeric values are the signalled values, and callers use
// (4 + value) as the log2 of the filter block size.
typedef enum {
// No per-block signalling.
CLPF_NOSIZE = 0,
// One flag per 32x32 filter block.
CLPF_32X32 = 1,
// One flag per 64x64 filter block.
CLPF_64X64 = 2,
// One flag per 128x128 filter block.
CLPF_128X128 = 3
} CLPF_BLOCK_SIZE;
#endif
typedef enum ATTRIBUTE_PACKED {
DC_PRED, // Average of above and left pixels
V_PRED, // Vertical
H_PRED, // Horizontal
......
......@@ -151,12 +151,27 @@ typedef struct AV1Common {
int use_highbitdepth;
#endif
#if CONFIG_CLPF
int clpf_numblocks;
int clpf_size;
// Two bits are used to signal the strength for all blocks and the
// valid values are:
// 0: no filtering
// 1: strength = 1
// 2: strength = 2
// 3: strength = 4
int clpf_strength_y;
int clpf_strength_u;
int clpf_strength_v;
uint8_t *clpf_blocks;
// If clpf_strength_y is not 0, another two bits are used to signal
// the filter block size. The valid values for clpf_size are:
// 0: no block signalling
// 1: 32x32
// 2: 64x64
// 3: 128x128
CLPF_BLOCK_SIZE clpf_size;
// Buffer for storing whether to filter individual blocks.
int8_t *clpf_blocks;
int clpf_stride;
#endif
YV12_BUFFER_CONFIG *frame_to_show;
......
......@@ -1440,6 +1440,22 @@ static int read_skip(AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
}
}
#endif // CONFIG_SUPERTX
#if CONFIG_CLPF
// Returns 1 when every mode-info unit in the size x size area whose top-left
// corner is at (mi_col, mi_row) is skip coded, and 0 as soon as a non-skip
// unit is found. `size` is in mode-info units and the area is clipped to the
// frame's mi_cols / mi_rows; an area that is empty after clipping counts as
// all skip.
static int clpf_all_skip(const AV1_COMMON *cm, int mi_col, int mi_row,
                         int size) {
  const int cols = AOMMIN(size, cm->mi_cols - mi_col);
  const int rows = AOMMIN(size, cm->mi_rows - mi_row);
  int y, x;
  for (y = 0; y < rows; y++) {
    const int row_base = (mi_row + y) * cm->mi_stride + mi_col;
    for (x = 0; x < cols; x++) {
      // A single non-skip unit decides the answer.
      if (!cm->mi_grid_visible[row_base + x]->mbmi.skip) return 0;
    }
  }
  return 1;
}
#endif
// TODO(slavarnway): eliminate bsize and subsize in future commits
static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
......@@ -1772,6 +1788,43 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
#if CONFIG_CLPF
if (bsize == BLOCK_64X64 && cm->clpf_strength_y &&
cm->clpf_size != CLPF_NOSIZE) {
const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride +
mi_col * MI_SIZE / MIN_FB_SIZE;
if (!((mi_row * MI_SIZE) & 127) && !((mi_col * MI_SIZE) & 127) &&
cm->clpf_size == CLPF_128X128) {
cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
} else if (cm->clpf_size == CLPF_64X64 &&
!clpf_all_skip(cm, mi_col, mi_row, 64 / MI_SIZE)) {
cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
} else if (cm->clpf_size == CLPF_32X32) {
const int tr = tl + 1;
const int bl = tl + cm->clpf_stride;
const int br = tr + cm->clpf_stride;
const int size = 32 / MI_SIZE;
// Up to four bits per SB
if (!clpf_all_skip(cm, mi_col, mi_row, size))
cm->clpf_blocks[tl] = aom_read_literal(r, 1, ACCT_STR);
if (mi_col + size < cm->mi_cols &&
!clpf_all_skip(cm, mi_col + size, mi_row, size))
cm->clpf_blocks[tr] = aom_read_literal(r, 1, ACCT_STR);
if (mi_row + size < cm->mi_rows &&
!clpf_all_skip(cm, mi_col, mi_row + size, size))
cm->clpf_blocks[bl] = aom_read_literal(r, 1, ACCT_STR);
if (mi_col + size < cm->mi_cols && mi_row + size < cm->mi_rows &&
!clpf_all_skip(cm, mi_col + size, mi_row + size, size))
cm->clpf_blocks[br] = aom_read_literal(r, 1, ACCT_STR);
}
}
#endif
#if CONFIG_DERING
if (bsize == BLOCK_64X64) {
if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
......@@ -2045,20 +2098,26 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
}
#if CONFIG_CLPF
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
static void setup_clpf(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) {
AV1_COMMON *const cm = &pbi->common;
const int width = pbi->cur_buf->buf.y_crop_width;
const int height = pbi->cur_buf->buf.y_crop_height;
cm->clpf_blocks = 0;
cm->clpf_strength_y = aom_rb_read_literal(rb, 2);
cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
if (cm->clpf_strength_y) {
cm->clpf_size = aom_rb_read_literal(rb, 2);
if (cm->clpf_size) {
int i;
cm->clpf_numblocks = aom_rb_read_literal(rb, av1_clpf_maxbits(cm));
CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(cm->clpf_numblocks));
for (i = 0; i < cm->clpf_numblocks; i++) {
cm->clpf_blocks[i] = aom_rb_read_literal(rb, 1);
}
if (cm->clpf_size != CLPF_NOSIZE) {
int size;
cm->clpf_stride =
((width + MIN_FB_SIZE - 1) & ~(MIN_FB_SIZE - 1)) >> MIN_FB_SIZE_LOG2;
size =
cm->clpf_stride * ((height + MIN_FB_SIZE - 1) & ~(MIN_FB_SIZE - 1)) >>
MIN_FB_SIZE_LOG2;
CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size));
memset(cm->clpf_blocks, -1, size);
}
}
}
......@@ -2068,7 +2127,7 @@ static int clpf_bit(UNUSED int k, UNUSED int l,
UNUSED const YV12_BUFFER_CONFIG *org,
UNUSED const AV1_COMMON *cm, UNUSED int block_size,
UNUSED int w, UNUSED int h, UNUSED unsigned int strength,
UNUSED unsigned int fb_size_log2, uint8_t *bit) {
UNUSED unsigned int fb_size_log2, int8_t *bit) {
return *bit;
}
#endif
......@@ -3361,7 +3420,7 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
setup_loopfilter(cm, rb);
#if CONFIG_CLPF
setup_clpf(cm, rb);
setup_clpf(pbi, rb);
#endif
#if CONFIG_DERING
setup_dering(cm, rb);
......@@ -3933,18 +3992,18 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
if (!cm->skip_loop_filter) {
const YV12_BUFFER_CONFIG *const frame = &pbi->cur_buf->buf;
if (cm->clpf_strength_y) {
av1_clpf_frame(frame, NULL, cm, !!cm->clpf_size,
av1_clpf_frame(frame, NULL, cm, cm->clpf_size != CLPF_NOSIZE,
cm->clpf_strength_y + (cm->clpf_strength_y == 3),
4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, clpf_bit);
4 + cm->clpf_size, AOM_PLANE_Y, clpf_bit);
}
if (cm->clpf_strength_u) {
av1_clpf_frame(frame, NULL, cm, 0,
cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4, NULL,
av1_clpf_frame(frame, NULL, cm, 0, // No block signals for chroma
cm->clpf_strength_u + (cm->clpf_strength_u == 3), 4,
AOM_PLANE_U, NULL);
}
if (cm->clpf_strength_v) {
av1_clpf_frame(frame, NULL, cm, 0,
cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4, NULL,
av1_clpf_frame(frame, NULL, cm, 0, // No block signals for chroma
cm->clpf_strength_v + (cm->clpf_strength_v == 3), 4,
AOM_PLANE_V, NULL);
}
}
......
......@@ -1869,6 +1869,37 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
#if CONFIG_CLPF
if (bsize == BLOCK_64X64 && cm->clpf_blocks && cm->clpf_strength_y &&
cm->clpf_size != CLPF_NOSIZE) {
const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride +
mi_col * MI_SIZE / MIN_FB_SIZE;
const int tr = tl + 1;
const int bl = tl + cm->clpf_stride;
const int br = tr + cm->clpf_stride;
// Up to four bits per SB.
// When clpf_size indicates a size larger than the SB size
// (CLPF_128X128), one bit for every fourth SB will be transmitted
// regardless of skip blocks.
if (cm->clpf_blocks[tl] != CLPF_NOFLAG)
aom_write_literal(w, cm->clpf_blocks[tl], 1);
if (mi_col + MI_SIZE / 2 < cm->mi_cols &&
cm->clpf_blocks[tr] != CLPF_NOFLAG)
aom_write_literal(w, cm->clpf_blocks[tr], 1);
if (mi_row + MI_SIZE / 2 < cm->mi_rows &&
cm->clpf_blocks[bl] != CLPF_NOFLAG)
aom_write_literal(w, cm->clpf_blocks[bl], 1);
if (mi_row + MI_SIZE / 2 < cm->mi_rows &&
mi_col + MI_SIZE / 2 < cm->mi_cols &&
cm->clpf_blocks[br] != CLPF_NOFLAG)
aom_write_literal(w, cm->clpf_blocks[br], 1);
}
#endif
#if CONFIG_DERING
if (bsize == BLOCK_64X64 && cm->dering_level != 0 &&
!sb_all_skip(cm, mi_row, mi_col)) {
......@@ -2533,18 +2564,6 @@ static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
if (cm->clpf_strength_y) {
aom_wb_write_literal(wb, cm->clpf_size, 2);
if (cm->clpf_size) {
int i;
// TODO(stemidts): The number of bits to transmit could be
// implicitly deduced if transmitted after the filter block or
// after the frame (when it's known whether the block is all
// skip and implicitly unfiltered). And the bits do not have
// 50% probability, so a more efficient coding is possible.
aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm));
for (i = 0; i < cm->clpf_numblocks; i++) {
aom_wb_write_literal(wb, cm->clpf_blocks ? cm->clpf_blocks[i] : 0, 1);
}
}
}
}
#endif
......
......@@ -127,14 +127,15 @@ void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, uint8_t *res) {
unsigned int fb_size_log2, int8_t *res) {
int m, n, sum0 = 0, sum1 = 0;
for (m = 0; m < h; m++) {
for (n = 0; n < w; n++) {
int xpos = (l << fb_size_log2) + n * block_size;
int ypos = (k << fb_size_log2) + m * block_size;
if (!cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE]
if (fb_size_log2 == MAX_FB_SIZE_LOG2 ||
!cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE]
->mbmi.skip) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
......@@ -167,6 +168,8 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
// (Only for luma:)
// res[1][0] : (bit count, fb size = 128)
// res[1][1-3] : strength=1,2,4, fb size = 128
// res[1][4] : unfiltered, including skip
// res[1][5-7] : strength=1,2,4, including skip, fb_size = 128
// res[2][0] : (bit count, fb size = 64)
// res[2][1-3] : strength=1,2,4, fb size = 64
// res[3][0] : (bit count, fb size = 32)
......@@ -174,9 +177,9 @@ int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
unsigned int block_size, unsigned int fb_size_log2, int w,
int h, int64_t res[4][4], int plane) {
int h, int64_t res[4][8], int plane) {
int c, m, n, filtered = 0;
int sum[4];
int sum[8];
const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
int bslog = get_msb(block_size);
......@@ -193,12 +196,12 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
sum[0] = sum[1] = sum[2] = sum[3] = 0;
sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0;
if (plane == AOM_PLANE_Y &&
fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
int w1, h1, w2, h2, i, sum1, sum2, sum3, oldfiltered;
fb_size_log2--;
filtered = fb_size_log2-- == MAX_FB_SIZE_LOG2;
w1 = AOMMIN(1 << (fb_size_log2 - bslog), w);
h1 = AOMMIN(1 << (fb_size_log2 - bslog), h);
w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1);
......@@ -210,8 +213,8 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
oldfiltered = res[i][0];
res[i][0] = 0;
filtered = clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
res, plane);
filtered |= clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
res, plane);
if (1 << (fb_size_log2 - bslog) < w)
filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
fb_size_log2, w2, h1, res, plane);
......@@ -223,10 +226,18 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
cm, block_size, fb_size_log2, w2, h2, res, plane);
}
// Correct sums for unfiltered blocks
res[i][1] = AOMMIN(sum1 + res[i][0], res[i][1]);
res[i][2] = AOMMIN(sum2 + res[i][0], res[i][2]);
res[i][3] = AOMMIN(sum3 + res[i][0], res[i][3]);
if (i == 1) {
res[i][5] = AOMMIN(sum1 + res[i][4], res[i][5]);
res[i][6] = AOMMIN(sum2 + res[i][4], res[i][6]);
res[i][7] = AOMMIN(sum3 + res[i][4], res[i][7]);
}
res[i][0] = oldfiltered + filtered; // Number of signal bits
return filtered;
}
......@@ -234,27 +245,28 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
for (n = 0; n < w; n++) {
int xpos = x + n * block_size;
int ypos = y + m * block_size;
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip) {
int skip = // Filtered skip blocks stored only for fb_size == 128
4 *
!!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip;
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_detect_multi_hbd(
CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum,
cm->bit_depth - 8, block_size);
} else {
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum,
block_size);
}
#else
if (cm->use_highbitdepth) {
aom_clpf_detect_multi_hbd(CONVERT_TO_SHORTPTR(rec_buffer),
CONVERT_TO_SHORTPTR(org_buffer), rec_stride,
org_stride, xpos, ypos, rec_width, rec_height,
sum + skip, cm->bit_depth - 8, block_size);
} else {
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum,
xpos, ypos, rec_width, rec_height, sum + skip,
block_size);
#endif
filtered = 1;
}
#else
aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
xpos, ypos, rec_width, rec_height, sum + skip,
block_size);
#endif
filtered |= !skip;
}
}
......@@ -263,6 +275,12 @@ static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
res[c][1] += sum[1];
res[c][2] += sum[2];
res[c][3] += sum[3];
if (c != 1) continue;
// Only needed when fb_size == 128
res[c][4] += sum[4];
res[c][5] += sum[5];
res[c][6] += sum[6];
res[c][7] += sum[7];
}
return filtered;
}
......@@ -271,7 +289,7 @@ void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int *best_strength, int *best_bs, int plane) {
int c, j, k, l;
int64_t best, sums[4][4];
int64_t best, sums[4][8];
int width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
int height = plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
const int bs = MI_SIZE;
......@@ -303,8 +321,14 @@ void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
}
}
if (plane != AOM_PLANE_Y) // Slightly favour unfiltered chroma
// For fb_size == 128 skip blocks are included in the result.
if (plane == AOM_PLANE_Y) {
sums[1][1] += sums[1][5] - sums[1][4];
sums[1][2] += sums[1][6] - sums[1][4];
sums[1][3] += sums[1][7] - sums[1][4];
} else { // Slightly favour unfiltered chroma
sums[0][0] -= sums[0][0] >> 7;
}
for (j = 0; j < 4; j++) {
static const double lambda_square[] = {
......
......@@ -17,7 +17,7 @@
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, uint8_t *res);
unsigned int fb_size_log2, int8_t *res);
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
......
......@@ -3408,12 +3408,23 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
}
#if CONFIG_CLPF
cm->clpf_strength_y = cm->clpf_strength_u = cm->clpf_strength_v = 0;
cm->clpf_size = 2;
CHECK_MEM_ERROR(
cm, cm->clpf_blocks,
aom_malloc(((cm->frame_to_show->y_crop_width + 31) & ~31) *
((cm->frame_to_show->y_crop_height + 31) & ~31) >>
10));
cm->clpf_size = CLPF_64X64;
// Allocate buffer to hold the status of all filter blocks:
// 1 = On, 0 = off, -1 = implicitly off
{
int size;
cm->clpf_stride = ((cm->frame_to_show->y_crop_width + MIN_FB_SIZE - 1) &
~(MIN_FB_SIZE - 1)) >>
MIN_FB_SIZE_LOG2;
size = cm->clpf_stride *
((cm->frame_to_show->y_crop_height + MIN_FB_SIZE - 1) &
~(MIN_FB_SIZE - 1)) >>
MIN_FB_SIZE_LOG2;
CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size));
memset(cm->clpf_blocks, CLPF_NOFLAG, size);
}
if (!is_lossless_requested(&cpi->oxcf)) {
const YV12_BUFFER_CONFIG *const frame = cm->frame_to_show;
......@@ -3428,20 +3439,18 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
// Apply the filter using the chosen strength
cm->clpf_strength_y = strength_y - (strength_y == 4);
cm->clpf_size =
fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0;
cm->clpf_numblocks = av1_clpf_frame(
frame, cpi->Source, cm, !!cm->clpf_size, strength_y,
4 + cm->clpf_size, cm->clpf_blocks, AOM_PLANE_Y, av1_clpf_decision);
fb_size_log2 ? fb_size_log2 - MAX_FB_SIZE_LOG2 + 3 : CLPF_NOSIZE;
av1_clpf_frame(frame, cpi->Source, cm, cm->clpf_size != CLPF_NOSIZE,
strength_y, 4 + cm->clpf_size, AOM_PLANE_Y,
av1_clpf_decision);
}
if (strength_u) {
cm->clpf_strength_u = strength_u - (strength_u == 4);
av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, NULL, AOM_PLANE_U,
NULL);
av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, AOM_PLANE_U, NULL);
}
if (strength_v) {
cm->clpf_strength_v = strength_v - (strength_v == 4);
av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, NULL, AOM_PLANE_V,
NULL);
av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, AOM_PLANE_V, NULL);
}
}
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment