diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 6203336374bc07895a7fc5a2d70a6d24af0cb09f..b31a32ca8cf3e9b75816fb948d59f680628276fe 100644 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl @@ -854,31 +854,14 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/; if (aom_config("CONFIG_CDEF") eq "yes") { add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd"; add_proto qw/void aom_clpf_hblock_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd"; - if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { - add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd, unsigned int dmp"; - add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd, unsigned int dmp"; - # VS compiling for 32 bit targets does not support vector types in - # structs as arguments, which makes the v256 type of the intrinsics - # hard to support, so optimizations for this target are disabled. 
- if ($opts{config} !~ /libs-x86-win32-vs.*/) { - specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/; - specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/; - } - } - if ($opts{config} !~ /libs-x86-win32-vs.*/) { - specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/; - specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/; - } add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd"; - add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int dmp"; - add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int dmp"; # VS compiling for 32 bit targets does not support vector types in # structs as arguments, which makes the v256 type of the intrinsics # hard to support, so optimizations for this target are disabled. 
if ($opts{config} !~ /libs-x86-win32-vs.*/) { + specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/; + specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/; - specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/; - specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/; } } diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk index 9b624a2995206b75f7670580bfaa292b24931d80..15b258194cb380d97d3068dc3a465df96b1d222e 100644 --- a/av1/av1_cx.mk +++ b/av1/av1_cx.mk @@ -110,13 +110,6 @@ AV1_CX_SRCS-yes += encoder/mbgraph.c AV1_CX_SRCS-yes += encoder/mbgraph.h ifeq ($(CONFIG_CDEF),yes) AV1_CX_SRCS-yes += encoder/pickcdef.c -AV1_CX_SRCS-yes += encoder/clpf_rdo.c -AV1_CX_SRCS-yes += encoder/clpf_rdo.h -AV1_CX_SRCS-yes += encoder/clpf_rdo_simd.h -AV1_CX_SRCS-$(HAVE_SSE2) += encoder/clpf_rdo_sse2.c -AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/clpf_rdo_ssse3.c -AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/clpf_rdo_sse4.c -AV1_CX_SRCS-$(HAVE_NEON) += encoder/clpf_rdo_neon.c endif ifeq ($(CONFIG_PVQ),yes) # PVQ from daala diff --git a/av1/common/cdef.c b/av1/common/cdef.c index 707f8603290847137d4cc0835e62880c70a99ccc..e2f5b422d3678419ea5cd3f32bb8c5cae68489ef 100644 --- a/av1/common/cdef.c +++ b/av1/common/cdef.c @@ -143,8 +143,8 @@ static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride, #endif } -void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, - int clpf_strength_u, int clpf_strength_v) { +void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, + MACROBLOCKD *xd) { int r, c; int sbr, sbc; int nhsb, nvsb; @@ -162,11 +162,9 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, int dering_left; int coeff_shift = AOMMAX(cm->bit_depth - 8, 0); int nplanes = 3; - int *lev; int chroma_dering = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y && xd->plane[2].subsampling_x == xd->plane[2].subsampling_y; - lev = cm->cdef_strengths; nvsb = 
(cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; av1_setup_dst_planes(xd->plane, frame, 0, 0); @@ -193,6 +191,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, dering_left = 1; for (sbc = 0; sbc < nhsb; sbc++) { int level, clpf_strength; + int uv_level, uv_clpf_strength; int nhb, nvb; int cstart = 0; #if 0 // TODO(stemidts/jmvalin): Handle tile borders correctly @@ -205,18 +204,34 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc); nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr); level = dering_level_table - [lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride + - MAX_MIB_SIZE * sbc] - ->mbmi.cdef_strength] / + [cm->cdef_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * + cm->mi_stride + + MAX_MIB_SIZE * sbc] + ->mbmi.cdef_strength] / CLPF_STRENGTHS]; clpf_strength = - lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride + - MAX_MIB_SIZE * sbc] - ->mbmi.cdef_strength] % + cm->cdef_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * + cm->mi_stride + + MAX_MIB_SIZE * sbc] + ->mbmi.cdef_strength] % CLPF_STRENGTHS; clpf_strength += clpf_strength == 3; + uv_level = dering_level_table + [cm->cdef_uv_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * + cm->mi_stride + + MAX_MIB_SIZE * sbc] + ->mbmi.cdef_strength] / + CLPF_STRENGTHS]; + uv_clpf_strength = + cm->cdef_uv_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * + cm->mi_stride + + MAX_MIB_SIZE * sbc] + ->mbmi.cdef_strength] % + CLPF_STRENGTHS; + uv_clpf_strength += uv_clpf_strength == 3; curr_row_dering[sbc] = 0; - if ((level == 0 && clpf_strength == 0) || + if ((level == 0 && clpf_strength == 0 && uv_level == 0 && + uv_clpf_strength == 0) || (dering_count = sb_compute_dering_list( cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) { dering_left = 0; @@ -232,9 +247,11 @@ void 
av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, int clpf_damping = 3 - (pli != AOM_PLANE_Y) + (cm->base_qindex >> 6); if (pli) { - if (!chroma_dering) level = 0; - clpf_strength = pli == 1 ? clpf_strength_u : clpf_strength_v; - clpf_strength += clpf_strength == 3; + if (chroma_dering) + level = uv_level; + else + level = 0; + clpf_strength = uv_clpf_strength; } if (sbc == nhsb - 1) cend = (nhb << bsize[pli]); @@ -359,12 +376,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, (nhb << bsize[pli])); - /* FIXME: This is a temporary hack that uses more conservative - deringing for chroma. */ - if (pli) - threshold = (level * 5 + 4) >> 3 << coeff_shift; - else - threshold = level << coeff_shift; + threshold = level << coeff_shift; if (threshold == 0 && clpf_strength == 0) continue; od_dering(dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER], diff --git a/av1/common/cdef.h b/av1/common/cdef.h index d3c33f255e293cf7388c4931aab0dcfc22df4ab8..e1944cf62580395bef7d700ac758d10cce691a18 100644 --- a/av1/common/cdef.h +++ b/av1/common/cdef.h @@ -33,8 +33,7 @@ extern int dering_level_table[DERING_STRENGTHS]; int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col); int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col, dering_list *dlist); -void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, - int clpf_strength_u, int clpf_strength_v); +void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd); void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm, MACROBLOCKD *xd); diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h index 17ac451f7c7912c5e349ed80e4ae806f83764bb1..64f9a6e1ec0df131c97bcfec9640fc9ebb24602f 100644 --- a/av1/common/onyxc_int.h +++ b/av1/common/onyxc_int.h @@ -404,9 +404,8 @@ typedef struct AV1Common { #if 
CONFIG_CDEF int nb_cdef_strengths; int cdef_strengths[CDEF_MAX_STRENGTHS]; + int cdef_uv_strengths[CDEF_MAX_STRENGTHS]; int cdef_bits; - int clpf_strength_u; - int clpf_strength_v; #endif #if CONFIG_DELTA_Q diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index 990a516d6d2c0511052457d2450005285bd4dd58..a5abb904d1ce9875c77090231c52c7c8f78e7afb 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c @@ -2672,9 +2672,8 @@ static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { cm->nb_cdef_strengths = 1 << cm->cdef_bits; for (i = 0; i < cm->nb_cdef_strengths; i++) { cm->cdef_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS); + cm->cdef_uv_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS); } - cm->clpf_strength_u = aom_rb_read_literal(rb, 2); - cm->clpf_strength_v = aom_rb_read_literal(rb, 2); } #endif // CONFIG_CDEF @@ -4948,8 +4947,7 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data, #if CONFIG_CDEF if (!cm->skip_loop_filter) { - av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->clpf_strength_u, - cm->clpf_strength_v); + av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb); } #endif // CONFIG_CDEF diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index fdc7ac8918d20afff9f75dc19ec626a59dfe7bbb..9252b00407d6aa93cd2f02e6335d4d3500631d3f 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c @@ -3496,9 +3496,8 @@ static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { aom_wb_write_literal(wb, cm->cdef_bits, 2); for (i = 0; i < cm->nb_cdef_strengths; i++) { aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS); + aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS); } - aom_wb_write_literal(wb, cm->clpf_strength_u, 2); - aom_wb_write_literal(wb, cm->clpf_strength_v, 2); } #endif diff --git a/av1/encoder/clpf_rdo.c b/av1/encoder/clpf_rdo.c deleted file mode 100644 index 
01736818bfcef854f0b499708c1a587ef20f75e1..0000000000000000000000000000000000000000 --- a/av1/encoder/clpf_rdo.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/common/clpf.h" -#include "./aom_dsp_rtcd.h" -#include "aom/aom_image.h" -#include "aom/aom_integer.h" -#include "av1/common/quant_common.h" - -// Calculate the error of a filtered and unfiltered block -void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride, - int ostride, int x0, int y0, int width, int height, - int *sum0, int *sum1, unsigned int strength, int size, - unsigned int dmp) { - int x, y; - for (y = y0; y < y0 + size; y++) { - for (x = x0; x < x0 + size; x++) { - const int O = org[y * ostride + x]; - const int X = rec[y * rstride + x]; - const int A = rec[AOMMAX(0, y - 2) * rstride + x]; - const int B = rec[AOMMAX(0, y - 1) * rstride + x]; - const int C = rec[y * rstride + AOMMAX(0, x - 2)]; - const int D = rec[y * rstride + AOMMAX(0, x - 1)]; - const int E = rec[y * rstride + AOMMIN(width - 1, x + 1)]; - const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)]; - const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x]; - const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x]; - const int delta = - av1_clpf_sample(X, A, B, C, D, E, F, G, H, strength, dmp); - const int Y = X + delta; - *sum0 += (O - X) * (O - X); - *sum1 += (O - Y) * (O - Y); - } - } -} - -void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org, - int rstride, 
int ostride, int x0, int y0, - int width, int height, int *sum, int size, - unsigned int dmp) { - int x, y; - - for (y = y0; y < y0 + size; y++) { - for (x = x0; x < x0 + size; x++) { - const int O = org[y * ostride + x]; - const int X = rec[y * rstride + x]; - const int A = rec[AOMMAX(0, y - 2) * rstride + x]; - const int B = rec[AOMMAX(0, y - 1) * rstride + x]; - const int C = rec[y * rstride + AOMMAX(0, x - 2)]; - const int D = rec[y * rstride + AOMMAX(0, x - 1)]; - const int E = rec[y * rstride + AOMMIN(width - 1, x + 1)]; - const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)]; - const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x]; - const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x]; - const int delta1 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, dmp); - const int delta2 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, dmp); - const int delta3 = av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, dmp); - const int F1 = X + delta1; - const int F2 = X + delta2; - const int F3 = X + delta3; - sum[0] += (O - X) * (O - X); - sum[1] += (O - F1) * (O - F1); - sum[2] += (O - F2) * (O - F2); - sum[3] += (O - F3) * (O - F3); - } - } -} - -#if CONFIG_AOM_HIGHBITDEPTH -// Identical to aom_clpf_detect_c() apart from "rec" and "org". 
-void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org, - int rstride, int ostride, int x0, int y0, int width, - int height, int *sum0, int *sum1, - unsigned int strength, int size, unsigned int bd, - unsigned int dmp) { - const int shift = bd - 8; - int x, y; - for (y = y0; y < y0 + size; y++) { - for (x = x0; x < x0 + size; x++) { - const int O = org[y * ostride + x] >> shift; - const int X = rec[y * rstride + x] >> shift; - const int A = rec[AOMMAX(0, y - 2) * rstride + x] >> shift; - const int B = rec[AOMMAX(0, y - 1) * rstride + x] >> shift; - const int C = rec[y * rstride + AOMMAX(0, x - 2)] >> shift; - const int D = rec[y * rstride + AOMMAX(0, x - 1)] >> shift; - const int E = rec[y * rstride + AOMMIN(width - 1, x + 1)] >> shift; - const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)] >> shift; - const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift; - const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift; - const int delta = av1_clpf_sample(X, A, B, C, D, E, F, G, H, - strength >> shift, dmp - shift); - const int Y = X + delta; - *sum0 += (O - X) * (O - X); - *sum1 += (O - Y) * (O - Y); - } - } -} - -// aom_clpf_detect_multi_c() apart from "rec" and "org". 
-void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org, - int rstride, int ostride, int x0, int y0, - int width, int height, int *sum, int size, - unsigned int bd, unsigned int dmp) { - const int shift = bd - 8; - int x, y; - - for (y = y0; y < y0 + size; y++) { - for (x = x0; x < x0 + size; x++) { - int O = org[y * ostride + x] >> shift; - int X = rec[y * rstride + x] >> shift; - const int A = rec[AOMMAX(0, y - 2) * rstride + x] >> shift; - const int B = rec[AOMMAX(0, y - 1) * rstride + x] >> shift; - const int C = rec[y * rstride + AOMMAX(0, x - 2)] >> shift; - const int D = rec[y * rstride + AOMMAX(0, x - 1)] >> shift; - const int E = rec[y * rstride + AOMMIN(width - 1, x + 1)] >> shift; - const int F = rec[y * rstride + AOMMIN(width - 1, x + 2)] >> shift; - const int G = rec[AOMMIN(height - 1, y + 1) * rstride + x] >> shift; - const int H = rec[AOMMIN(height - 1, y + 2) * rstride + x] >> shift; - const int delta1 = - av1_clpf_sample(X, A, B, C, D, E, F, G, H, 1, dmp - shift); - const int delta2 = - av1_clpf_sample(X, A, B, C, D, E, F, G, H, 2, dmp - shift); - const int delta3 = - av1_clpf_sample(X, A, B, C, D, E, F, G, H, 4, dmp - shift); - const int F1 = X + delta1; - const int F2 = X + delta2; - const int F3 = X + delta3; - sum[0] += (O - X) * (O - X); - sum[1] += (O - F1) * (O - F1); - sum[2] += (O - F2) * (O - F2); - sum[3] += (O - F3) * (O - F3); - } - } -} -#endif - -// Calculate the square error of all filter settings. Result: -// res[0][0] : unfiltered -// res[0][1-3] : strength=1,2,4, no signals -static void clpf_rdo(const YV12_BUFFER_CONFIG *rec, - const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, - unsigned int block_size, int w, int h, uint64_t res[4], - int plane) { - int m, n; - int sum[4]; - const int subx = plane != AOM_PLANE_Y && rec->subsampling_x; - const int suby = plane != AOM_PLANE_Y && rec->subsampling_y; - uint8_t *rec_buffer = - plane != AOM_PLANE_Y - ? (plane == AOM_PLANE_U ? 
rec->u_buffer : rec->v_buffer) - : rec->y_buffer; - uint8_t *org_buffer = - plane != AOM_PLANE_Y - ? (plane == AOM_PLANE_U ? org->u_buffer : org->v_buffer) - : org->y_buffer; - int rec_width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width; - int rec_height = - plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height; - int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride; - int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride; - int damping = - cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6); - - sum[0] = sum[1] = sum[2] = sum[3] = 0; - - for (m = 0; m < h; m++) { - for (n = 0; n < w; n++) { - int xpos = n * block_size; - int ypos = m * block_size; - if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride + - (xpos << subx) / MI_SIZE] - ->mbmi.skip) { -#if CONFIG_AOM_HIGHBITDEPTH - if (cm->use_highbitdepth) { - aom_clpf_detect_multi_hbd( - CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer), - rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum, - block_size, cm->bit_depth, damping); - } else { - aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, - xpos, ypos, rec_width, rec_height, sum, - block_size, damping); - } -#else - aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride, - xpos, ypos, rec_width, rec_height, sum, - block_size, damping); -#endif - } - } - } - - res[0] += sum[0]; - res[1] += sum[1]; - res[2] += sum[2]; - res[3] += sum[3]; -} - -void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec, - const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, - int *best_strength, int plane) { - int i; - uint64_t best, sums[4]; - int width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width; - int height = plane != AOM_PLANE_Y ? 
rec->uv_crop_height : rec->y_crop_height; - const int bs = MI_SIZE; - const int bslog = get_msb(bs); - - memset(sums, 0, sizeof(sums)); - - clpf_rdo(rec, org, cm, bs, width >> bslog, height >> bslog, sums, plane); - - // Add a favourable bias for conservative strengths - for (i = 0; i < 4; i++) sums[i] -= sums[i] >> (7 + i); - - // Tag the strength to the error - for (i = 0; i < 4; i++) sums[i] = (sums[i] << 2) + i; - - // Identify the strength with the smallest error - best = (uint64_t)1 << 63; - for (i = 0; i < 4; i++) - if (sums[i] < best) best = sums[i]; - *best_strength = best & 3 ? 1 << ((best - 1) & 3) : 0; -} diff --git a/av1/encoder/clpf_rdo.h b/av1/encoder/clpf_rdo.h deleted file mode 100644 index e137378165386da1a812373a0b5d4d11b2f07959..0000000000000000000000000000000000000000 --- a/av1/encoder/clpf_rdo.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AV1_ENCODER_CLPF_H_ -#define AV1_ENCODER_CLPF_H_ - -#include "av1/common/reconinter.h" - -void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec, - const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm, - int *best_strength, int plane); - -#endif diff --git a/av1/encoder/clpf_rdo_neon.c b/av1/encoder/clpf_rdo_neon.c deleted file mode 100644 index 02053c518069f40f8c93571dbbeda7cc4b681609..0000000000000000000000000000000000000000 --- a/av1/encoder/clpf_rdo_neon.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_neon -#include "./clpf_rdo_simd.h" diff --git a/av1/encoder/clpf_rdo_sse2.c b/av1/encoder/clpf_rdo_sse2.c deleted file mode 100644 index 99847c01a7efc6d71cffd64758ca44b423925121..0000000000000000000000000000000000000000 --- a/av1/encoder/clpf_rdo_sse2.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_sse2 -#include "./clpf_rdo_simd.h" diff --git a/av1/encoder/clpf_rdo_sse4.c b/av1/encoder/clpf_rdo_sse4.c deleted file mode 100644 index 049f5371cc1f8711c9275464ade2b3d819fcd395..0000000000000000000000000000000000000000 --- a/av1/encoder/clpf_rdo_sse4.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_sse4_1 -#include "./clpf_rdo_simd.h" diff --git a/av1/encoder/clpf_rdo_ssse3.c b/av1/encoder/clpf_rdo_ssse3.c deleted file mode 100644 index 35b23b2d2791f350a9ddbea414d9fca4ec87b79e..0000000000000000000000000000000000000000 --- a/av1/encoder/clpf_rdo_ssse3.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "aom_dsp/aom_simd.h" -#define SIMD_FUNC(name) name##_ssse3 -#include "./clpf_rdo_simd.h" diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 757d589005e4b6ccb7909785f9d581e40f5664f7..963d7f55db2f512b11757fc6a1b74a53ca46cedc 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c @@ -19,7 +19,6 @@ #if CONFIG_CDEF #include "av1/common/cdef.h" #include "av1/common/clpf.h" -#include "av1/encoder/clpf_rdo.h" #endif // CONFIG_CDEF #include "av1/common/filter.h" #include "av1/common/idct.h" @@ -3522,7 +3521,8 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { } #if CONFIG_CDEF if (is_lossless_requested(&cpi->oxcf)) { - cm->clpf_strength_u = cm->clpf_strength_v = 0; cm->cdef_bits = 0; cm->cdef_strengths[0] = 0; + /* The chroma strength is signalled alongside luma; clear it as well. */ + cm->cdef_uv_strengths[0] = 0; cm->nb_cdef_strengths = 1; @@ -3531,12 +3531,7 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { av1_cdef_search(cm->frame_to_show, cpi->Source, cm, xd); // Apply the filter - av1_cdef_frame(cm->frame_to_show, cm, xd, cm->clpf_strength_u, - cm->clpf_strength_v); - - // Pack the clpf chroma strengths into two bits each - cm->clpf_strength_u -= cm->clpf_strength_u == 4; - cm->clpf_strength_v -= cm->clpf_strength_v == 4; + av1_cdef_frame(cm->frame_to_show, cm, xd); } #endif #if CONFIG_LOOP_RESTORATION diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c index cb8a500d25c4e7004e84622705c6cb7be1b2945c..96320d34f2f3e8a376ffb8f9d3f29d7ad8f49290 100644 --- a/av1/encoder/pickcdef.c +++ b/av1/encoder/pickcdef.c @@ -17,7 +17,6 @@ #include "av1/common/cdef.h" #include "av1/common/onyxc_int.h" #include "av1/common/reconinter.h" -#include "av1/encoder/clpf_rdo.h" #include "av1/encoder/encoder.h" #define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS) @@ -79,12 +78,12 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths, } static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride, - int nhb, int nvb, int coeff_shift) { + int nhb, int nvb, int coeff_shift, int 
bsize) { int i, j; double sum; sum = 0; - for (i = 0; i < nvb << 3; i++) { - for (j = 0; j < nhb << 3; j++) { + for (i = 0; i < nvb << bsize; i++) { + for (j = 0; j < nhb << bsize; j++) { double tmp; tmp = x[i * xstride + j] - y[i * ystride + j]; sum += tmp * tmp; @@ -97,11 +96,11 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm, MACROBLOCKD *xd) { int r, c; int sbr, sbc; - uint16_t *src; - uint16_t *ref_coeff; + uint16_t *src[3]; + uint16_t *ref_coeff[3]; dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE]; int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } }; - int stride; + int stride[3]; int bsize[3]; int dec[3]; int pli; @@ -114,8 +113,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index)); - uint64_t(*mse)[DERING_STRENGTHS * CLPF_STRENGTHS] = - aom_malloc(sizeof(*mse) * nvsb * nhsb); + int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*selected_strength)); + uint64_t(*mse[3])[TOTAL_STRENGTHS]; int clpf_damping = 3 + (cm->base_qindex >> 6); int i; int best_lev[CDEF_MAX_STRENGTHS]; @@ -123,35 +122,56 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, int nb_strength_bits; int quantizer; double lambda; + int nplanes = 3; + int chroma_dering = + xd->plane[1].subsampling_x == xd->plane[1].subsampling_y && + xd->plane[2].subsampling_x == xd->plane[2].subsampling_y; quantizer = av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8); lambda = .12 * quantizer * quantizer / 256.; - src = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64); - ref_coeff = - aom_memalign(32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64); av1_setup_dst_planes(xd->plane, frame, 0, 0); - for (pli = 0; pli < 3; pli++) { + for (pli = 0; pli < nplanes; pli++) { + uint8_t 
*ref_buffer; + int ref_stride; + switch (pli) { + case 0: + ref_buffer = ref->y_buffer; + ref_stride = ref->y_stride; + break; + case 1: + ref_buffer = ref->u_buffer; + ref_stride = ref->uv_stride; + break; + case 2: + ref_buffer = ref->v_buffer; + ref_stride = ref->uv_stride; + break; + } + mse[pli] = aom_malloc(sizeof(**mse) * nvsb * nhsb); + src[pli] = aom_memalign(32, sizeof(**src) * cm->mi_rows * cm->mi_cols * 64); + ref_coeff[pli] = + aom_memalign(32, sizeof(**ref_coeff) * cm->mi_rows * cm->mi_cols * 64); dec[pli] = xd->plane[pli].subsampling_x; bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli]; - } - stride = cm->mi_cols << bsize[0]; - for (r = 0; r < cm->mi_rows << bsize[0]; ++r) { - for (c = 0; c < cm->mi_cols << bsize[0]; ++c) { + stride[pli] = cm->mi_cols << 3; + for (r = 0; r < cm->mi_rows << bsize[pli]; ++r) { + for (c = 0; c < cm->mi_cols << bsize[pli]; ++c) { #if CONFIG_AOM_HIGHBITDEPTH - if (cm->use_highbitdepth) { - src[r * stride + c] = CONVERT_TO_SHORTPTR( - xd->plane[0].dst.buf)[r * xd->plane[0].dst.stride + c]; - ref_coeff[r * stride + c] = - CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c]; - } else { + if (cm->use_highbitdepth) { + src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR( + xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c]; + ref_coeff[pli][r * stride[pli] + c] = + CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c]; + } else { #endif - src[r * stride + c] = - xd->plane[0].dst.buf[r * xd->plane[0].dst.stride + c]; - ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c]; + src[pli][r * stride[pli] + c] = + xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c]; + ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c]; #if CONFIG_AOM_HIGHBITDEPTH - } + } #endif + } } } sb_count = 0; @@ -175,44 +195,49 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, int j; level = dering_level_table[gi / CLPF_STRENGTHS]; threshold = level << coeff_shift; - for (r = 0; r < nvb << 
bsize[0]; r++) { - for (c = 0; c < nhb << bsize[0]; c++) { - dst[(r * MAX_MIB_SIZE << bsize[0]) + c] = - src[((sbr * MAX_MIB_SIZE << bsize[0]) + r) * stride + - (sbc * MAX_MIB_SIZE << bsize[0]) + c]; + for (pli = 0; pli < nplanes; pli++) { + if (pli > 0 && !chroma_dering) threshold = 0; + for (r = 0; r < nvb << bsize[pli]; r++) { + for (c = 0; c < nhb << bsize[pli]; c++) { + dst[(r * MAX_MIB_SIZE << bsize[pli]) + c] = + src[pli] + [((sbr * MAX_MIB_SIZE << bsize[pli]) + r) * stride[pli] + + (sbc * MAX_MIB_SIZE << bsize[pli]) + c]; + } } - } - in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER; - /* We avoid filtering the pixels for which some of the pixels to average - are outside the frame. We could change the filter instead, but it - would - add special cases for any future vectorization. */ - for (i = 0; i < OD_DERING_INBUF_SIZE; i++) - inbuf[i] = OD_DERING_VERY_LARGE; - for (i = -OD_FILT_VBORDER * (sbr != 0); - i < (nvb << bsize[0]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) { - for (j = -OD_FILT_HBORDER * (sbc != 0); - j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1); - j++) { - uint16_t *x; - x = &src[(sbr * stride * MAX_MIB_SIZE << bsize[0]) + - (sbc * MAX_MIB_SIZE << bsize[0])]; - in[i * OD_FILT_BSTRIDE + j] = x[i * stride + j]; + in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER; + /* We avoid filtering the pixels for which some of the pixels to + average + are outside the frame. We could change the filter instead, but it + would add special cases for any future vectorization. 
*/ + for (i = 0; i < OD_DERING_INBUF_SIZE; i++) + inbuf[i] = OD_DERING_VERY_LARGE; + for (i = -OD_FILT_VBORDER * (sbr != 0); + i < (nvb << bsize[pli]) + OD_FILT_VBORDER * (sbr != nvsb - 1); + i++) { + for (j = -OD_FILT_HBORDER * (sbc != 0); + j < (nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1); + j++) { + uint16_t *x; + x = &src[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) + + (sbc * MAX_MIB_SIZE << bsize[pli])]; + in[i * OD_FILT_BSTRIDE + j] = x[i * stride[pli] + j]; + } } + clpf_strength = gi % CLPF_STRENGTHS; + od_dering(tmp_dst, in, dec[pli], dir, pli, dlist, dering_count, + threshold, clpf_strength + (clpf_strength == 3), + clpf_damping, coeff_shift); + copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[pli], tmp_dst, + dlist, dering_count, bsize[pli]); + mse[pli][sb_count][gi] = (int)compute_dist( + dst, MAX_MIB_SIZE << bsize[pli], + &ref_coeff[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) + + (sbc * MAX_MIB_SIZE << bsize[pli])], + stride[pli], nhb, nvb, coeff_shift, bsize[pli]); + sb_index[sb_count] = + MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc; } - clpf_strength = gi % CLPF_STRENGTHS; - od_dering(tmp_dst, in, 0, dir, 0, dlist, dering_count, threshold, - clpf_strength + (clpf_strength == 3), clpf_damping, - coeff_shift); - copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst, - dlist, dering_count, bsize[0]); - mse[sb_count][gi] = (int)compute_dist( - dst, MAX_MIB_SIZE << bsize[0], - &ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) + - (sbc * MAX_MIB_SIZE << bsize[0])], - stride, nhb, nvb, coeff_shift); - sb_index[sb_count] = - MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc; } sb_count++; } @@ -222,7 +247,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, /* Search for different number of signalling bits. 
*/ for (i = 0; i <= 3; i++) { nb_strengths = 1 << i; - tot_mse = joint_strength_search(best_lev, nb_strengths, mse, sb_count); + tot_mse = joint_strength_search(best_lev, nb_strengths, mse[0], sb_count); /* Count superblock signalling cost. */ tot_mse += (uint64_t)(sb_count * lambda * i); /* Count header signalling cost. */ @@ -243,21 +268,46 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, uint64_t best_mse = (uint64_t)1 << 63; best_gi = 0; for (gi = 0; gi < cm->nb_cdef_strengths; gi++) { - if (mse[i][best_lev[gi]] < best_mse) { + if (mse[0][i][best_lev[gi]] < best_mse) { best_gi = gi; - best_mse = mse[i][best_lev[gi]]; + best_mse = mse[0][i][best_lev[gi]]; } } + selected_strength[i] = best_gi; cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi; } - - aom_free(src); - aom_free(ref_coeff); - aom_free(mse); + int str; + /* For each strength option we picked in luma, find the optimal chroma + strength. */ + if (nplanes >= 3) { + for (str = 0; str < cm->nb_cdef_strengths; str++) { + int gi; + int best_gi = 0; + best_tot_mse = (uint64_t)1 << 63; + for (gi = 0; gi < TOTAL_STRENGTHS; gi++) { + tot_mse = 0; + for (i = 0; i < sb_count; i++) { + if (selected_strength[i] == str) { + tot_mse += mse[1][i][gi] + mse[2][i][gi]; + } + } + if (tot_mse < best_tot_mse) { + best_gi = gi; + best_tot_mse = tot_mse; + } + } + cm->cdef_uv_strengths[str] = best_gi; + } + } else { + /* No chroma planes were searched: signal zero chroma strength. */ + for (str = 0; str < cm->nb_cdef_strengths; str++) + cm->cdef_uv_strengths[str] = 0; + } + for (pli = 0; pli < nplanes; pli++) { + aom_free(src[pli]); + aom_free(ref_coeff[pli]); + aom_free(mse[pli]); + } aom_free(sb_index); - - av1_clpf_test_plane(cm->frame_to_show, ref, cm, &cm->clpf_strength_u, - AOM_PLANE_U); - av1_clpf_test_plane(cm->frame_to_show, ref, cm, &cm->clpf_strength_v, - AOM_PLANE_V); + aom_free(selected_strength); }