From 8bf837f153ef522e2e914362d9fed21658b41d91 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Fri, 29 Jul 2016 15:38:55 -0700 Subject: [PATCH] Cherry pick from AOM: 68e7e4d0 Remove VP9_CAP_POSTPROC 0738390c Remove vp9_temporal denoise b89861a4 Remove vp9-postproc Change-Id: I4ecaa0ac83a519c8174a494378fc23df610ff2a8 --- test/datarate_test.cc | 62 --- vp10/common/alloccommon.c | 9 - vp10/common/mfqe.c | 400 ----------------- vp10/common/mfqe.h | 31 -- vp10/common/mips/msa/mfqe_msa.c | 137 ------ vp10/common/onyxc_int.h | 12 - vp10/common/postproc.c | 721 ------------------------------ vp10/common/postproc.h | 53 --- vp10/common/ppflags.h | 43 -- vp10/common/vp10_rtcd_defs.pl | 45 -- vp10/common/x86/mfqe_sse2.asm | 287 ------------ vp10/common/x86/postproc_sse2.asm | 632 -------------------------- vp10/decoder/decoder.c | 20 +- vp10/decoder/decoder.h | 4 +- vp10/encoder/context_tree.h | 9 - vp10/encoder/denoiser.c | 521 --------------------- vp10/encoder/denoiser.h | 72 --- vp10/encoder/encoder.c | 106 +---- vp10/encoder/encoder.h | 13 +- vp10/encoder/x86/denoiser_sse2.c | 373 ---------------- vp10/vp10_common.mk | 14 - vp10/vp10_cx_iface.c | 20 +- vp10/vp10_dx_iface.c | 36 +- vp10/vp10cx.mk | 9 - 24 files changed, 8 insertions(+), 3621 deletions(-) delete mode 100644 vp10/common/mfqe.c delete mode 100644 vp10/common/mfqe.h delete mode 100644 vp10/common/mips/msa/mfqe_msa.c delete mode 100644 vp10/common/postproc.c delete mode 100644 vp10/common/postproc.h delete mode 100644 vp10/common/ppflags.h delete mode 100644 vp10/common/x86/mfqe_sse2.asm delete mode 100644 vp10/common/x86/postproc_sse2.asm delete mode 100644 vp10/encoder/denoiser.c delete mode 100644 vp10/encoder/denoiser.h delete mode 100644 vp10/encoder/x86/denoiser_sse2.c diff --git a/test/datarate_test.cc b/test/datarate_test.cc index fe516ce6f..4c7f7c58e 100644 --- a/test/datarate_test.cc +++ b/test/datarate_test.cc @@ -704,68 +704,6 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) { } } -#if CONFIG_VP9_TEMPORAL_DENOISING -// Check basic datarate targeting, for a single bitrate, when denoiser is on. -TEST_P(DatarateTestVP9Large, DenoiserLevels) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 2; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - - // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), - // there is only one denoiser mode: denoiserYonly(which is 1), - // but may add more modes in the future. - cfg_.rc_target_bitrate = 300; - ResetModel(); - // Turn on the denoiser. - denoiser_on_ = 1; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; -} - -// Check basic datarate targeting, for a single bitrate, when denoiser is off -// and on. -TEST_P(DatarateTestVP9Large, DenoiserOffOn) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 2; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 299); - - // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), - // there is only one denoiser mode: denoiserYonly(which is 1), - // but may add more modes in the future. - cfg_.rc_target_bitrate = 300; - ResetModel(); - // The denoiser is off by default. - denoiser_on_ = 0; - // Set the offon test flag. - denoiser_offon_test_ = 1; - denoiser_offon_period_ = 100; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; -} -#endif // CONFIG_VP9_TEMPORAL_DENOISING - class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { public: diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c index abdc72b6a..afb801338 100644 --- a/vp10/common/alloccommon.c +++ b/vp10/common/alloccommon.c @@ -87,15 +87,6 @@ void vp10_free_restoration_buffers(VP10_COMMON *cm) { } #endif // CONFIG_LOOP_RESTORATION -void vp10_free_postproc_buffers(VP10_COMMON *cm) { -#if CONFIG_VP9_POSTPROC - vpx_free_frame_buffer(&cm->post_proc_buffer); - vpx_free_frame_buffer(&cm->post_proc_buffer_int); -#else - (void)cm; -#endif -} - void vp10_free_context_buffers(VP10_COMMON *cm) { int i; cm->free_mi(cm); diff --git a/vp10/common/mfqe.c b/vp10/common/mfqe.c deleted file mode 100644 index 52756bd6a..000000000 --- a/vp10/common/mfqe.c +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vp10_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vp10/common/onyxc_int.h" -#include "vp10/common/postproc.h" - -// TODO(jackychen): Replace this function with SSE2 code. There is -// one SSE2 implementation in vp8, so will consider how to share it -// between vp8 and vp9. -static void filter_by_weight(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int block_size, int src_weight) { - const int dst_weight = (1 << MFQE_PRECISION) - src_weight; - const int rounding_bit = 1 << (MFQE_PRECISION - 1); - int r, c; - - for (r = 0; r < block_size; r++) { - for (c = 0; c < block_size; c++) { - dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) - >> MFQE_PRECISION; - } - src += src_stride; - dst += dst_stride; - } -} - -void vp10_filter_by_weight8x8_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int src_weight) { - filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight); -} - -void vp10_filter_by_weight16x16_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int src_weight) { - filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight); -} - -static void filter_by_weight32x32(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int weight) { - vp10_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight); - vp10_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, - weight); - vp10_filter_by_weight16x16(src + src_stride * 16, src_stride, - dst + dst_stride * 16, dst_stride, weight); - vp10_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride, - dst + dst_stride * 16 + 16, dst_stride, weight); -} - -static void filter_by_weight64x64(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int weight) { - filter_by_weight32x32(src, src_stride, dst, dst_stride, weight); - filter_by_weight32x32(src + 32, src_stride, dst + 32, - dst_stride, weight); - filter_by_weight32x32(src + src_stride * 32, src_stride, - dst + dst_stride * 32, dst_stride, weight); - filter_by_weight32x32(src + src_stride * 32 + 32, src_stride, - dst + dst_stride * 32 + 32, dst_stride, weight); -} - -static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd, - int yd_stride, const uint8_t *u, const uint8_t *v, - int uv_stride, uint8_t *ud, uint8_t *vd, - int uvd_stride, BLOCK_SIZE block_size, - int weight) { - if (block_size == BLOCK_16X16) { - vp10_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight); - vp10_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight); - vp10_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight); - } else if (block_size == BLOCK_32X32) { - filter_by_weight32x32(y, y_stride, yd, yd_stride, weight); - vp10_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight); - vp10_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight); - } else if (block_size == BLOCK_64X64) { - filter_by_weight64x64(y, y_stride, yd, yd_stride, weight); - filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight); - filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight); - } -} - -// TODO(jackychen): Determine whether replace it with assembly code. -static void copy_mem8x8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - int r; - for (r = 0; r < 8; r++) { - memcpy(dst, src, 8); - src += src_stride; - dst += dst_stride; - } -} - -static void copy_mem16x16(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - int r; - for (r = 0; r < 16; r++) { - memcpy(dst, src, 16); - src += src_stride; - dst += dst_stride; - } -} - -static void copy_mem32x32(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - copy_mem16x16(src, src_stride, dst, dst_stride); - copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride); - copy_mem16x16(src + src_stride * 16, src_stride, - dst + dst_stride * 16, dst_stride); - copy_mem16x16(src + src_stride * 16 + 16, src_stride, - dst + dst_stride * 16 + 16, dst_stride); -} - -void copy_mem64x64(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - copy_mem32x32(src, src_stride, dst, dst_stride); - copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride); - copy_mem32x32(src + src_stride * 32, src_stride, - dst + src_stride * 32, dst_stride); - copy_mem32x32(src + src_stride * 32 + 32, src_stride, - dst + src_stride * 32 + 32, dst_stride); -} - -static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, - int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, - uint8_t *vd, int yd_stride, int uvd_stride, - BLOCK_SIZE bs) { - if (bs == BLOCK_16X16) { - copy_mem16x16(y, y_stride, yd, yd_stride); - copy_mem8x8(u, uv_stride, ud, uvd_stride); - copy_mem8x8(v, uv_stride, vd, uvd_stride); - } else if (bs == BLOCK_32X32) { - copy_mem32x32(y, y_stride, yd, yd_stride); - copy_mem16x16(u, uv_stride, ud, uvd_stride); - copy_mem16x16(v, uv_stride, vd, uvd_stride); - } else { - copy_mem64x64(y, y_stride, yd, yd_stride); - copy_mem32x32(u, uv_stride, ud, uvd_stride); - copy_mem32x32(v, uv_stride, vd, uvd_stride); - } -} - -static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) { - const int adj = qdiff >> MFQE_PRECISION; - if (bs == BLOCK_16X16) { - *sad_thr = 7 + adj; - } else if (bs == BLOCK_32X32) { - *sad_thr = 6 + adj; - } else { // BLOCK_64X64 - *sad_thr = 5 + adj; - } - *vdiff_thr = 125 + qdiff; -} - -static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, - const uint8_t *v, int y_stride, int uv_stride, - uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, - int uvd_stride, int qdiff) { - int sad, sad_thr, vdiff, vdiff_thr; - uint32_t sse; - - get_thr(bs, qdiff, &sad_thr, &vdiff_thr); - - if (bs == BLOCK_16X16) { - vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; - sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; - } else if (bs == BLOCK_32X32) { - vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; - sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; - } else /* if (bs == BLOCK_64X64) */ { - vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; - sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; - } - - // vdiff > sad * 3 means vdiff should not be too small, otherwise, - // it might be a lighting change in smooth area. When there is a - // lighting change in smooth area, it is dangerous to do MFQE. - if (sad > 1 && vdiff > sad * 3) { - const int weight = 1 << MFQE_PRECISION; - int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr); - // When ifactor equals weight, no MFQE is done. - if (ifactor > weight) { - ifactor = weight; - } - apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd, - uvd_stride, bs, ifactor); - } else { - // Copy the block from current frame (i.e., no mfqe is done). - copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, - yd_stride, uvd_stride, bs); - } -} - -static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { - // Check the motion in current block(for inter frame), - // or check the motion in the correlated block in last frame (for keyframe). - const int mv_len_square = mi->mbmi.mv[0].as_mv.row * - mi->mbmi.mv[0].as_mv.row + - mi->mbmi.mv[0].as_mv.col * - mi->mbmi.mv[0].as_mv.col; - const int mv_threshold = 100; - return mi->mbmi.mode >= NEARESTMV && // Not an intra block - cur_bs >= BLOCK_16X16 && - mv_len_square <= mv_threshold; -} - -// Process each partiton in a super block, recursively. -static void mfqe_partition(VP10_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, - const uint8_t *y, const uint8_t *u, - const uint8_t *v, int y_stride, int uv_stride, - uint8_t *yd, uint8_t *ud, uint8_t *vd, - int yd_stride, int uvd_stride) { - int mi_offset, y_offset, uv_offset; - const BLOCK_SIZE cur_bs = mi->mbmi.sb_type; - const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; - const int bsl = b_width_log2_lookup[bs]; - PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; - const BLOCK_SIZE subsize = get_subsize(bs, partition); - - if (cur_bs < BLOCK_8X8) { - // If there are blocks smaller than 8x8, it must be on the boundary. - return; - } - // No MFQE on blocks smaller than 16x16 - if (bs == BLOCK_16X16) { - partition = PARTITION_NONE; - } - if (bs == BLOCK_64X64) { - mi_offset = 4; - y_offset = 32; - uv_offset = 16; - } else { - mi_offset = 2; - y_offset = 16; - uv_offset = 8; - } - switch (partition) { - BLOCK_SIZE mfqe_bs, bs_tmp; - case PARTITION_HORZ: - if (bs == BLOCK_64X64) { - mfqe_bs = BLOCK_64X32; - bs_tmp = BLOCK_32X32; - } else { - mfqe_bs = BLOCK_32X16; - bs_tmp = BLOCK_16X16; - } - if (mfqe_decision(mi, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, - yd, ud, vd, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, - y_stride, uv_stride, yd + y_offset, ud + uv_offset, - vd + uv_offset, yd_stride, uvd_stride, qdiff); - } - if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, - v + uv_offset * uv_stride, y_stride, uv_stride, - yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, - vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, - u + uv_offset * uv_stride + uv_offset, - v + uv_offset * uv_stride + uv_offset, y_stride, - uv_stride, yd + y_offset * yd_stride + y_offset, - ud + uv_offset * uvd_stride + uv_offset, - vd + uv_offset * uvd_stride + uv_offset, - yd_stride, uvd_stride, qdiff); - } - break; - case PARTITION_VERT: - if (bs == BLOCK_64X64) { - mfqe_bs = BLOCK_32X64; - bs_tmp = BLOCK_32X32; - } else { - mfqe_bs = BLOCK_16X32; - bs_tmp = BLOCK_16X16; - } - if (mfqe_decision(mi, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, - yd, ud, vd, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, - v + uv_offset * uv_stride, y_stride, uv_stride, - yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, - vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); - } - if (mfqe_decision(mi + mi_offset, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, - y_stride, uv_stride, yd + y_offset, ud + uv_offset, - vd + uv_offset, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, - u + uv_offset * uv_stride + uv_offset, - v + uv_offset * uv_stride + uv_offset, y_stride, - uv_stride, yd + y_offset * yd_stride + y_offset, - ud + uv_offset * uvd_stride + uv_offset, - vd + uv_offset * uvd_stride + uv_offset, - yd_stride, uvd_stride, qdiff); - } - break; - case PARTITION_NONE: - if (mfqe_decision(mi, cur_bs)) { - // Do mfqe on this partition. - mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, - yd, ud, vd, yd_stride, uvd_stride, qdiff); - } else { - // Copy the block from current frame(i.e., no mfqe is done). - copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, - yd_stride, uvd_stride, bs); - } - break; - case PARTITION_SPLIT: - // Recursion on four square partitions, e.g. if bs is 64X64, - // then look into four 32X32 blocks in it. - mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd, - yd_stride, uvd_stride); - mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset, - v + uv_offset, y_stride, uv_stride, yd + y_offset, - ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride); - mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize, - y + y_offset * y_stride, u + uv_offset * uv_stride, - v + uv_offset * uv_stride, y_stride, uv_stride, - yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, - vd + uv_offset * uvd_stride, yd_stride, uvd_stride); - mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, - subsize, y + y_offset * y_stride + y_offset, - u + uv_offset * uv_stride + uv_offset, - v + uv_offset * uv_stride + uv_offset, y_stride, - uv_stride, yd + y_offset * yd_stride + y_offset, - ud + uv_offset * uvd_stride + uv_offset, - vd + uv_offset * uvd_stride + uv_offset, - yd_stride, uvd_stride); - break; - default: - assert(0); - } -} - -void vp10_mfqe(VP10_COMMON *cm) { - int mi_row, mi_col; - // Current decoded frame. - const YV12_BUFFER_CONFIG *show = cm->frame_to_show; - // Last decoded frame and will store the MFQE result. - YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; - -#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES - // TODO(any): Fix for ext parition types and 128 superblocks - assert(0); -#endif // CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES - - // Loop through each super block. - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { - MODE_INFO *mi; - MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col); - // Motion Info in last frame. - MODE_INFO *mi_prev = cm->postproc_state.prev_mi + - (mi_row * cm->mi_stride + mi_col); - const uint32_t y_stride = show->y_stride; - const uint32_t uv_stride = show->uv_stride; - const uint32_t yd_stride = dest->y_stride; - const uint32_t uvd_stride = dest->uv_stride; - const uint32_t row_offset_y = mi_row << 3; - const uint32_t row_offset_uv = mi_row << 2; - const uint32_t col_offset_y = mi_col << 3; - const uint32_t col_offset_uv = mi_col << 2; - const uint8_t *y = show->y_buffer + row_offset_y * y_stride + - col_offset_y; - const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride + - col_offset_uv; - const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride + - col_offset_uv; - uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y; - uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + - col_offset_uv; - uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + - col_offset_uv; - if (frame_is_intra_only(cm)) { - mi = mi_prev; - } else { - mi = mi_local; - } - mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud, - vd, yd_stride, uvd_stride); - } - } -} diff --git a/vp10/common/mfqe.h b/vp10/common/mfqe.h deleted file mode 100644 index 7bedd119f..000000000 --- a/vp10/common/mfqe.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP10_COMMON_MFQE_H_ -#define VP10_COMMON_MFQE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -// Multiframe Quality Enhancement. -// The aim for MFQE is to replace pixel blocks in the current frame with -// the correlated pixel blocks (with higher quality) in the last frame. -// The replacement can only be taken in stationary blocks by checking -// the motion of the blocks and other conditions such as the SAD of -// the current block and correlated block, the variance of the block -// difference, etc. -void vp10_mfqe(struct VP10Common *cm); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP10_COMMON_MFQE_H_ diff --git a/vp10/common/mips/msa/mfqe_msa.c b/vp10/common/mips/msa/mfqe_msa.c deleted file mode 100644 index 3a593a1a1..000000000 --- a/vp10/common/mips/msa/mfqe_msa.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp10_rtcd.h" -#include "vp10/common/onyxc_int.h" -#include "vpx_dsp/mips/macros_msa.h" - -static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride, - uint8_t *dst_ptr, int32_t dst_stride, - int32_t src_weight) { - int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; - int32_t row; - uint64_t src0_d, src1_d, dst0_d, dst1_d; - v16i8 src0 = { 0 }; - v16i8 src1 = { 0 }; - v16i8 dst0 = { 0 }; - v16i8 dst1 = { 0 }; - v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; - - src_wt = __msa_fill_h(src_weight); - dst_wt = __msa_fill_h(dst_weight); - - for (row = 2; row--;) { - LD2(src_ptr, src_stride, src0_d, src1_d); - src_ptr += (2 * src_stride); - LD2(dst_ptr, dst_stride, dst0_d, dst1_d); - INSERT_D2_SB(src0_d, src1_d, src0); - INSERT_D2_SB(dst0_d, dst1_d, dst0); - - LD2(src_ptr, src_stride, src0_d, src1_d); - src_ptr += (2 * src_stride); - LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d); - INSERT_D2_SB(src0_d, src1_d, src1); - INSERT_D2_SB(dst0_d, dst1_d, dst1); - - UNPCK_UB_SH(src0, src_r, src_l); - UNPCK_UB_SH(dst0, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); - ST8x2_UB(dst0, dst_ptr, dst_stride); - dst_ptr += (2 * dst_stride); - - UNPCK_UB_SH(src1, src_r, src_l); - UNPCK_UB_SH(dst1, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); - ST8x2_UB(dst1, dst_ptr, dst_stride); - dst_ptr += (2 * dst_stride); - } -} - -static void filter_by_weight16x16_msa(const uint8_t *src_ptr, - int32_t src_stride, - uint8_t *dst_ptr, - int32_t dst_stride, - int32_t src_weight) { - int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; - int32_t row; - v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; - v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; - - src_wt = __msa_fill_h(src_weight); - dst_wt = __msa_fill_h(dst_weight); - - for (row = 4; row--;) { - LD_SB4(src_ptr, src_stride, src0, src1, src2, src3); - src_ptr += (4 * src_stride); - LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3); - - UNPCK_UB_SH(src0, src_r, src_l); - UNPCK_UB_SH(dst0, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - - UNPCK_UB_SH(src1, src_r, src_l); - UNPCK_UB_SH(dst1, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - - UNPCK_UB_SH(src2, src_r, src_l); - UNPCK_UB_SH(dst2, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - - UNPCK_UB_SH(src3, src_r, src_l); - UNPCK_UB_SH(dst3, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - } -} - -void vp10_filter_by_weight8x8_msa(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int src_weight) { - filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight); -} - -void vp10_filter_by_weight16x16_msa(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int src_weight) { - filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight); -} diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index 1225540f4..8a38980d7 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -26,10 +26,6 @@ #include "vp10/common/tile_common.h" #include "vp10/common/restoration.h" -#if CONFIG_VP9_POSTPROC -#include "vp10/common/postproc.h" -#endif - #ifdef __cplusplus extern "C" { #endif @@ -171,10 +167,6 @@ typedef struct VP10Common { int new_fb_idx; -#if CONFIG_VP9_POSTPROC - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; -#endif #if CONFIG_LOOP_RESTORATION YV12_BUFFER_CONFIG tmp_loop_buf; #endif // CONFIG_LOOP_RESTORATION @@ -317,10 +309,6 @@ typedef struct VP10Common { vpx_bit_depth_t bit_depth; vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer -#if CONFIG_VP9_POSTPROC - struct postproc_state postproc_state; -#endif - int error_resilient_mode; #if !CONFIG_EXT_TILE diff --git a/vp10/common/postproc.c b/vp10/common/postproc.c deleted file mode 100644 index 0b742bd5e..000000000 --- a/vp10/common/postproc.c +++ /dev/null @@ -1,721 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "./vp10_rtcd.h" - -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/system_state.h" -#include "vpx_scale/vpx_scale.h" -#include "vpx_scale/yv12config.h" - -#include "vp10/common/onyxc_int.h" -#include "vp10/common/postproc.h" -#include "vp10/common/textblit.h" - -#if CONFIG_VP9_POSTPROC -static const short kernel5[] = { - 1, 1, 4, 1, 1 -}; - -const short vp10_rv[] = { - 8, 5, 2, 2, 8, 12, 4, 9, 8, 3, - 0, 3, 9, 0, 0, 0, 8, 3, 14, 4, - 10, 1, 11, 14, 1, 14, 9, 6, 12, 11, - 8, 6, 10, 0, 0, 8, 9, 0, 3, 14, - 8, 11, 13, 4, 2, 9, 0, 3, 9, 6, - 1, 2, 3, 14, 13, 1, 8, 2, 9, 7, - 3, 3, 1, 13, 13, 6, 6, 5, 2, 7, - 11, 9, 11, 8, 7, 3, 2, 0, 13, 13, - 14, 4, 12, 5, 12, 10, 8, 10, 13, 10, - 4, 14, 4, 10, 0, 8, 11, 1, 13, 7, - 7, 14, 6, 14, 13, 2, 13, 5, 4, 4, - 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, - 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, - 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, - 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, - 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, - 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, - 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, - 4, 4, 0, 8, 7, 10, 0, 8, 14, 11, - 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, - 11, 12, 12, 8, 0, 11, 13, 1, 2, 0, - 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, - 0, 3, 10, 5, 8, 0, 11, 6, 7, 8, - 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, - 4, 3, 5, 6, 10, 8, 9, 4, 11, 14, - 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, - 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, - 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, - 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, - 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, - 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, - 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, - 4, 4, 0, 8, 7, 10, 0, 8, 14, 11, - 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, - 11, 12, 12, 8, 0, 11, 13, 1, 2, 0, - 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, - 0, 3, 10, 5, 8, 0, 11, 6, 7, 8, - 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, - 4, 3, 5, 6, 10, 8, 9, 4, 11, 14, - 3, 8, 3, 7, 8, 5, 11, 4, 12, 3, - 11, 9, 14, 8, 14, 13, 4, 3, 1, 2, - 14, 6, 5, 4, 4, 11, 4, 6, 2, 1, - 5, 8, 8, 12, 13, 5, 14, 10, 12, 13, - 0, 9, 5, 5, 11, 10, 13, 9, 10, 13, -}; - -static const uint8_t q_diff_thresh = 20; -static const uint8_t last_q_thresh = 170; - -void vp10_post_proc_down_and_across_c(const uint8_t *src_ptr, - uint8_t *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, - int cols, - int flimit) { - uint8_t const *p_src; - uint8_t *p_dst; - int row, col, i, v, kernel; - int pitch = src_pixels_per_line; - uint8_t d[8]; - (void)dst_pixels_per_line; - - for (row = 0; row < rows; row++) { - /* post_proc_down for one row */ - p_src = src_ptr; - p_dst = dst_ptr; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i * pitch]) > flimit) - goto down_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i * pitch]; - } - - v = (kernel >> 3); - down_skip_convolve: - p_dst[col] = v; - } - - /* now post_proc_across */ - p_src = dst_ptr; - p_dst = dst_ptr; - - for (i = 0; i < 8; i++) - d[i] = p_src[i]; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - d[col & 7] = v; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i]) > flimit) - goto across_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i]; - } - - d[col & 7] = (kernel >> 3); - across_skip_convolve: - - if (col >= 2) - p_dst[col - 2] = d[(col - 2) & 7]; - } - - /* handle the last two pixels */ - p_dst[col - 2] = d[(col - 2) & 7]; - p_dst[col - 1] = d[(col - 1) & 7]; - - - /* next row */ - src_ptr += pitch; - dst_ptr += pitch; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, - uint16_t *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, - int cols, - int flimit) { - uint16_t const *p_src; - uint16_t *p_dst; - int row, col, i, v, kernel; - int pitch = src_pixels_per_line; - uint16_t d[8]; - - for (row = 0; row < rows; row++) { - // post_proc_down for one row. - p_src = src_ptr; - p_dst = dst_ptr; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i * pitch]) > flimit) - goto down_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i * pitch]; - } - - v = (kernel >> 3); - - down_skip_convolve: - p_dst[col] = v; - } - - /* now post_proc_across */ - p_src = dst_ptr; - p_dst = dst_ptr; - - for (i = 0; i < 8; i++) - d[i] = p_src[i]; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - d[col & 7] = v; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i]) > flimit) - goto across_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i]; - } - - d[col & 7] = (kernel >> 3); - - across_skip_convolve: - if (col >= 2) - p_dst[col - 2] = d[(col - 2) & 7]; - } - - /* handle the last two pixels */ - p_dst[col - 2] = d[(col - 2) & 7]; - p_dst[col - 1] = d[(col - 1) & 7]; - - - /* next row */ - src_ptr += pitch; - dst_ptr += dst_pixels_per_line; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static int q2mbl(int x) { - if (x < 20) x = 20; - - x = 50 + (x - 50) * 10 / 8; - return x * x / 3; -} - -void vp10_mbpost_proc_across_ip_c(uint8_t *src, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - uint8_t *s = src; - uint8_t d[16]; - - for (r = 0; r < rows; r++) { - int sumsq = 0; - int sum = 0; - - for (i = -8; i <= 6; i++) { - sumsq += s[i] * s[i]; - sum += s[i]; - d[i + 8] = 0; - } - - for (c = 0; c < cols + 8; c++) { - int x = s[c + 7] - s[c - 8]; - int y = s[c + 7] + s[c - 8]; - - sum += x; - sumsq += x * y; - - d[c & 15] = s[c]; - - if (sumsq * 15 - sum * sum < flimit) { - d[c & 15] = (8 + sum + s[c]) >> 4; - } - - s[c - 8] = d[(c - 8) & 15]; - } - s += pitch; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - - uint16_t *s = src; - uint16_t d[16]; - - - for (r = 0; r < rows; r++) { - int sumsq = 0; - int sum = 0; - - for (i = -8; i <= 6; i++) { - sumsq += s[i] * s[i]; - sum += s[i]; - d[i + 8] = 0; - } - - for (c = 0; c < cols + 8; c++) { - int x = s[c + 7] - s[c - 8]; - int y = s[c + 7] + s[c - 8]; - - sum += x; - sumsq += x * y; - - d[c & 15] = s[c]; - - if (sumsq * 15 - sum * sum < flimit) { - d[c & 15] = (8 + sum + s[c]) >> 4; - } - - s[c - 8] = d[(c - 8) & 15]; - } - - s += pitch; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp10_mbpost_proc_down_c(uint8_t *dst, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - const short *rv3 = &vp10_rv[63 & rand()]; // NOLINT - - for (c = 0; c < cols; c++) { - uint8_t *s = &dst[c]; - int sumsq = 0; - int sum = 0; - uint8_t d[16]; - const short *rv2 = rv3 + ((c * 17) & 127); - - for (i = -8; i <= 6; i++) { - sumsq += s[i * pitch] * s[i * pitch]; - sum += s[i * pitch]; - } - - for (r = 0; r < rows + 8; r++) { - sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; - sum += s[7 * pitch] - s[-8 * pitch]; - d[r & 15] = s[0]; - - if (sumsq * 15 - sum * sum < flimit) { - d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; - } - - s[-8 * pitch] = d[(r - 8) & 15]; - s += pitch; - } - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - const int16_t *rv3 = &vp10_rv[63 & rand()]; // NOLINT - - for (c = 0; c < cols; c++) { - uint16_t *s = &dst[c]; - int sumsq = 0; - int sum = 0; - uint16_t d[16]; - const int16_t *rv2 = rv3 + ((c * 17) & 127); - - for (i = -8; i <= 6; i++) { - sumsq += s[i * pitch] * s[i * pitch]; - sum += s[i * pitch]; - } - - for (r = 0; r < rows + 8; r++) { - sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; - sum += s[7 * pitch] - s[-8 * pitch]; - d[r & 15] = s[0]; - - if (sumsq * 15 - sum * sum < flimit) { - d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; - } - - s[-8 * pitch] = d[(r - 8) & 15]; - s += pitch; - } - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, - int flag) { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); - (void) low_var_thresh; - (void) flag; - -#if CONFIG_VP9_HIGHBITDEPTH - if (source->flags & YV12_FLAG_HIGHBITDEPTH) { - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer), - CONVERT_TO_SHORTPTR(post->y_buffer), - source->y_stride, post->y_stride, - source->y_height, source->y_width, - ppl); - - vp10_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer), - post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer), - post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer), - CONVERT_TO_SHORTPTR(post->u_buffer), - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, - ppl); - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer), - CONVERT_TO_SHORTPTR(post->v_buffer), - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, - ppl); - } else { - vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer, - source->y_stride, post->y_stride, - source->y_height, source->y_width, ppl); - - vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); - vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); - } -#else - vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer, - source->y_stride, post->y_stride, - source->y_height, source->y_width, ppl); - - vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); - vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); -#endif // CONFIG_VP9_HIGHBITDEPTH -} - -void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int q) { - const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q - + 0.0065 + 0.5); - int i; - - const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; - const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; - const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width}; - const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height}; - - uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; - const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { -#if CONFIG_VP9_HIGHBITDEPTH - assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == - (dst->flags & YV12_FLAG_HIGHBITDEPTH)); - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]), - CONVERT_TO_SHORTPTR(dsts[i]), - src_strides[i], dst_strides[i], - src_heights[i], src_widths[i], ppl); - } else { - vp10_post_proc_down_and_across(srcs[i], dsts[i], - src_strides[i], dst_strides[i], - src_heights[i], src_widths[i], ppl); - } -#else - vp10_post_proc_down_and_across(srcs[i], dsts[i], - src_strides[i], dst_strides[i], - src_heights[i], src_widths[i], ppl); -#endif // CONFIG_VP9_HIGHBITDEPTH - } -} - -void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int q) { - const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q - + 0.0065 + 0.5); - int i; - - const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; - const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; - const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width}; - const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height}; - - uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; - const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - const int src_stride = src_strides[i]; - const int src_width = src_widths[i] - 4; - const int src_height = src_heights[i] - 4; - const int dst_stride = dst_strides[i]; - -#if CONFIG_VP9_HIGHBITDEPTH - assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == - (dst->flags & YV12_FLAG_HIGHBITDEPTH)); - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *const src_plane = CONVERT_TO_SHORTPTR( - srcs[i] + 2 * src_stride + 2); - uint16_t *const dst_plane = CONVERT_TO_SHORTPTR( - dsts[i] + 2 * dst_stride + 2); - vp10_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride, - dst_stride, src_height, src_width, - ppl); - } else { - const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; - uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; - - vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride, - dst_stride, src_height, src_width, ppl); - } -#else - const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; - uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; - vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride, - src_height, src_width, ppl); -#endif - } -} - -static double gaussian(double sigma, double mu, double x) { - return 1 / (sigma * sqrt(2.0 * 3.14159265)) * - (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))); -} - -static void fillrd(struct postproc_state *state, int q, int a) { - char char_dist[300]; - - double sigma; - int ai = a, qi = q, i; - - vpx_clear_system_state(); - - sigma = ai + .5 + .6 * (63 - qi) / 63.0; - - /* set up a lookup table of 256 entries that matches - * a gaussian distribution with sigma determined by q. - */ - { - int next, j; - - next = 0; - - for (i = -32; i < 32; i++) { - int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i)); - - if (a_i) { - for (j = 0; j < a_i; j++) { - char_dist[next + j] = (char) i; - } - - next = next + j; - } - } - - for (; next < 256; next++) - char_dist[next] = 0; - } - - for (i = 0; i < 3072; i++) { - state->noise[i] = char_dist[rand() & 0xff]; // NOLINT - } - - for (i = 0; i < 16; i++) { - state->blackclamp[i] = -char_dist[0]; - state->whiteclamp[i] = -char_dist[0]; - state->bothclamp[i] = -2 * char_dist[0]; - } - - state->last_q = q; - state->last_noise = a; -} - -static void swap_mi_and_prev_mi(VP10_COMMON *cm) { - // Current mip will be the prev_mip for the next frame. - MODE_INFO *temp = cm->postproc_state.prev_mip; - cm->postproc_state.prev_mip = cm->mip; - cm->mip = temp; - - // Update the upper left visible macroblock ptrs. - cm->mi = cm->mip + cm->mi_stride + 1; - cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1; -} - -int vp10_post_proc_frame(struct VP10Common *cm, - YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *ppflags) { - const int q = VPXMIN(105, cm->lf.filter_level * 2); - const int flags = ppflags->post_proc_flag; - YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; - struct postproc_state *const ppstate = &cm->postproc_state; - - if (!cm->frame_to_show) - return -1; - - if (!flags) { - *dest = *cm->frame_to_show; - return 0; - } - - vpx_clear_system_state(); - - // Alloc memory for prev_mip in the first frame. - if (cm->current_video_frame == 1) { - cm->postproc_state.last_base_qindex = cm->base_qindex; - cm->postproc_state.last_frame_valid = 1; - ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); - if (!ppstate->prev_mip) { - return 1; - } - ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; - memset(ppstate->prev_mip, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); - } - - // Allocate post_proc_buffer_int if needed. - if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) { - if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { - const int width = ALIGN_POWER_OF_TWO(cm->width, 4); - const int height = ALIGN_POWER_OF_TWO(cm->height, 4); - - if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif // CONFIG_VP9_HIGHBITDEPTH - VPX_ENC_BORDER_IN_PIXELS, - cm->byte_alignment) < 0) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate MFQE framebuffer"); - } - - // Ensure that postproc is set to all 0s so that post proc - // doesn't pull random data in from edge. - memset(cm->post_proc_buffer_int.buffer_alloc, 128, - cm->post_proc_buffer.frame_size); - } - } - - if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL) < 0) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate post-processing buffer"); - - if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && - cm->postproc_state.last_frame_valid && cm->bit_depth == 8 && - cm->postproc_state.last_base_qindex <= last_q_thresh && - cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) { - vp10_mfqe(cm); - // TODO(jackychen): Consider whether enable deblocking by default - // if mfqe is enabled. Need to take both the quality and the speed - // into consideration. - if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { - vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); - } - if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { - deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf, - q + (ppflags->deblocking_level - 5) * 10, - 1, 0); - } else if (flags & VP9D_DEBLOCK) { - vp10_deblock(&cm->post_proc_buffer_int, ppbuf, q); - } else { - vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); - } - } else if (flags & VP9D_DEMACROBLOCK) { - deblock_and_de_macro_block(cm->frame_to_show, ppbuf, - q + (ppflags->deblocking_level - 5) * 10, 1, 0); - } else if (flags & VP9D_DEBLOCK) { - vp10_deblock(cm->frame_to_show, ppbuf, q); - } else { - vp8_yv12_copy_frame(cm->frame_to_show, ppbuf); - } - - cm->postproc_state.last_base_qindex = cm->base_qindex; - cm->postproc_state.last_frame_valid = 1; - - if (flags & VP9D_ADDNOISE) { - const int noise_level = ppflags->noise_level; - if (ppstate->last_q != q || - ppstate->last_noise != noise_level) { - fillrd(ppstate, 63 - q, noise_level); - } - - vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp, - ppstate->whiteclamp, ppstate->bothclamp, - ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride); - } - - *dest = *ppbuf; - - /* handle problem with extending borders */ - dest->y_width = cm->width; - dest->y_height = cm->height; - dest->uv_width = dest->y_width >> cm->subsampling_x; - dest->uv_height = dest->y_height >> cm->subsampling_y; - - swap_mi_and_prev_mi(cm); - return 0; -} -#endif // CONFIG_VP9_POSTPROC diff --git a/vp10/common/postproc.h b/vp10/common/postproc.h deleted file mode 100644 index e2ce0dcc8..000000000 --- a/vp10/common/postproc.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP10_COMMON_POSTPROC_H_ -#define VP10_COMMON_POSTPROC_H_ - -#include "vpx_ports/mem.h" -#include "vpx_scale/yv12config.h" -#include "vp10/common/blockd.h" -#include "vp10/common/mfqe.h" -#include "vp10/common/ppflags.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct postproc_state { - int last_q; - int last_noise; - char noise[3072]; - int last_base_qindex; - int last_frame_valid; - MODE_INFO *prev_mip; - MODE_INFO *prev_mi; - DECLARE_ALIGNED(16, char, blackclamp[16]); - DECLARE_ALIGNED(16, char, whiteclamp[16]); - DECLARE_ALIGNED(16, char, bothclamp[16]); -}; - -struct VP10Common; - -#define MFQE_PRECISION 4 - -int vp10_post_proc_frame(struct VP10Common *cm, - YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *flags); - -void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q); - -void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP10_COMMON_POSTPROC_H_ diff --git a/vp10/common/ppflags.h b/vp10/common/ppflags.h deleted file mode 100644 index 8592fe906..000000000 --- a/vp10/common/ppflags.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP10_COMMON_PPFLAGS_H_ -#define VP10_COMMON_PPFLAGS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - VP9D_NOFILTERING = 0, - VP9D_DEBLOCK = 1 << 0, - VP9D_DEMACROBLOCK = 1 << 1, - VP9D_ADDNOISE = 1 << 2, - VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3, - VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4, - VP9D_DEBUG_TXT_DC_DIFF = 1 << 5, - VP9D_DEBUG_TXT_RATE_INFO = 1 << 6, - VP9D_DEBUG_DRAW_MV = 1 << 7, - VP9D_DEBUG_CLR_BLK_MODES = 1 << 8, - VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9, - VP9D_MFQE = 1 << 10 -}; - -typedef struct { - int post_proc_flag; - int deblocking_level; - int noise_level; -} vp10_ppflags_t; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP10_COMMON_PPFLAGS_H_ diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl index 29dfa2abe..0ca48a317 100644 --- a/vp10/common/vp10_rtcd_defs.pl +++ b/vp10/common/vp10_rtcd_defs.pl @@ -34,29 +34,6 @@ if ($opts{arch} eq "x86_64") { $avx2_x86_64 = 'avx2'; } -# -# post proc -# -if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { -add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit"; -specialize qw/vp10_mbpost_proc_down sse2/; -$vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm; - -add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit"; -specialize qw/vp10_mbpost_proc_across_ip sse2/; -$vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm; - -add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; -specialize qw/vp10_post_proc_down_and_across sse2/; -$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm; - -add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; -specialize qw/vp10_filter_by_weight16x16 sse2 msa/; - -add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; -specialize qw/vp10_filter_by_weight8x8 sse2 msa/; -} - # # 10/12-tap convolution filters # @@ -350,20 +327,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64"; - # - # post proc - # - if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { - add_proto qw/void vp10_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit"; - specialize qw/vp10_highbd_mbpost_proc_down/; - - add_proto qw/void vp10_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit"; - specialize qw/vp10_highbd_mbpost_proc_across_ip/; - - add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; - specialize qw/vp10_highbd_post_proc_down_and_across/; - } - # # dct # @@ -392,14 +355,6 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") { # ENCODEMB INVOKE -# -# Denoiser -# -if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") { - add_proto qw/int vp10_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude"; - specialize qw/vp10_denoiser_filter sse2/; -} - if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # the transform coefficients are held in 32-bit # values, so the assembler code for vp10_block_error can no longer be used. diff --git a/vp10/common/x86/mfqe_sse2.asm b/vp10/common/x86/mfqe_sse2.asm deleted file mode 100644 index e714d06db..000000000 --- a/vp10/common/x86/mfqe_sse2.asm +++ /dev/null @@ -1,287 +0,0 @@ -; -; Copyright (c) 2015 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -; This file is a duplicate of mfqe_sse2.asm in VP8. -; TODO(jackychen): Find a way to fix the duplicate. -%include "vpx_ports/x86_abi_support.asm" - -;void vp10_filter_by_weight16x16_sse2 -;( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride, -; int src_weight -;) -global sym(vp10_filter_by_weight16x16_sse2) PRIVATE -sym(vp10_filter_by_weight16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movd xmm0, arg(4) ; src_weight - pshuflw xmm0, xmm0, 0x0 ; replicate to all low words - punpcklqdq xmm0, xmm0 ; replicate to all hi words - - movdqa xmm1, [GLOBAL(tMFQE)] - psubw xmm1, xmm0 ; dst_weight - - mov rax, arg(0) ; src - mov rsi, arg(1) ; src_stride - mov rdx, arg(2) ; dst - mov rdi, arg(3) ; dst_stride - - mov rcx, 16 ; loop count - pxor xmm6, xmm6 - -.combine - movdqa xmm2, [rax] - movdqa xmm4, [rdx] - add rax, rsi - - ; src * src_weight - movdqa xmm3, xmm2 - punpcklbw xmm2, xmm6 - punpckhbw xmm3, xmm6 - pmullw xmm2, xmm0 - pmullw xmm3, xmm0 - - ; dst * dst_weight - movdqa xmm5, xmm4 - punpcklbw xmm4, xmm6 - punpckhbw xmm5, xmm6 - pmullw xmm4, xmm1 - pmullw xmm5, xmm1 - - ; sum, round and shift - paddw xmm2, xmm4 - paddw xmm3, xmm5 - paddw xmm2, [GLOBAL(tMFQE_round)] - paddw xmm3, [GLOBAL(tMFQE_round)] - psrlw xmm2, 4 - psrlw xmm3, 4 - - packuswb xmm2, xmm3 - movdqa [rdx], xmm2 - add rdx, rdi - - dec rcx - jnz .combine - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - - ret - -;void vp10_filter_by_weight8x8_sse2 -;( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride, -; int src_weight -;) -global sym(vp10_filter_by_weight8x8_sse2) PRIVATE -sym(vp10_filter_by_weight8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movd xmm0, arg(4) ; src_weight - pshuflw xmm0, xmm0, 0x0 ; replicate to all low words - punpcklqdq xmm0, xmm0 ; replicate to all hi words - - movdqa xmm1, [GLOBAL(tMFQE)] - psubw xmm1, xmm0 ; dst_weight - - mov rax, arg(0) ; src - mov rsi, arg(1) ; src_stride - mov rdx, arg(2) ; dst - mov rdi, arg(3) ; dst_stride - - mov rcx, 8 ; loop count - pxor xmm4, xmm4 - -.combine - movq xmm2, [rax] - movq xmm3, [rdx] - add rax, rsi - - ; src * src_weight - punpcklbw xmm2, xmm4 - pmullw xmm2, xmm0 - - ; dst * dst_weight - punpcklbw xmm3, xmm4 - pmullw xmm3, xmm1 - - ; sum, round and shift - paddw xmm2, xmm3 - paddw xmm2, [GLOBAL(tMFQE_round)] - psrlw xmm2, 4 - - packuswb xmm2, xmm4 - movq [rdx], xmm2 - add rdx, rdi - - dec rcx - jnz .combine - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - - ret - -;void vp10_variance_and_sad_16x16_sse2 | arg -;( -; unsigned char *src1, 0 -; int stride1, 1 -; unsigned char *src2, 2 -; int stride2, 3 -; unsigned int *variance, 4 -; unsigned int *sad, 5 -;) -global sym(vp10_variance_and_sad_16x16_sse2) PRIVATE -sym(vp10_variance_and_sad_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(0) ; src1 - mov rcx, arg(1) ; stride1 - mov rdx, arg(2) ; src2 - mov rdi, arg(3) ; stride2 - - mov rsi, 16 ; block height - - ; Prep accumulator registers - pxor xmm3, xmm3 ; SAD - pxor xmm4, xmm4 ; sum of src2 - pxor xmm5, xmm5 ; sum of src2^2 - - ; Because we're working with the actual output frames - ; we can't depend on any kind of data alignment. -.accumulate - movdqa xmm0, [rax] ; src1 - movdqa xmm1, [rdx] ; src2 - add rax, rcx ; src1 + stride1 - add rdx, rdi ; src2 + stride2 - - ; SAD(src1, src2) - psadbw xmm0, xmm1 - paddusw xmm3, xmm0 - - ; SUM(src2) - pxor xmm2, xmm2 - psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0 - paddusw xmm4, xmm2 - - ; pmaddubsw would be ideal if it took two unsigned values. instead, - ; it expects a signed and an unsigned value. so instead we zero extend - ; and operate on words. - pxor xmm2, xmm2 - movdqa xmm0, xmm1 - punpcklbw xmm0, xmm2 - punpckhbw xmm1, xmm2 - pmaddwd xmm0, xmm0 - pmaddwd xmm1, xmm1 - paddd xmm5, xmm0 - paddd xmm5, xmm1 - - sub rsi, 1 - jnz .accumulate - - ; phaddd only operates on adjacent double words. - ; Finalize SAD and store - movdqa xmm0, xmm3 - psrldq xmm0, 8 - paddusw xmm0, xmm3 - paddd xmm0, [GLOBAL(t128)] - psrld xmm0, 8 - - mov rax, arg(5) - movd [rax], xmm0 - - ; Accumulate sum of src2 - movdqa xmm0, xmm4 - psrldq xmm0, 8 - paddusw xmm0, xmm4 - ; Square src2. Ignore high value - pmuludq xmm0, xmm0 - psrld xmm0, 8 - - ; phaddw could be used to sum adjacent values but we want - ; all the values summed. promote to doubles, accumulate, - ; shift and sum - pxor xmm2, xmm2 - movdqa xmm1, xmm5 - punpckldq xmm1, xmm2 - punpckhdq xmm5, xmm2 - paddd xmm1, xmm5 - movdqa xmm2, xmm1 - psrldq xmm1, 8 - paddd xmm1, xmm2 - - psubd xmm1, xmm0 - - ; (variance + 128) >> 8 - paddd xmm1, [GLOBAL(t128)] - psrld xmm1, 8 - mov rax, arg(4) - - movd [rax], xmm1 - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -t128: -%ifndef __NASM_VER__ - ddq 128 -%elif CONFIG_BIG_ENDIAN - dq 0, 128 -%else - dq 128, 0 -%endif -align 16 -tMFQE: ; 1 << MFQE_PRECISION - times 8 dw 0x10 -align 16 -tMFQE_round: ; 1 << (MFQE_PRECISION - 1) - times 8 dw 0x08 diff --git a/vp10/common/x86/postproc_sse2.asm b/vp10/common/x86/postproc_sse2.asm deleted file mode 100644 index d477a65c2..000000000 --- a/vp10/common/x86/postproc_sse2.asm +++ /dev/null @@ -1,632 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp10_post_proc_down_and_across_xmm -;( -; unsigned char *src_ptr, -; unsigned char *dst_ptr, -; int src_pixels_per_line, -; int dst_pixels_per_line, -; int rows, -; int cols, -; int flimit -;) -global sym(vp10_post_proc_down_and_across_xmm) PRIVATE -sym(vp10_post_proc_down_and_across_xmm): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - ALIGN_STACK 16, rax - ; move the global rd onto the stack, since we don't have enough registers - ; to do PIC addressing - movdqa xmm0, [GLOBAL(rd42)] - sub rsp, 16 - movdqa [rsp], xmm0 -%define RD42 [rsp] -%else -%define RD42 [GLOBAL(rd42)] -%endif - - - movd xmm2, dword ptr arg(6) ;flimit - punpcklwd xmm2, xmm2 - punpckldq xmm2, xmm2 - punpcklqdq xmm2, xmm2 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;dst_ptr - - movsxd rcx, DWORD PTR arg(4) ;rows - movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? - pxor xmm0, xmm0 ; mm0 = 00000000 - -.nextrow: - - xor rdx, rdx ; clear out rdx for use as loop counter -.nextcol: - movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7 - punpcklbw xmm3, xmm0 ; mm3 = p0..p3 - movdqa xmm1, xmm3 ; mm1 = p0..p3 - psllw xmm3, 2 ; - - movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm6 - - ; thresholding - movdqa xmm7, xmm1 ; mm7 = r0 p0..p3 - psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3 - paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) - pcmpgtw xmm7, xmm2 - - movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - neg rax - movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7 - punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3 - paddusw xmm3, xmm4 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3 - paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - paddusw xmm3, RD42 ; mm3 += round value - psraw xmm3, 3 ; mm3 /= 8 - - pand xmm1, xmm7 ; mm1 select vals > thresh from source - pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result - paddusw xmm1, xmm7 ; combination - - packuswb xmm1, xmm0 ; pack to bytes - movq QWORD PTR [rdi], xmm1 ; - - neg rax ; pitch is positive - add rsi, 8 - add rdi, 8 - - add rdx, 8 - cmp edx, dword arg(5) ;cols - - jl .nextcol - - ; done with the all cols, start the across filtering in place - sub rsi, rdx - sub rdi, rdx - - xor rdx, rdx - movq mm0, QWORD PTR [rdi-8]; - -.acrossnextcol: - movq xmm7, QWORD PTR [rdi +rdx -2] - movd xmm4, DWORD PTR [rdi +rdx +6] - - pslldq xmm4, 8 - por xmm4, xmm7 - - movdqa xmm3, xmm4 - psrldq xmm3, 2 - punpcklbw xmm3, xmm0 ; mm3 = p0..p3 - movdqa xmm1, xmm3 ; mm1 = p0..p3 - psllw xmm3, 2 - - - movdqa xmm5, xmm4 - psrldq xmm5, 3 - punpcklbw xmm5, xmm0 ; mm5 = p1..p4 - paddusw xmm3, xmm5 ; mm3 += mm6 - - ; thresholding - movdqa xmm7, xmm1 ; mm7 = p0..p3 - psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4) - pcmpgtw xmm7, xmm2 - - movdqa xmm5, xmm4 - psrldq xmm5, 4 - punpcklbw xmm5, xmm0 ; mm5 = p2..p5 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - movdqa xmm5, xmm4 ; mm5 = p-2..p5 - punpcklbw xmm5, xmm0 ; mm5 = p-2..p1 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - psrldq xmm4, 1 ; mm4 = p-1..p5 - punpcklbw xmm4, xmm0 ; mm4 = p-1..p2 - paddusw xmm3, xmm4 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4 - psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - paddusw xmm3, RD42 ; mm3 += round value - psraw xmm3, 3 ; mm3 /= 8 - - pand xmm1, xmm7 ; mm1 select vals > thresh from source - pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result - paddusw xmm1, xmm7 ; combination - - packuswb xmm1, xmm0 ; pack to bytes - movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes - movdq2q mm0, xmm1 - - add rdx, 8 - cmp edx, dword arg(5) ;cols - jl .acrossnextcol; - - ; last 8 pixels - movq QWORD PTR [rdi+rdx-8], mm0 - - ; done with this rwo - add rsi,rax ; next line - mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch? - add rdi,rax ; next destination - mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch? - - dec rcx ; decrement count - jnz .nextrow ; next row - -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - add rsp,16 - pop rsp -%endif - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%undef RD42 - - -;void vp10_mbpost_proc_down_xmm(unsigned char *dst, -; int pitch, int rows, int cols,int flimit) -extern sym(vp10_rv) -global sym(vp10_mbpost_proc_down_xmm) PRIVATE -sym(vp10_mbpost_proc_down_xmm): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 128+16 - - ; unsigned char d[16][8] at [rsp] - ; create flimit2 at [rsp+128] - mov eax, dword ptr arg(4) ;flimit - mov [rsp+128], eax - mov [rsp+128+4], eax - mov [rsp+128+8], eax - mov [rsp+128+12], eax -%define flimit4 [rsp+128] - -%if ABI_IS_32BIT=0 - lea r8, [GLOBAL(sym(vp10_rv))] -%endif - - ;rows +=8; - add dword arg(2), 8 - - ;for(c=0; ccommon; int ret = -1; -#if !CONFIG_VP9_POSTPROC - (void)*flags; -#endif - if (pbi->ready_for_new_data == 1) return ret; @@ -486,18 +478,8 @@ int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd, return ret; pbi->ready_for_new_data = 1; - -#if CONFIG_VP9_POSTPROC - if (!cm->show_existing_frame) { - ret = vp10_post_proc_frame(cm, sd, flags); - } else { - *sd = *cm->frame_to_show; - ret = 0; - } -#else *sd = *cm->frame_to_show; ret = 0; -#endif /*!CONFIG_POSTPROC*/ vpx_clear_system_state(); return ret; } diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h index e4be2f40d..83e976e8a 100644 --- a/vp10/decoder/decoder.h +++ b/vp10/decoder/decoder.h @@ -20,7 +20,6 @@ #include "vp10/common/thread_common.h" #include "vp10/common/onyxc_int.h" -#include "vp10/common/ppflags.h" #include "vp10/decoder/dthread.h" #ifdef __cplusplus @@ -101,8 +100,7 @@ typedef struct VP10Decoder { int vp10_receive_compressed_data(struct VP10Decoder *pbi, size_t size, const uint8_t **dest); -int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp10_ppflags_t *flags); +int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd); int vp10_get_frame_to_show(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *frame); diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h index a788788f9..f12dd3d49 100644 --- a/vp10/encoder/context_tree.h +++ b/vp10/encoder/context_tree.h @@ -58,15 +58,6 @@ typedef struct { int rate; int64_t dist; -#if CONFIG_VP9_TEMPORAL_DENOISING - unsigned int newmv_sse; - unsigned int zeromv_sse; - PREDICTION_MODE best_sse_inter_mode; - int_mv best_sse_mv; - MV_REFERENCE_FRAME best_reference_frame; - MV_REFERENCE_FRAME best_zeromv_reference_frame; -#endif - // motion vector cache for adaptive motion search control in partition // search loop MV pred_mv[MAX_REF_FRAMES]; diff --git a/vp10/encoder/denoiser.c b/vp10/encoder/denoiser.c deleted file mode 100644 index 1ff854de6..000000000 --- a/vp10/encoder/denoiser.c +++ /dev/null @@ -1,521 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_scale/yv12config.h" -#include "vpx/vpx_integer.h" -#include "vp10/common/reconinter.h" -#include "vp10/encoder/context_tree.h" -#include "vp10/encoder/denoiser.h" - -/* The VP9 denoiser is a work-in-progress. It currently is only designed to work - * with speed 6, though it (inexplicably) seems to also work with speed 5 (one - * would need to modify the source code in vp10_pickmode.c and vp10_encoder.c to - * make the calls to the vp10_denoiser_* functions when in speed 5). - * - * The implementation is very similar to that of the VP8 denoiser. While - * choosing the motion vectors / reference frames, the denoiser is run, and if - * it did not modify the signal to much, the denoised block is copied to the - * signal. - */ - -#ifdef OUTPUT_YUV_DENOISED -static void make_grayscale(YV12_BUFFER_CONFIG *yuv); -#endif - -static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) { - (void)bs; - return 3 + (increase_denoising ? 1 : 0); -} - -static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) { - (void)bs; - (void)increase_denoising; - return 4; -} - -static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) { - (void)bs; - (void)increase_denoising; - return 625; -} - -static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) { - return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40); -} - -static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising, - int motion_magnitude) { - if (motion_magnitude > - noise_motion_thresh(bs, increase_denoising)) { - return 0; - } else { - return (1 << num_pels_log2_lookup[bs]) * 20; - } -} - -int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) { - return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2); -} - -static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) { - return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2); -} - -// TODO(jackychen): If increase_denoising is enabled in the future, -// we might need to update the code for calculating 'total_adj' in -// case the C code is not bit-exact with corresponding sse2 code. -int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride, - const uint8_t *mc_avg, - int mc_avg_stride, - uint8_t *avg, int avg_stride, - int increase_denoising, - BLOCK_SIZE bs, - int motion_magnitude) { - int r, c; - const uint8_t *sig_start = sig; - const uint8_t *mc_avg_start = mc_avg; - uint8_t *avg_start = avg; - int diff, adj, absdiff, delta; - int adj_val[] = {3, 4, 6}; - int total_adj = 0; - int shift_inc = 1; - - // If motion_magnitude is small, making the denoiser more aggressive by - // increasing the adjustment for each level. Add another increment for - // blocks that are labeled for increase denoising. - if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { - if (increase_denoising) { - shift_inc = 2; - } - adj_val[0] += shift_inc; - adj_val[1] += shift_inc; - adj_val[2] += shift_inc; - } - - // First attempt to apply a strong temporal denoising filter. - for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) { - diff = mc_avg[c] - sig[c]; - absdiff = abs(diff); - - if (absdiff <= absdiff_thresh(bs, increase_denoising)) { - avg[c] = mc_avg[c]; - total_adj += diff; - } else { - switch (absdiff) { - case 4: case 5: case 6: case 7: - adj = adj_val[0]; - break; - case 8: case 9: case 10: case 11: - case 12: case 13: case 14: case 15: - adj = adj_val[1]; - break; - default: - adj = adj_val[2]; - } - if (diff > 0) { - avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj); - total_adj += adj; - } else { - avg[c] = VPXMAX(0, sig[c] - adj); - total_adj -= adj; - } - } - } - sig += sig_stride; - avg += avg_stride; - mc_avg += mc_avg_stride; - } - - // If the strong filter did not modify the signal too much, we're all set. - if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) { - return FILTER_BLOCK; - } - - // Otherwise, we try to dampen the filter if the delta is not too high. - delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising)) - >> num_pels_log2_lookup[bs]) + 1; - - if (delta >= delta_thresh(bs, increase_denoising)) { - return COPY_BLOCK; - } - - mc_avg = mc_avg_start; - avg = avg_start; - sig = sig_start; - for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) { - diff = mc_avg[c] - sig[c]; - adj = abs(diff); - if (adj > delta) { - adj = delta; - } - if (diff > 0) { - // Diff positive means we made positive adjustment above - // (in first try/attempt), so now make negative adjustment to bring - // denoised signal down. - avg[c] = VPXMAX(0, avg[c] - adj); - total_adj -= adj; - } else { - // Diff negative means we made negative adjustment above - // (in first try/attempt), so now make positive adjustment to bring - // denoised signal up. - avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj); - total_adj += adj; - } - } - sig += sig_stride; - avg += avg_stride; - mc_avg += mc_avg_stride; - } - - // We can use the filter if it has been sufficiently dampened - if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) { - return FILTER_BLOCK; - } - return COPY_BLOCK; -} - -static uint8_t *block_start(uint8_t *framebuf, int stride, - int mi_row, int mi_col) { - return framebuf + (stride * mi_row * MI_SIZE) + (mi_col * MI_SIZE); -} - -static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, - MACROBLOCK *mb, - BLOCK_SIZE bs, - int increase_denoising, - int mi_row, - int mi_col, - PICK_MODE_CONTEXT *ctx, - int *motion_magnitude - ) { - int mv_col, mv_row; - int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; - MV_REFERENCE_FRAME frame; - MACROBLOCKD *filter_mbd = &mb->e_mbd; - MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi; - MB_MODE_INFO saved_mbmi; - int i, j; - struct buf_2d saved_dst[MAX_MB_PLANE]; - struct buf_2d saved_pre[MAX_MB_PLANE][2]; // 2 pre buffers - - mv_col = ctx->best_sse_mv.as_mv.col; - mv_row = ctx->best_sse_mv.as_mv.row; - *motion_magnitude = mv_row * mv_row + mv_col * mv_col; - frame = ctx->best_reference_frame; - - saved_mbmi = *mbmi; - - // If the best reference frame uses inter-prediction and there is enough of a - // difference in sum-squared-error, use it. - if (frame != INTRA_FRAME && - sse_diff > sse_diff_thresh(bs, increase_denoising, *motion_magnitude)) { - mbmi->ref_frame[0] = ctx->best_reference_frame; - mbmi->mode = ctx->best_sse_inter_mode; - mbmi->mv[0] = ctx->best_sse_mv; - } else { - // Otherwise, use the zero reference frame. - frame = ctx->best_zeromv_reference_frame; - - mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame; -#if CONFIG_EXT_INTER - if (has_second_ref(mbmi)) - mbmi->mode = ZERO_ZEROMV; - else -#endif // CONFIG_EXT_INTER - mbmi->mode = ZEROMV; - mbmi->mv[0].as_int = 0; - -#if CONFIG_EXT_INTER - if (has_second_ref(mbmi)) - ctx->best_sse_inter_mode = ZERO_ZEROMV; - else -#endif // CONFIG_EXT_INTER - ctx->best_sse_inter_mode = ZEROMV; - ctx->best_sse_mv.as_int = 0; - ctx->newmv_sse = ctx->zeromv_sse; - } - - if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { - // Restore everything to its original state - *mbmi = saved_mbmi; - return COPY_BLOCK; - } - if (*motion_magnitude > - (noise_motion_thresh(bs, increase_denoising) << 3)) { - // Restore everything to its original state - *mbmi = saved_mbmi; - return COPY_BLOCK; - } - - // We will restore these after motion compensation. - for (i = 0; i < MAX_MB_PLANE; ++i) { - for (j = 0; j < 2; ++j) { - saved_pre[i][j] = filter_mbd->plane[i].pre[j]; - } - saved_dst[i] = filter_mbd->plane[i].dst; - } - - // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser - // struct. - for (j = 0; j < 2; ++j) { - filter_mbd->plane[0].pre[j].buf = - block_start(denoiser->running_avg_y[frame].y_buffer, - denoiser->running_avg_y[frame].y_stride, - mi_row, mi_col); - filter_mbd->plane[0].pre[j].stride = - denoiser->running_avg_y[frame].y_stride; - filter_mbd->plane[1].pre[j].buf = - block_start(denoiser->running_avg_y[frame].u_buffer, - denoiser->running_avg_y[frame].uv_stride, - mi_row, mi_col); - filter_mbd->plane[1].pre[j].stride = - denoiser->running_avg_y[frame].uv_stride; - filter_mbd->plane[2].pre[j].buf = - block_start(denoiser->running_avg_y[frame].v_buffer, - denoiser->running_avg_y[frame].uv_stride, - mi_row, mi_col); - filter_mbd->plane[2].pre[j].stride = - denoiser->running_avg_y[frame].uv_stride; - } - filter_mbd->plane[0].dst.buf = - block_start(denoiser->mc_running_avg_y.y_buffer, - denoiser->mc_running_avg_y.y_stride, - mi_row, mi_col); - filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride; - filter_mbd->plane[1].dst.buf = - block_start(denoiser->mc_running_avg_y.u_buffer, - denoiser->mc_running_avg_y.uv_stride, - mi_row, mi_col); - filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride; - filter_mbd->plane[2].dst.buf = - block_start(denoiser->mc_running_avg_y.v_buffer, - denoiser->mc_running_avg_y.uv_stride, - mi_row, mi_col); - filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride; - - vp10_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs); - - // Restore everything to its original state - *mbmi = saved_mbmi; - for (i = 0; i < MAX_MB_PLANE; ++i) { - for (j = 0; j < 2; ++j) { - filter_mbd->plane[i].pre[j] = saved_pre[i][j]; - } - filter_mbd->plane[i].dst = saved_dst[i]; - } - - mv_row = ctx->best_sse_mv.as_mv.row; - mv_col = ctx->best_sse_mv.as_mv.col; - - return FILTER_BLOCK; -} - -void vp10_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, - int mi_row, int mi_col, BLOCK_SIZE bs, - PICK_MODE_CONTEXT *ctx) { - int motion_magnitude = 0; - VP9_DENOISER_DECISION decision = FILTER_BLOCK; - YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; - YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; - uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); - uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride, - mi_row, mi_col); - struct buf_2d src = mb->plane[0].src; - - decision = perform_motion_compensation(denoiser, mb, bs, - denoiser->increase_denoising, - mi_row, mi_col, ctx, - &motion_magnitude); - - if (decision == FILTER_BLOCK) { - decision = vp10_denoiser_filter(src.buf, src.stride, - mc_avg_start, mc_avg.y_stride, - avg_start, avg.y_stride, - 0, bs, motion_magnitude); - } - - if (decision == FILTER_BLOCK) { - vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, - NULL, 0, NULL, 0, - num_4x4_blocks_wide_lookup[bs] << 2, - num_4x4_blocks_high_lookup[bs] << 2); - } else { // COPY_BLOCK - vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, - NULL, 0, NULL, 0, - num_4x4_blocks_wide_lookup[bs] << 2, - num_4x4_blocks_high_lookup[bs] << 2); - } -} - -static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) { - int r; - const uint8_t *srcbuf = src.y_buffer; - uint8_t *destbuf = dest.y_buffer; - - assert(dest.y_width == src.y_width); - assert(dest.y_height == src.y_height); - - for (r = 0; r < dest.y_height; ++r) { - memcpy(destbuf, srcbuf, dest.y_width); - destbuf += dest.y_stride; - srcbuf += src.y_stride; - } -} - -static void swap_frame_buffer(YV12_BUFFER_CONFIG *dest, - YV12_BUFFER_CONFIG *src) { - uint8_t *tmp_buf = dest->y_buffer; - assert(dest->y_width == src->y_width); - assert(dest->y_height == src->y_height); - dest->y_buffer = src->y_buffer; - src->y_buffer = tmp_buf; -} - -void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser, - YV12_BUFFER_CONFIG src, - FRAME_TYPE frame_type, - int refresh_last_frame, -#if CONFIG_EXT_REFS - int refresh_bwd_ref_frame, -#endif // CONFIG_EXT_REFS - int refresh_alt_ref_frame, - int refresh_golden_frame) { - if (frame_type == KEY_FRAME) { - int i; - // Start at 1 so as not to overwrite the INTRA_FRAME - for (i = 1; i < MAX_REF_FRAMES; ++i) - copy_frame(denoiser->running_avg_y[i], src); - return; - } - - /* For non key frames */ - if (refresh_alt_ref_frame) { - swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); - } - if (refresh_golden_frame) { - swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); - } - // TODO(zoeliu): To explore whether when show_existing_frame == 1 should be - // handled differently. - if (refresh_last_frame) { - swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); - } -#if CONFIG_EXT_REFS - if (refresh_bwd_ref_frame) { - swap_frame_buffer(&denoiser->running_avg_y[BWDREF_FRAME], - &denoiser->running_avg_y[INTRA_FRAME]); - } -#endif // CONFIG_EXT_REFS -} - -void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) { - ctx->zeromv_sse = UINT_MAX; - ctx->newmv_sse = UINT_MAX; -} - -void vp10_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse, - PREDICTION_MODE mode, - PICK_MODE_CONTEXT *ctx) { - // TODO(tkopp): Use both MVs if possible - if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) { - ctx->zeromv_sse = sse; - ctx->best_zeromv_reference_frame = mbmi->ref_frame[0]; - } - - if (mbmi->mv[0].as_int != 0 && sse < ctx->newmv_sse) { - ctx->newmv_sse = sse; - ctx->best_sse_inter_mode = mode; - ctx->best_sse_mv = mbmi->mv[0]; - ctx->best_reference_frame = mbmi->ref_frame[0]; - } -} - -int vp10_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, - int ssx, int ssy, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border) { - int i, fail; - const int legacy_byte_alignment = 0; - assert(denoiser != NULL); - - for (i = 0; i < MAX_REF_FRAMES; ++i) { - fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height, - ssx, ssy, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border, legacy_byte_alignment); - if (fail) { - vp10_denoiser_free(denoiser); - return 1; - } -#ifdef OUTPUT_YUV_DENOISED - make_grayscale(&denoiser->running_avg_y[i]); -#endif - } - - fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, - ssx, ssy, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border, legacy_byte_alignment); - if (fail) { - vp10_denoiser_free(denoiser); - return 1; - } -#ifdef OUTPUT_YUV_DENOISED - make_grayscale(&denoiser->running_avg_y[i]); -#endif - denoiser->increase_denoising = 0; - denoiser->frame_buffer_initialized = 1; - - return 0; -} - -void vp10_denoiser_free(VP9_DENOISER *denoiser) { - int i; - denoiser->frame_buffer_initialized = 0; - if (denoiser == NULL) { - return; - } - for (i = 0; i < MAX_REF_FRAMES; ++i) { - vpx_free_frame_buffer(&denoiser->running_avg_y[i]); - } - vpx_free_frame_buffer(&denoiser->mc_running_avg_y); -} - -#ifdef OUTPUT_YUV_DENOISED -static void make_grayscale(YV12_BUFFER_CONFIG *yuv) { - int r, c; - uint8_t *u = yuv->u_buffer; - uint8_t *v = yuv->v_buffer; - - for (r = 0; r < yuv->uv_height; ++r) { - for (c = 0; c < yuv->uv_width; ++c) { - u[c] = UINT8_MAX / 2; - v[c] = UINT8_MAX / 2; - } - u += yuv->uv_stride; - v += yuv->uv_stride; - } -} -#endif diff --git a/vp10/encoder/denoiser.h b/vp10/encoder/denoiser.h deleted file mode 100644 index d7fdf0cff..000000000 --- a/vp10/encoder/denoiser.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_DENOISER_H_ -#define VP9_ENCODER_DENOISER_H_ - -#include "vp10/encoder/block.h" -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MOTION_MAGNITUDE_THRESHOLD (8 * 3) - -typedef enum vp10_denoiser_decision { - COPY_BLOCK, - FILTER_BLOCK -} VP9_DENOISER_DECISION; - -typedef struct vp10_denoiser { - YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; - YV12_BUFFER_CONFIG mc_running_avg_y; - int increase_denoising; - int frame_buffer_initialized; -} VP9_DENOISER; - -void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser, - YV12_BUFFER_CONFIG src, - FRAME_TYPE frame_type, - int refresh_last_frame, -#if CONFIG_EXT_REFS - int refresh_bwd_ref_frame, -#endif // CONFIG_EXT_REFS - int refresh_alt_ref_frame, - int refresh_golden_frame); - -void vp10_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, - int mi_row, int mi_col, BLOCK_SIZE bs, - PICK_MODE_CONTEXT *ctx); - -void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx); - -void vp10_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, - unsigned int sse, PREDICTION_MODE mode, - PICK_MODE_CONTEXT *ctx); - -int vp10_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, - int ssx, int ssy, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border); - -#if CONFIG_VP9_TEMPORAL_DENOISING -int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising); -#endif - -void vp10_denoiser_free(VP9_DENOISER *denoiser); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_DENOISER_H_ diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index a56c3e327..8ec2479be 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -17,9 +17,6 @@ #include "vp10/common/alloccommon.h" #include "vp10/common/filter.h" #include "vp10/common/idct.h" -#if CONFIG_VP9_POSTPROC -#include "vp10/common/postproc.h" -#endif #include "vp10/common/reconinter.h" #include "vp10/common/reconintra.h" #include "vp10/common/tile_common.h" @@ -458,9 +455,6 @@ static void dealloc_compressor_data(VP10_COMP *cpi) { vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf); vp10_free_ref_frame_buffers(cm->buffer_pool); -#if CONFIG_VP9_POSTPROC - vp10_free_postproc_buffers(cm); -#endif // CONFIG_VP9_POSTPROC #if CONFIG_LOOP_RESTORATION vp10_free_restoration_buffers(cm); #endif // CONFIG_LOOP_RESTORATION @@ -2542,11 +2536,6 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX]; cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp); -#if CONFIG_VP9_TEMPORAL_DENOISING -#ifdef OUTPUT_YUV_DENOISED - yuv_denoised_file = fopen("denoised.yuv", "ab"); -#endif -#endif #ifdef OUTPUT_YUV_SKINMAP yuv_skinmap_file = fopen("skinmap.yuv", "ab"); #endif @@ -2880,10 +2869,6 @@ void vp10_remove_compressor(VP10_COMP *cpi) { #endif } -#if CONFIG_VP9_TEMPORAL_DENOISING - vp10_denoiser_free(&(cpi->denoiser)); -#endif - for (t = 0; t < cpi->num_workers; ++t) { VPxWorker *const worker = &cpi->workers[t]; EncWorkerData *const thread_data = &cpi->tile_thr_data[t]; @@ -2923,16 +2908,8 @@ void vp10_remove_compressor(VP10_COMP *cpi) { vp10_remove_common(cm); vp10_free_ref_frame_buffers(cm->buffer_pool); -#if CONFIG_VP9_POSTPROC - vp10_free_postproc_buffers(cm); -#endif vpx_free(cpi); -#if CONFIG_VP9_TEMPORAL_DENOISING -#ifdef OUTPUT_YUV_DENOISED - fclose(yuv_denoised_file); -#endif -#endif #ifdef OUTPUT_YUV_SKINMAP fclose(yuv_skinmap_file); #endif @@ -3667,20 +3644,6 @@ void vp10_update_reference_frames(VP10_COMP *cpi) { // Dump out all reference frame images. dump_ref_frame_images(cpi); #endif // DUMP_REF_FRAME_IMAGES - -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) { - vp10_denoiser_update_frame_info(&cpi->denoiser, - *cpi->Source, - cpi->common.frame_type, - cpi->refresh_last_frame, -#if CONFIG_EXT_REFS - cpi->refresh_bwd_ref_frame, -#endif // CONFIG_EXT_REFS - cpi->refresh_alt_ref_frame, - cpi->refresh_golden_frame); - } -#endif } static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { @@ -4056,31 +4019,6 @@ static void set_size_dependent_vars(VP10_COMP *cpi, int *q, // lagged coding, and if the relevant speed feature flag is set. if (oxcf->pass == 2 && cpi->sf.static_segmentation) configure_static_seg_features(cpi); - -#if CONFIG_VP9_POSTPROC - if (oxcf->noise_sensitivity > 0) { - int l = 0; - switch (oxcf->noise_sensitivity) { - case 1: - l = 20; - break; - case 2: - l = 40; - break; - case 3: - l = 60; - break; - case 4: - case 5: - l = 100; - break; - case 6: - l = 150; - break; - } - vp10_denoise(cpi->Source, cpi->Source, l); - } -#endif // CONFIG_VP9_POSTPROC } static void init_motion_estimation(VP10_COMP *cpi) { @@ -4965,15 +4903,6 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi, encode_with_recode_loop(cpi, size, dest); } -#if CONFIG_VP9_TEMPORAL_DENOISING -#ifdef OUTPUT_YUV_DENOISED - if (oxcf->noise_sensitivity > 0) { - vp10_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME], - yuv_denoised_file); - } -#endif // OUTPUT_YUV_DENOISED -#endif // CONFIG_VP9_TEMPORAL_DENOISING - #ifdef OUTPUT_YUV_SKINMAP if (cpi->common.current_video_frame > 1) { vp10_compute_skin_map(cpi, yuv_skinmap_file); @@ -5194,23 +5123,6 @@ static void check_initial_width(VP10_COMP *cpi, } } -#if CONFIG_VP9_TEMPORAL_DENOISING -static void setup_denoiser_buffer(VP10_COMP *cpi) { - VP10_COMMON *const cm = &cpi->common; - if (cpi->oxcf.noise_sensitivity > 0 && - !cpi->denoiser.frame_buffer_initialized) { - if (vp10_denoiser_alloc(&cpi->denoiser, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VPX_ENC_BORDER_IN_PIXELS)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate denoiser"); - } -} -#endif - int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { @@ -5229,9 +5141,6 @@ int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags, check_initial_width(cpi, subsampling_x, subsampling_y); #endif // CONFIG_VP9_HIGHBITDEPTH -#if CONFIG_VP9_TEMPORAL_DENOISING - setup_denoiser_buffer(cpi); -#endif vpx_usec_timer_start(&timer); if (vp10_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, @@ -5777,20 +5686,12 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, return 0; } -int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest, - vp10_ppflags_t *flags) { +int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest) { VP10_COMMON *cm = &cpi->common; -#if !CONFIG_VP9_POSTPROC - (void)flags; -#endif - if (!cm->show_frame) { return -1; } else { int ret; -#if CONFIG_VP9_POSTPROC - ret = vp10_post_proc_frame(cm, dest, flags); -#else if (cm->frame_to_show) { *dest = *cm->frame_to_show; dest->y_width = cm->width; @@ -5801,7 +5702,6 @@ int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest, } else { ret = -1; } -#endif // !CONFIG_VP9_POSTPROC vpx_clear_system_state(); return ret; } @@ -5847,10 +5747,6 @@ int vp10_set_size_literal(VP10_COMP *cpi, unsigned int width, check_initial_width(cpi, 1, 1); #endif // CONFIG_VP9_HIGHBITDEPTH -#if CONFIG_VP9_TEMPORAL_DENOISING - setup_denoiser_buffer(cpi); -#endif - if (width) { cm->width = width; if (cm->width > cpi->initial_width) { diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index be1941370..ea24bb42c 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -17,11 +17,9 @@ #include "vpx/vp8cx.h" #include "vp10/common/alloccommon.h" -#include "vp10/common/ppflags.h" #include "vp10/common/entropymode.h" #include "vp10/common/thread_common.h" #include "vp10/common/onyxc_int.h" - #include "vp10/encoder/aq_cyclicrefresh.h" #if CONFIG_ANS #include "vp10/encoder/buf_ans.h" @@ -39,10 +37,6 @@ #include "vp10/encoder/tokenize.h" #include "vp10/encoder/variance_tree.h" -#if CONFIG_VP9_TEMPORAL_DENOISING -#include "vp10/encoder/denoiser.h" -#endif - #if CONFIG_INTERNAL_STATS #include "vpx_dsp/ssim.h" #endif @@ -586,10 +580,6 @@ typedef struct VP10_COMP { TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS]; -#if CONFIG_VP9_TEMPORAL_DENOISING - VP9_DENOISER denoiser; -#endif - int resize_pending; int resize_state; int resize_scale_num; @@ -654,8 +644,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush); -int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest, - vp10_ppflags_t *flags); +int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest); int vp10_get_last_show_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *frame); diff --git a/vp10/encoder/x86/denoiser_sse2.c b/vp10/encoder/x86/denoiser_sse2.c deleted file mode 100644 index 5c1303a72..000000000 --- a/vp10/encoder/x86/denoiser_sse2.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include - -#include "./vpx_config.h" -#include "./vp10_rtcd.h" - -#include "vpx_ports/emmintrin_compat.h" -#include "vpx/vpx_integer.h" -#include "vp10/common/reconinter.h" -#include "vp10/encoder/context_tree.h" -#include "vp10/encoder/denoiser.h" -#include "vpx_mem/vpx_mem.h" - -// Compute the sum of all pixel differences of this MB. -static INLINE int sum_diff_16x1(__m128i acc_diff) { - const __m128i k_1 = _mm_set1_epi16(1); - const __m128i acc_diff_lo = - _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8); - const __m128i acc_diff_hi = - _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8); - const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi); - const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1); - const __m128i hgfe_dcba = - _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8)); - const __m128i hgfedcba = - _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4)); - return _mm_cvtsi128_si32(hgfedcba); -} - -// Denoise a 16x1 vector. -static INLINE __m128i vp10_denoiser_16x1_sse2(const uint8_t *sig, - const uint8_t *mc_running_avg_y, - uint8_t *running_avg_y, - const __m128i *k_0, - const __m128i *k_4, - const __m128i *k_8, - const __m128i *k_16, - const __m128i *l3, - const __m128i *l32, - const __m128i *l21, - __m128i acc_diff) { - // Calculate differences - const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); - const __m128i v_mc_running_avg_y = - _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0])); - __m128i v_running_avg_y; - const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); - const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); - // Obtain the sign. FF if diff is negative. - const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0); - // Clamp absolute difference to 16 to be used to get mask. Doing this - // allows us to use _mm_cmpgt_epi8, which operates on signed byte. - const __m128i clamped_absdiff = - _mm_min_epu8(_mm_or_si128(pdiff, ndiff), *k_16); - // Get masks for l2 l1 and l0 adjustments. - const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff); - const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff); - const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff); - // Get adjustments for l2, l1, and l0. - __m128i adj2 = _mm_and_si128(mask2, *l32); - const __m128i adj1 = _mm_and_si128(mask1, *l21); - const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); - __m128i adj, padj, nadj; - - // Combine the adjustments and get absolute adjustments. - adj2 = _mm_add_epi8(adj2, adj1); - adj = _mm_sub_epi8(*l3, adj2); - adj = _mm_andnot_si128(mask0, adj); - adj = _mm_or_si128(adj, adj0); - - // Restore the sign and get positive and negative adjustments. - padj = _mm_andnot_si128(diff_sign, adj); - nadj = _mm_and_si128(diff_sign, adj); - - // Calculate filtered value. - v_running_avg_y = _mm_adds_epu8(v_sig, padj); - v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); - _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); - - // Adjustments <=7, and each element in acc_diff can fit in signed - // char. - acc_diff = _mm_adds_epi8(acc_diff, padj); - acc_diff = _mm_subs_epi8(acc_diff, nadj); - return acc_diff; -} - -// Denoise a 16x1 vector with a weaker filter. -static INLINE __m128i vp10_denoiser_adj_16x1_sse2( - const uint8_t *sig, const uint8_t *mc_running_avg_y, - uint8_t *running_avg_y, const __m128i k_0, - const __m128i k_delta, __m128i acc_diff) { - __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0])); - // Calculate differences. - const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); - const __m128i v_mc_running_avg_y = - _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0])); - const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); - const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); - // Obtain the sign. FF if diff is negative. - const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); - // Clamp absolute difference to delta to get the adjustment. - const __m128i adj = - _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta); - // Restore the sign and get positive and negative adjustments. - __m128i padj, nadj; - padj = _mm_andnot_si128(diff_sign, adj); - nadj = _mm_and_si128(diff_sign, adj); - // Calculate filtered value. - v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj); - v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj); - _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); - - // Accumulate the adjustments. - acc_diff = _mm_subs_epi8(acc_diff, padj); - acc_diff = _mm_adds_epi8(acc_diff, nadj); - return acc_diff; -} - -// Denoiser for 4xM and 8xM blocks. -static int vp10_denoiser_NxM_sse2_small( - const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, - int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride, - int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) { - int sum_diff_thresh, r, sum_diff = 0; - const int shift_inc = (increase_denoising && - motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? - 1 : 0; - uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16]; - __m128i acc_diff = _mm_setzero_si128(); - const __m128i k_0 = _mm_setzero_si128(); - const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); - const __m128i k_8 = _mm_set1_epi8(8); - const __m128i k_16 = _mm_set1_epi8(16); - // Modify each level's adjustment according to motion_magnitude. - const __m128i l3 = _mm_set1_epi8( - (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); - // Difference between level 3 and level 2 is 2. - const __m128i l32 = _mm_set1_epi8(2); - // Difference between level 2 and level 1 is 1. - const __m128i l21 = _mm_set1_epi8(1); - const uint8_t shift = (width == 4) ? 2 : 1; - - for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { - memcpy(sig_buffer[r], sig, width); - memcpy(sig_buffer[r] + width, sig + sig_stride, width); - memcpy(mc_running_buffer[r], mc_running_avg_y, width); - memcpy(mc_running_buffer[r] + width, - mc_running_avg_y + mc_avg_y_stride, width); - memcpy(running_buffer[r], running_avg_y, width); - memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width); - if (width == 4) { - memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); - memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); - memcpy(mc_running_buffer[r] + width * 2, - mc_running_avg_y + mc_avg_y_stride * 2, width); - memcpy(mc_running_buffer[r] + width * 3, - mc_running_avg_y + mc_avg_y_stride * 3, width); - memcpy(running_buffer[r] + width * 2, - running_avg_y + avg_y_stride * 2, width); - memcpy(running_buffer[r] + width * 3, - running_avg_y + avg_y_stride * 3, width); - } - acc_diff = vp10_denoiser_16x1_sse2(sig_buffer[r], - mc_running_buffer[r], - running_buffer[r], - &k_0, &k_4, &k_8, &k_16, - &l3, &l32, &l21, acc_diff); - memcpy(running_avg_y, running_buffer[r], width); - memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); - if (width == 4) { - memcpy(running_avg_y + avg_y_stride * 2, - running_buffer[r] + width * 2, width); - memcpy(running_avg_y + avg_y_stride * 3, - running_buffer[r] + width * 3, width); - } - // Update pointers for next iteration. - sig += (sig_stride << shift); - mc_running_avg_y += (mc_avg_y_stride << shift); - running_avg_y += (avg_y_stride << shift); - } - - { - sum_diff = sum_diff_16x1(acc_diff); - sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); - if (abs(sum_diff) > sum_diff_thresh) { - // Before returning to copy the block (i.e., apply no denoising), - // check if we can still apply some (weaker) temporal filtering to - // this block, that would otherwise not be denoised at all. Simplest - // is to apply an additional adjustment to running_avg_y to bring it - // closer to sig. The adjustment is capped by a maximum delta, and - // chosen such that in most cases the resulting sum_diff will be - // within the acceptable range given by sum_diff_thresh. - - // The delta is set by the excess of absolute pixel diff over the - // threshold. - const int delta = ((abs(sum_diff) - sum_diff_thresh) >> - num_pels_log2_lookup[bs]) + 1; - // Only apply the adjustment for max delta up to 3. - if (delta < 4) { - const __m128i k_delta = _mm_set1_epi8(delta); - running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); - for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { - acc_diff = vp10_denoiser_adj_16x1_sse2( - sig_buffer[r], mc_running_buffer[r], running_buffer[r], - k_0, k_delta, acc_diff); - memcpy(running_avg_y, running_buffer[r], width); - memcpy(running_avg_y + avg_y_stride, - running_buffer[r] + width, width); - if (width == 4) { - memcpy(running_avg_y + avg_y_stride * 2, - running_buffer[r] + width * 2, width); - memcpy(running_avg_y + avg_y_stride * 3, - running_buffer[r] + width * 3, width); - } - // Update pointers for next iteration. - running_avg_y += (avg_y_stride << shift); - } - sum_diff = sum_diff_16x1(acc_diff); - if (abs(sum_diff) > sum_diff_thresh) { - return COPY_BLOCK; - } - } else { - return COPY_BLOCK; - } - } - } - return FILTER_BLOCK; -} - -// Denoiser for 16xM, 32xM and 64xM blocks -static int vp10_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride, - const uint8_t *mc_running_avg_y, - int mc_avg_y_stride, - uint8_t *running_avg_y, - int avg_y_stride, - int increase_denoising, BLOCK_SIZE bs, - int motion_magnitude) { - int sum_diff_thresh, r, c, sum_diff = 0; - const int shift_inc = (increase_denoising && - motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? - 1 : 0; - __m128i acc_diff[4][4]; - const __m128i k_0 = _mm_setzero_si128(); - const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); - const __m128i k_8 = _mm_set1_epi8(8); - const __m128i k_16 = _mm_set1_epi8(16); - // Modify each level's adjustment according to motion_magnitude. - const __m128i l3 = _mm_set1_epi8( - (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); - // Difference between level 3 and level 2 is 2. - const __m128i l32 = _mm_set1_epi8(2); - // Difference between level 2 and level 1 is 1. - const __m128i l21 = _mm_set1_epi8(1); - - for (c = 0; c < 4; ++c) { - for (r = 0; r < 4; ++r) { - acc_diff[c][r] = _mm_setzero_si128(); - } - } - - for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - acc_diff[c>>4][r>>4] = vp10_denoiser_16x1_sse2( - sig, mc_running_avg_y, running_avg_y, &k_0, &k_4, - &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]); - // Update pointers for next iteration. - sig += 16; - mc_running_avg_y += 16; - running_avg_y += 16; - } - - if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]); - } - } - - // Update pointers for next iteration. - sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride; - mc_running_avg_y = mc_running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - mc_avg_y_stride; - running_avg_y = running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - avg_y_stride; - } - - { - sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); - if (abs(sum_diff) > sum_diff_thresh) { - const int delta = ((abs(sum_diff) - sum_diff_thresh) >> - num_pels_log2_lookup[bs]) + 1; - - // Only apply the adjustment for max delta up to 3. - if (delta < 4) { - const __m128i k_delta = _mm_set1_epi8(delta); - sig -= sig_stride * (4 << b_height_log2_lookup[bs]); - mc_running_avg_y -= mc_avg_y_stride * (4 << b_height_log2_lookup[bs]); - running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); - sum_diff = 0; - for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - acc_diff[c>>4][r>>4] = vp10_denoiser_adj_16x1_sse2( - sig, mc_running_avg_y, running_avg_y, k_0, - k_delta, acc_diff[c>>4][r>>4]); - // Update pointers for next iteration. - sig += 16; - mc_running_avg_y += 16; - running_avg_y += 16; - } - - if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) { - for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) { - sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]); - } - } - sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride; - mc_running_avg_y = mc_running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - mc_avg_y_stride; - running_avg_y = running_avg_y - - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + - avg_y_stride; - } - if (abs(sum_diff) > sum_diff_thresh) { - return COPY_BLOCK; - } - } else { - return COPY_BLOCK; - } - } - } - return FILTER_BLOCK; -} - -int vp10_denoiser_filter_sse2(const uint8_t *sig, int sig_stride, - const uint8_t *mc_avg, - int mc_avg_stride, - uint8_t *avg, int avg_stride, - int increase_denoising, - BLOCK_SIZE bs, - int motion_magnitude) { - if (bs == BLOCK_4X4 || bs == BLOCK_4X8) { - return vp10_denoiser_NxM_sse2_small(sig, sig_stride, - mc_avg, mc_avg_stride, - avg, avg_stride, - increase_denoising, - bs, motion_magnitude, 4); - } else if (bs == BLOCK_8X4 || bs == BLOCK_8X8 || bs == BLOCK_8X16) { - return vp10_denoiser_NxM_sse2_small(sig, sig_stride, - mc_avg, mc_avg_stride, - avg, avg_stride, - increase_denoising, - bs, motion_magnitude, 8); - } else if (bs < BLOCK_SIZES) { - return vp10_denoiser_NxM_sse2_big(sig, sig_stride, - mc_avg, mc_avg_stride, - avg, avg_stride, - increase_denoising, - bs, motion_magnitude); - } else { - return COPY_BLOCK; - } -} diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk index 80e7ddcb4..0a9dfaeef 100644 --- a/vp10/vp10_common.mk +++ b/vp10/vp10_common.mk @@ -11,7 +11,6 @@ VP10_COMMON_SRCS-yes += vp10_common.mk VP10_COMMON_SRCS-yes += vp10_iface_common.h VP10_COMMON_SRCS-yes += common/ans.h -VP10_COMMON_SRCS-yes += common/ppflags.h VP10_COMMON_SRCS-yes += common/alloccommon.c VP10_COMMON_SRCS-yes += common/blockd.c VP10_COMMON_SRCS-yes += common/debugmodes.c @@ -86,17 +85,8 @@ VP10_COMMON_SRCS-yes += common/vp10_convolve.h VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c - -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.h VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.c -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.h -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.c -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm -VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm -endif ifeq (yes,$(filter yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION))) VP10_COMMON_SRCS-yes += common/warped_motion.h VP10_COMMON_SRCS-yes += common/warped_motion.c @@ -113,10 +103,6 @@ VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct4x4_msa.c VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct8x8_msa.c VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct16x16_msa.c -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c -endif - VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c index a707d11f7..fa175c055 100644 --- a/vp10/vp10_cx_iface.c +++ b/vp10/vp10_cx_iface.c @@ -1186,34 +1186,16 @@ static vpx_codec_err_t ctrl_get_new_frame_image(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, va_list args) { -#if CONFIG_VP9_POSTPROC - vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); - if (config != NULL) { - ctx->preview_ppcfg = *config; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -#else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; -#endif } static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; - vp10_ppflags_t flags; - vp10_zero(flags); - - if (ctx->preview_ppcfg.post_proc_flag) { - flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; - flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; - flags.noise_level = ctx->preview_ppcfg.noise_level; - } - if (vp10_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { + if (vp10_get_preview_raw_frame(ctx->cpi, &sd) == 0) { yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; } else { diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c index f416eb978..352b10204 100644 --- a/vp10/vp10_dx_iface.c +++ b/vp10/vp10_dx_iface.c @@ -30,8 +30,6 @@ #include "vp10/vp10_iface_common.h" -#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) - typedef vpx_codec_stream_info_t vp10_stream_info_t; // This limit is due to framebuffer numbers. @@ -122,9 +120,6 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { (FrameWorkerData *)worker->data1; vpx_get_worker_interface()->end(worker); vp10_remove_common(&frame_worker_data->pbi->common); -#if CONFIG_VP9_POSTPROC - vp10_free_postproc_buffers(&frame_worker_data->pbi->common); -#endif #if CONFIG_LOOP_RESTORATION vp10_free_restoration_buffers(&frame_worker_data->pbi->common); #endif // CONFIG_LOOP_RESTORATION @@ -319,15 +314,6 @@ static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { cfg->noise_level = 0; } -static void set_ppflags(const vpx_codec_alg_priv_t *ctx, - vp10_ppflags_t *flags) { - flags->post_proc_flag = - ctx->postproc_cfg.post_proc_flag; - - flags->deblocking_level = ctx->postproc_cfg.deblocking_level; - flags->noise_level = ctx->postproc_cfg.noise_level; -} - static int frame_worker_hook(void *arg1, void *arg2) { FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; const uint8_t *data = frame_worker_data->data; @@ -565,7 +551,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; - vp10_ppflags_t flags = {0, 0, 0}; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; @@ -578,7 +563,7 @@ static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { check_resync(ctx, frame_worker_data->pbi); - if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) { VP10_COMMON *const cm = &frame_worker_data->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; @@ -757,7 +742,6 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, if (*iter == NULL && ctx->frame_workers != NULL) { do { YV12_BUFFER_CONFIG sd; - vp10_ppflags_t flags = {0, 0, 0}; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; @@ -765,8 +749,6 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, (FrameWorkerData *)worker->data1; ctx->next_output_worker_id = (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); // Wait for the frame from worker thread. if (winterface->sync(worker)) { // Check if worker has received any frames. @@ -775,7 +757,7 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, frame_worker_data->received_frame = 0; check_resync(ctx, frame_worker_data->pbi); } - if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) { VP10_COMMON *const cm = &frame_worker_data->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; release_last_output_frame(ctx); @@ -949,21 +931,9 @@ static vpx_codec_err_t ctrl_get_new_frame_image(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, va_list args) { -#if CONFIG_VP9_POSTPROC - vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - - if (data) { - ctx->postproc_cfg_set = 1; - ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -#else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; -#endif } static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, @@ -1193,7 +1163,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { CODEC_INTERFACE(vpx_codec_vp10_dx) = { "WebM Project VP10 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | + VPX_CODEC_CAP_DECODER | VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t decoder_init, // vpx_codec_init_fn_t decoder_destroy, // vpx_codec_destroy_fn_t diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index a0cc7e799..735ec5b6c 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk @@ -28,8 +28,6 @@ VP10_CX_SRCS-yes += encoder/cost.c VP10_CX_SRCS-yes += encoder/dct.c VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h -VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.c -VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.h VP10_CX_SRCS-yes += encoder/encodeframe.c VP10_CX_SRCS-yes += encoder/encodeframe.h VP10_CX_SRCS-yes += encoder/encodemb.c @@ -88,10 +86,6 @@ VP10_CX_SRCS-yes += encoder/aq_complexity.c VP10_CX_SRCS-yes += encoder/aq_complexity.h VP10_CX_SRCS-yes += encoder/skin_detection.c VP10_CX_SRCS-yes += encoder/skin_detection.h -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h -VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c -endif VP10_CX_SRCS-yes += encoder/temporal_filter.c VP10_CX_SRCS-yes += encoder/temporal_filter.h VP10_CX_SRCS-yes += encoder/mbgraph.c @@ -118,9 +112,6 @@ VP10_CX_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_inv_txfm_sse4.c VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp10_highbd_quantize_sse4.c endif -ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) -VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoiser_sse2.c -endif ifeq ($(CONFIG_EXT_INTER),yes) VP10_CX_SRCS-yes += encoder/wedge_utils.c VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/wedge_utils_sse2.c -- GitLab