Commit 8bf837f1 authored by Yaowu Xu

Cherry pick from AOM:

68e7e4d0 Remove VP9_CAP_POSTPROC
0738390c Remove vp9_temporal denoise
b89861a4 Remove vp9-postproc

Change-Id: I4ecaa0ac83a519c8174a494378fc23df610ff2a8
parent 134710af
......@@ -704,68 +704,6 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
}
}
#if CONFIG_VP9_TEMPORAL_DENOISING
// Check basic datarate targeting, for a single bitrate, when denoiser is on.
// The test sets up a CBR configuration with a target bitrate of 300, turns
// the denoiser flag on, runs RunLoop() over the clip and then requires
// effective_datarate_[0] to lie within [85%, 115%] of the target.
TEST_P(DatarateTestVP9Large, DenoiserLevels) {
// Rate-control buffer sizes, frame-drop threshold and quantizer range.
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_dropframe_thresh = 1;
cfg_.rc_min_quantizer = 2;
cfg_.rc_max_quantizer = 56;
// Constant bitrate mode with no lagged frames.
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
// NOTE(review): the numeric arguments are presumably width, height,
// frame-rate numerator/denominator, start frame and frame count - confirm
// against I420VideoSource.
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 140);
// For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), there is
// only one denoiser mode, denoiserYonly (which is 1), but more modes may be
// added in the future.
cfg_.rc_target_bitrate = 300;
// ResetModel() runs before the denoiser flag is set; it presumably restores
// fixture defaults, so the flag must be assigned afterwards - confirm.
ResetModel();
// Turn on the denoiser.
denoiser_on_ = 1;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
// The measured datarate must not undershoot the target by more than 15%...
ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
<< " The datarate for the file is lower than target by too much!";
// ...nor overshoot it by more than 15%.
ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
<< " The datarate for the file is greater than target by too much!";
}
// Check basic datarate targeting, for a single bitrate, when denoiser is off
// and on.
// Uses the same CBR configuration as the DenoiserLevels test but starts with
// the denoiser flag cleared, sets the off/on test flag with a period of 100,
// and then requires effective_datarate_[0] to lie within [85%, 115%] of the
// 300 target.
TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
// Rate-control buffer sizes, frame-drop threshold and quantizer range.
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_dropframe_thresh = 1;
cfg_.rc_min_quantizer = 2;
cfg_.rc_max_quantizer = 56;
// Constant bitrate mode with no lagged frames.
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
// NOTE(review): the numeric arguments are presumably width, height,
// frame-rate numerator/denominator, start frame and frame count - confirm
// against I420VideoSource.
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 299);
// For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), there is
// only one denoiser mode, denoiserYonly (which is 1), but more modes may be
// added in the future.
cfg_.rc_target_bitrate = 300;
// ResetModel() runs before the denoiser fields are set; it presumably
// restores fixture defaults, so they must be assigned afterwards - confirm.
ResetModel();
// The denoiser is off by default.
denoiser_on_ = 0;
// Set the offon test flag.
denoiser_offon_test_ = 1;
// NOTE(review): the period is presumably counted in frames and consumed by
// the fixture's per-frame hook, which is not visible here - confirm.
denoiser_offon_period_ = 100;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
// The measured datarate must not undershoot the target by more than 15%...
ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
<< " The datarate for the file is lower than target by too much!";
// ...nor overshoot it by more than 15%.
ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
<< " The datarate for the file is greater than target by too much!";
}
#endif // CONFIG_VP9_TEMPORAL_DENOISING
class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
public:
......
......@@ -87,15 +87,6 @@ void vp10_free_restoration_buffers(VP10_COMMON *cm) {
}
#endif // CONFIG_LOOP_RESTORATION
// Frees the post-processing frame buffers held by |cm|.
// When CONFIG_VP9_POSTPROC is disabled the function has nothing to release
// and only references |cm| so that the parameter is not reported as unused.
void vp10_free_postproc_buffers(VP10_COMMON *cm) {
#if !CONFIG_VP9_POSTPROC
  // No post-processing buffers are referenced in this configuration.
  (void)cm;
#else
  // Release both post-processing buffers via vpx_free_frame_buffer().
  vpx_free_frame_buffer(&cm->post_proc_buffer);
  vpx_free_frame_buffer(&cm->post_proc_buffer_int);
#endif  // !CONFIG_VP9_POSTPROC
}
void vp10_free_context_buffers(VP10_COMMON *cm) {
int i;
cm->free_mi(cm);
......
This diff is collapsed.
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_MFQE_H_
#define VP10_COMMON_MFQE_H_
#ifdef __cplusplus
extern "C" {
#endif
// Multiframe Quality Enhancement.
// The aim for MFQE is to replace pixel blocks in the current frame with
// the correlated pixel blocks (with higher quality) in the last frame.
// The replacement can only be taken in stationary blocks by checking
// the motion of the blocks and other conditions such as the SAD of
// the current block and correlated block, the variance of the block
// difference, etc.
void vp10_mfqe(struct VP10Common *cm);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_COMMON_MFQE_H_
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp10_rtcd.h"
#include "vp10/common/onyxc_int.h"
#include "vpx_dsp/mips/macros_msa.h"
// Blends an 8x8 block of |src_ptr| into |dst_ptr| in place using MSA vectors.
// Each output pixel is formed from src * src_weight + dst * dst_weight, with
// dst_weight = (1 << MFQE_PRECISION) - src_weight, and the sum is then
// reduced by MFQE_PRECISION bits through SRARI_H2_SH.
// NOTE(review): LD2, INSERT_D2_SB, UNPCK_UB_SH, SRARI_H2_SH and ST8x2_UB come
// from macros_msa.h, which is not visible here; the per-macro descriptions
// below are inferred from their names and usage - confirm.
static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight) {
// The two weights always sum to 1 << MFQE_PRECISION.
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
// 64-bit scalars that carry one 8-pixel row each.
uint64_t src0_d, src1_d, dst0_d, dst1_d;
// Zero-initialised because INSERT_D2_SB presumably writes individual lanes
// of an already-defined vector.
v16i8 src0 = { 0 };
v16i8 src1 = { 0 };
v16i8 dst0 = { 0 };
v16i8 dst1 = { 0 };
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
// Broadcast both weights to every 16-bit lane.
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
// Two iterations; each one consumes four source rows and writes four
// destination rows, for eight rows in total.
for (row = 2; row--;) {
// Rows 0-1 of this group: two rows packed into one vector each for src/dst.
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src0);
INSERT_D2_SB(dst0_d, dst1_d, dst0);
// Rows 2-3 of this group; dst_ptr has not advanced yet, hence the offset.
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src1);
INSERT_D2_SB(dst0_d, dst1_d, dst1);
// Widen the first row pair to 16-bit halves and form the weighted sum.
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
// Pack the even bytes of both halves back into one byte vector and store
// it over two destination rows.
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst0, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
// Same weighted sum and store for the second row pair.
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst1, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
}
}
// Blends a 16x16 block of |src_ptr| into |dst_ptr| in place using MSA vectors.
// Each output pixel is formed from src * src_weight + dst * dst_weight, with
// dst_weight = (1 << MFQE_PRECISION) - src_weight, and the sum is then
// reduced by MFQE_PRECISION bits through SRARI_H2_SH.
// NOTE(review): LD_SB4, UNPCK_UB_SH, SRARI_H2_SH and PCKEV_ST_SB come from
// macros_msa.h, which is not visible here; the per-macro descriptions below
// are inferred from their names and usage - confirm.
static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
int32_t src_stride,
uint8_t *dst_ptr,
int32_t dst_stride,
int32_t src_weight) {
// The two weights always sum to 1 << MFQE_PRECISION.
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
// Broadcast both weights to every 16-bit lane.
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
// Four iterations of four rows each, for sixteen rows in total.
for (row = 4; row--;) {
// Fetch four rows from the source and the matching four destination rows.
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
src_ptr += (4 * src_stride);
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
// Row 0: widen to 16-bit halves, form the weighted sum, reduce, pack and
// store, then step to the next destination row.
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
// Row 1: same sequence.
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
// Row 2: same sequence.
UNPCK_UB_SH(src2, src_r, src_l);
UNPCK_UB_SH(dst2, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
// Row 3: same sequence.
UNPCK_UB_SH(src3, src_r, src_l);
UNPCK_UB_SH(dst3, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
}
}
// Externally visible MSA entry point for the 8x8 weighted blend.
// Forwards all five arguments unchanged to the file-local
// filter_by_weight8x8_msa(), which modifies |dst| in place.
void vp10_filter_by_weight8x8_msa(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
int src_weight) {
filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
}
// Externally visible MSA entry point for the 16x16 weighted blend.
// Forwards all five arguments unchanged to the file-local
// filter_by_weight16x16_msa(), which modifies |dst| in place.
void vp10_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
int src_weight) {
filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
}
......@@ -26,10 +26,6 @@
#include "vp10/common/tile_common.h"
#include "vp10/common/restoration.h"
#if CONFIG_VP9_POSTPROC
#include "vp10/common/postproc.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
......@@ -171,10 +167,6 @@ typedef struct VP10Common {
int new_fb_idx;
#if CONFIG_VP9_POSTPROC
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
#endif
#if CONFIG_LOOP_RESTORATION
YV12_BUFFER_CONFIG tmp_loop_buf;
#endif // CONFIG_LOOP_RESTORATION
......@@ -317,10 +309,6 @@ typedef struct VP10Common {
vpx_bit_depth_t bit_depth;
vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer
#if CONFIG_VP9_POSTPROC
struct postproc_state postproc_state;
#endif
int error_resilient_mode;
#if !CONFIG_EXT_TILE
......
This diff is collapsed.
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_POSTPROC_H_
#define VP10_COMMON_POSTPROC_H_
#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"
#include "vp10/common/blockd.h"
#include "vp10/common/mfqe.h"
#include "vp10/common/ppflags.h"
#ifdef __cplusplus
extern "C" {
#endif
// State carried by the post-processing code between calls.
// NOTE(review): the code that reads and writes these fields is not visible
// here, so the per-field notes below are inferred from names only - confirm.
struct postproc_state {
// Presumably the quantizer and noise level used on the previous call.
int last_q;
int last_noise;
// Fixed 3072-byte buffer; presumably a pre-generated noise pattern.
char noise[3072];
int last_base_qindex;
int last_frame_valid;
// Pointers to MODE_INFO data; presumably belonging to the previous frame.
MODE_INFO *prev_mip;
MODE_INFO *prev_mi;
// Three 16-byte arrays declared through DECLARE_ALIGNED with an argument of
// 16 (presumably 16-byte alignment for SIMD access).
DECLARE_ALIGNED(16, char, blackclamp[16]);
DECLARE_ALIGNED(16, char, whiteclamp[16]);
DECLARE_ALIGNED(16, char, bothclamp[16]);
};
struct VP10Common;
#define MFQE_PRECISION 4
int vp10_post_proc_frame(struct VP10Common *cm,
YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *flags);
void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_COMMON_POSTPROC_H_
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_PPFLAGS_H_
#define VP10_COMMON_PPFLAGS_H_
#ifdef __cplusplus
extern "C" {
#endif
// Post-processing flag bits. Every non-zero enumerator is a distinct single
// bit (1 << n), so values can be OR-ed together; VP9D_NOFILTERING is the
// empty set.
// NOTE(review): presumably these are the values stored in
// vp10_ppflags_t::post_proc_flag - confirm against the users of that field.
enum {
VP9D_NOFILTERING = 0,
VP9D_DEBLOCK = 1 << 0,
VP9D_DEMACROBLOCK = 1 << 1,
VP9D_ADDNOISE = 1 << 2,
// Bits 3 through 9 carry DEBUG_* names.
VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3,
VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4,
VP9D_DEBUG_TXT_DC_DIFF = 1 << 5,
VP9D_DEBUG_TXT_RATE_INFO = 1 << 6,
VP9D_DEBUG_DRAW_MV = 1 << 7,
VP9D_DEBUG_CLR_BLK_MODES = 1 << 8,
VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
VP9D_MFQE = 1 << 10
};
// Post-processing options bundle; a pointer to this type is the |flags|
// argument in the vp10_post_proc_frame() prototype.
// NOTE(review): field semantics are inferred from names only - confirm.
typedef struct {
// Presumably a bitwise OR of the VP9D_* flag values.
int post_proc_flag;
int deblocking_level;
int noise_level;
} vp10_ppflags_t;
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_COMMON_PPFLAGS_H_
......@@ -34,29 +34,6 @@ if ($opts{arch} eq "x86_64") {
$avx2_x86_64 = 'avx2';
}
#
# post proc
#
# Declare the 8-bit post-processing entry points only when the build
# configuration reports CONFIG_VP9_POSTPROC as "yes". Each add_proto gives the
# return type, function name and argument list; the following specialize line
# names the function again together with its optimized variants.
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
# NOTE(review): the $..._sse2 assignments presumably point the sse2 variant at
# an implementation whose symbol ends in _xmm - confirm against the rtcd
# generator.
add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
specialize qw/vp10_mbpost_proc_down sse2/;
$vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm;
add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
specialize qw/vp10_mbpost_proc_across_ip sse2/;
$vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm;
add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp10_post_proc_down_and_across sse2/;
$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
# The weighted-blend helpers list both sse2 and msa variants.
add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp10_filter_by_weight8x8 sse2 msa/;
}
#
# 10/12-tap convolution filters
#
......@@ -350,20 +327,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64";
#
# post proc
#
# Declare the uint16_t (high bit depth) post-processing entry points only when
# CONFIG_VP9_POSTPROC is "yes". The specialize lines name no optimized
# variants for any of the three functions.
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
add_proto qw/void vp10_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit";
specialize qw/vp10_highbd_mbpost_proc_down/;
add_proto qw/void vp10_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit";
specialize qw/vp10_highbd_mbpost_proc_across_ip/;
add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp10_highbd_post_proc_down_and_across/;
}
#
# dct
#
......@@ -392,14 +355,6 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
# ENCODEMB INVOKE
#
# Denoiser
#
# Declare vp10_denoiser_filter, with an sse2 variant, only when
# CONFIG_VP9_TEMPORAL_DENOISING is "yes".
if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
add_proto qw/int vp10_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude";
specialize qw/vp10_denoiser_filter sse2/;
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
......
;
; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; This file is a duplicate of mfqe_sse2.asm in VP8.
; TODO(jackychen): Find a way to fix the duplicate.
%include "vpx_ports/x86_abi_support.asm"
;void vp10_filter_by_weight16x16_sse2
;(
; unsigned char *src,
; int src_stride,
; unsigned char *dst,
; int dst_stride,
; int src_weight
;)
; Blends 16 rows of 16 bytes from src into dst in place. Per pixel:
;   dst = (src * src_weight + dst * dst_weight + tMFQE_round) >> 4
; with dst_weight = tMFQE - src_weight, computed per 16-bit lane.
; Both src and dst rows are accessed with movdqa, so every row address must
; be 16-byte aligned.
; NOTE(review): tMFQE and tMFQE_round are defined outside this view;
; presumably every word holds 16 and 8 respectively - confirm.
global sym(vp10_filter_by_weight16x16_sse2) PRIVATE
sym(vp10_filter_by_weight16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
; xmm6 is used below as the zero register; SAVE_XMM 6 presumably preserves
; it where the ABI requires.
SAVE_XMM 6
GET_GOT rbx
push rsi
push rdi
; end prolog
movd xmm0, arg(4) ; src_weight
pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
punpcklqdq xmm0, xmm0 ; replicate to all hi words
movdqa xmm1, [GLOBAL(tMFQE)]
psubw xmm1, xmm0 ; dst_weight
mov rax, arg(0) ; src
mov rsi, arg(1) ; src_stride
mov rdx, arg(2) ; dst
mov rdi, arg(3) ; dst_stride
mov rcx, 16 ; loop count
pxor xmm6, xmm6 ; zero, used to widen bytes to words
; one iteration per row
.combine
movdqa xmm2, [rax]
movdqa xmm4, [rdx]
add rax, rsi
; src * src_weight
movdqa xmm3, xmm2
punpcklbw xmm2, xmm6
punpckhbw xmm3, xmm6
pmullw xmm2, xmm0
pmullw xmm3, xmm0
; dst * dst_weight
movdqa xmm5, xmm4
punpcklbw xmm4, xmm6
punpckhbw xmm5, xmm6
pmullw xmm4, xmm1
pmullw xmm5, xmm1
; sum, round and shift
paddw xmm2, xmm4
paddw xmm3, xmm5
paddw xmm2, [GLOBAL(tMFQE_round)]
paddw xmm3, [GLOBAL(tMFQE_round)]
psrlw xmm2, 4
psrlw xmm3, 4
; pack the two word halves back to 16 bytes and overwrite the dst row
packuswb xmm2, xmm3
movdqa [rdx], xmm2
add rdx, rdi
dec rcx
jnz .combine
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp10_filter_by_weight8x8_sse2
;(
; unsigned char *src,
; int src_stride,
; unsigned char *dst,
; int dst_stride,
; int src_weight
;)
; Blends 8 rows of 8 bytes from src into dst in place. Per pixel:
;   dst = (src * src_weight + dst * dst_weight + tMFQE_round) >> 4
; with dst_weight = tMFQE - src_weight, computed per 16-bit lane.
; Rows are moved with movq (8 bytes); only xmm0-xmm4 are used, and there is
; no SAVE_XMM / RESTORE_XMM pair in this routine.
; NOTE(review): tMFQE and tMFQE_round are defined outside this view;
; presumably every word holds 16 and 8 respectively - confirm.
global sym(vp10_filter_by_weight8x8_sse2) PRIVATE
sym(vp10_filter_by_weight8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
movd xmm0, arg(4) ; src_weight
pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
punpcklqdq xmm0, xmm0 ; replicate to all hi words
movdqa xmm1, [GLOBAL(tMFQE)]
psubw xmm1, xmm0 ; dst_weight
mov rax, arg(0) ; src
mov rsi, arg(1) ; src_stride
mov rdx, arg(2) ; dst
mov rdi, arg(3) ; dst_stride
mov rcx, 8 ; loop count
pxor xmm4, xmm4 ; zero, used to widen bytes to words
; one iteration per row
.combine
movq xmm2, [rax]
movq xmm3, [rdx]
add rax, rsi
; src * src_weight
punpcklbw xmm2, xmm4
pmullw xmm2, xmm0
; dst * dst_weight
punpcklbw xmm3, xmm4
pmullw xmm3, xmm1
; sum, round and shift
paddw xmm2, xmm3
paddw xmm2, [GLOBAL(tMFQE_round)]
psrlw xmm2, 4
; pack words to bytes (upper half packed from the zero register) and
; overwrite the 8-byte dst row
packuswb xmm2, xmm4
movq [rdx], xmm2
add rdx, rdi
dec rcx
jnz .combine
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp10_variance_and_sad_16x16_sse2 | arg
;(
; unsigned char *src1, 0
; int stride1, 1
; unsigned char *src2, 2
; int stride2, 3
; unsigned int *variance, 4
; unsigned int *sad, 5
;)
global sym(vp10_variance_and_sad_16x16_sse2) PRIVATE
sym(vp10_variance_and_sad_16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rax, arg(0) ; src1
mov rcx, arg(1) ; stride1
mov rdx, arg(2) ; src2
mov rdi, arg(3) ; stride2
mov rsi, 16 ; block height