Commit a9f47968, authored by JackyChen, committed by Gerrit Code Review
Browse files

Merge "Add SSE2 code and unit test for VP9 denoiser."

parents a7555158 80465dae
......@@ -135,13 +135,16 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
endif
# The denoiser unit test calls vp9_denoiser_filter_sse2 directly, so it is
# built only when temporal denoising is enabled AND the target has SSE2.
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp9_denoiser_sse2_test.cc
endif
endif # VP9
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
endif # CONFIG_SHARED
##
## TEST DATA
##
......
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_denoiser.h"
using libvpx_test::ACMRandom;
namespace {
// Size, in pixels, of each test buffer: one 64x64 block.
const int kNumPixels = 64 * 64;
// Parameterized fixture: every instance receives one block size as an int
// test parameter and exposes it to the test body through `bs`.
class VP9DenoiserTest : public ::testing::TestWithParam<int> {
 public:
  virtual ~VP9DenoiserTest() {}

  // Converts the integer test parameter into the BLOCK_SIZE under test.
  virtual void SetUp() { bs = static_cast<BLOCK_SIZE>(GetParam()); }

  // Clears system state after each test through the libvpx_test helper.
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Block size exercised by the current test instance.
  BLOCK_SIZE bs;
};
// Checks that vp9_denoiser_filter_sse2 produces exactly the same denoised
// pixels as vp9_denoiser_filter_c for the block size under test, over many
// randomly generated input blocks.
// NOTE(review): the SSE2 function is called unconditionally here; confirm
// this file is only built for targets where that symbol exists.
TEST_P(VP9DenoiserTest, BitexactCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 4000;
  // All buffers hold one 64x64 block, so rows are 64 bytes apart.
  const int stride = 64;
  // Dimensions of the block under test; invariant across iterations.
  const int block_width = 4 << b_width_log2_lookup[bs];
  const int block_height = 4 << b_height_log2_lookup[bs];

  // Allocate the space for input and output,
  // where sig_block is the block to be denoised,
  // mc_avg_block is the denoised reference block,
  // avg_block_c is the denoised result from C code,
  // avg_block_sse2 is the denoised result from SSE2 code.
  DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block, kNumPixels);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);

  for (int i = 0; i < count_test_block; ++i) {
    // Random motion magnitude drawn from
    // [0, 1.2 * MOTION_MAGNITUDE_THRESHOLD), so that part of the draws
    // exceeds the threshold.
    const uint8_t motion_magnitude_random =
        rnd.Rand8() % static_cast<uint8_t>(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
      sig_block[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
      // The sign and the magnitude are drawn in separate statements: two
      // Rand8() calls inside a single expression may be evaluated in either
      // order, which would make the generated data compiler-dependent.
      const int sign = (rnd.Rand8() % 2 == 0) ? -1 : 1;
      const int magnitude = rnd.Rand8() % 20;
      const int temp = sig_block[j] + sign * magnitude;
      // Clip.
      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
    }

    // Give both outputs a defined starting state so the comparison below
    // never reads indeterminate memory if a filter leaves pixels unwritten.
    memset(avg_block_c, 0, kNumPixels);
    memset(avg_block_sse2, 0, kNumPixels);

    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(
        sig_block, stride, mc_avg_block, stride, avg_block_c, stride,
        0, bs, motion_magnitude_random));
    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(
        sig_block, stride, mc_avg_block, stride, avg_block_sse2, stride,
        0, bs, motion_magnitude_random));

    // Test bitexactness over the pixels that belong to the block.
    for (int h = 0; h < block_height; ++h) {
      for (int w = 0; w < block_width; ++w) {
        EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);
      }
    }
  }
}
// Instantiate the fixture, under the "SSE2" prefix, once for each of the
// block sizes listed below (BLOCK_4X4 through BLOCK_64X64).
INSTANTIATE_TEST_CASE_P(
SSE2, VP9DenoiserTest,
::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32,
BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
BLOCK_64X64));
} // namespace
......@@ -36,7 +36,6 @@ const int size_group_lookup[BLOCK_SIZES] =
const int num_pels_log2_lookup[BLOCK_SIZES] =
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12};
const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = {
{ // 4X4
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
......
......@@ -1115,6 +1115,14 @@ specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vp9_subtract_block neon/, "$sse2_x86inc";
#
# Denoiser
#
# Declared only when temporal denoising is configured. The prototype takes the
# source block, the motion-compensated average and the output block (each with
# its stride), plus the denoising mode, block size and motion magnitude; an
# sse2 specialization is registered for it.
if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
add_proto qw/int vp9_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude";
specialize qw/vp9_denoiser_filter sse2/;
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp9_block_error can no longer be used.
......
......@@ -31,9 +31,6 @@
static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
#endif
static const int widths[] = {4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64};
static const int heights[] = {4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64};
static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) {
(void)bs;
return 3 + (increase_denoising ? 1 : 0);
......@@ -52,7 +49,9 @@ static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
}
static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
return widths[bs] * heights[bs] * (increase_denoising ? 60 : 40);
return (4 << b_width_log2_lookup[bs]) *
(4 << b_height_log2_lookup[bs]) *
(increase_denoising ? 60 : 40);
}
static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
......@@ -61,25 +60,31 @@ static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
noise_motion_thresh(bs, increase_denoising)) {
return 0;
} else {
return widths[bs] * heights[bs] * 20;
return (4 << b_width_log2_lookup[bs]) *
(4 << b_height_log2_lookup[bs]) * 20;
}
}
static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2);
int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
return (4 << b_width_log2_lookup[bs]) *
(4 << b_height_log2_lookup[bs]) * (increase_denoising ? 3 : 2);
}
static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2);
return (4 << b_width_log2_lookup[bs]) *
(4 << b_height_log2_lookup[bs]) * (increase_denoising ? 3 : 2);
}
static VP9_DENOISER_DECISION denoiser_filter(const uint8_t *sig, int sig_stride,
const uint8_t *mc_avg,
int mc_avg_stride,
uint8_t *avg, int avg_stride,
int increase_denoising,
BLOCK_SIZE bs,
int motion_magnitude) {
// TODO(jackychen): If increase_denoising is enabled in the future,
// we might need to update the code for calculating 'total_adj' in
// case the C code is not bit-exact with corresponding sse2 code.
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
const uint8_t *mc_avg,
int mc_avg_stride,
uint8_t *avg, int avg_stride,
int increase_denoising,
BLOCK_SIZE bs,
int motion_magnitude) {
int r, c;
const uint8_t *sig_start = sig;
const uint8_t *mc_avg_start = mc_avg;
......@@ -102,8 +107,8 @@ static VP9_DENOISER_DECISION denoiser_filter(const uint8_t *sig, int sig_stride,
}
// First attempt to apply a strong temporal denoising filter.
for (r = 0; r < heights[bs]; ++r) {
for (c = 0; c < widths[bs]; ++c) {
for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) {
diff = mc_avg[c] - sig[c];
absdiff = abs(diff);
......@@ -152,8 +157,8 @@ static VP9_DENOISER_DECISION denoiser_filter(const uint8_t *sig, int sig_stride,
mc_avg = mc_avg_start;
avg = avg_start;
sig = sig_start;
for (r = 0; r < heights[bs]; ++r) {
for (c = 0; c < widths[bs]; ++c) {
for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) {
diff = mc_avg[c] - sig[c];
adj = abs(diff);
if (adj > delta) {
......@@ -193,8 +198,8 @@ static uint8_t *block_start(uint8_t *framebuf, int stride,
static void copy_block(uint8_t *dest, int dest_stride,
const uint8_t *src, int src_stride, BLOCK_SIZE bs) {
int r;
for (r = 0; r < heights[bs]; ++r) {
vpx_memcpy(dest, src, widths[bs]);
for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
vpx_memcpy(dest, src, (4 << b_width_log2_lookup[bs]));
dest += dest_stride;
src += src_stride;
}
......@@ -336,10 +341,10 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
&motion_magnitude);
if (decision == FILTER_BLOCK) {
decision = denoiser_filter(src.buf, src.stride,
mc_avg_start, mc_avg.y_stride,
avg_start, avg.y_stride,
0, bs, motion_magnitude);
decision = vp9_denoiser_filter(src.buf, src.stride,
mc_avg_start, mc_avg.y_stride,
avg_start, avg.y_stride,
0, bs, motion_magnitude);
}
if (decision == FILTER_BLOCK) {
......
......@@ -55,6 +55,10 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
#endif
int border);
#if CONFIG_VP9_TEMPORAL_DENOISING
// Returns the total-adjustment threshold for the strong filter: the block
// area in pixels multiplied by 3 when increase_denoising is non-zero, and by
// 2 otherwise.
int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising);
#endif
void vp9_denoiser_free(VP9_DENOISER *denoiser);
#ifdef __cplusplus
......
This diff is collapsed.
......@@ -120,6 +120,10 @@ VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c
# SSE2 denoiser source: added only when temporal denoising is enabled and the
# target has SSE2.
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c
endif
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment