Commit 7756e989 authored by Ronald S. Bultje's avatar Ronald S. Bultje Committed by Gerrit Code Review
Browse files

Merge "Add subtract_block SSE2 version and unit test."

parents 9a480482 25c588b1
......@@ -66,6 +66,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
......
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
extern "C" {
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_blockd.h"
}
typedef void (*subtract_fn_t)(int rows, int cols,
int16_t *diff_ptr, ptrdiff_t diff_stride,
const uint8_t *src_ptr, ptrdiff_t src_stride,
const uint8_t *pred_ptr, ptrdiff_t pred_stride);
namespace vp9 {
class VP9SubtractBlockTest : public ::testing::TestWithParam<subtract_fn_t> {
public:
virtual void TearDown() {
libvpx_test::ClearSystemState();
}
};
using libvpx_test::ACMRandom;
TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
// FIXME(rbultje) split in its own file
for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
const int block_width = 4 << b_width_log2(bsize);
const int block_height = 4 << b_height_log2(bsize);
int16_t *diff = new int16_t[block_width * block_height * 2];
uint8_t *pred = new uint8_t[block_width * block_height * 2];
uint8_t *src = new uint8_t[block_width * block_height * 2];
for (int n = 0; n < 100; n++) {
for (int r = 0; r < block_height; ++r) {
for (int c = 0; c < block_width * 2; ++c) {
src[r * block_width * 2 + c] = rnd.Rand8();
pred[r * block_width * 2 + c] = rnd.Rand8();
}
}
GetParam()(block_height, block_width, diff, block_width,
src, block_width, pred, block_width);
for (int r = 0; r < block_height; ++r) {
for (int c = 0; c < block_width; ++c) {
EXPECT_EQ(diff[r * block_width + c],
(src[r * block_width + c] -
pred[r * block_width + c])) << "r = " << r
<< ", c = " << c
<< ", bs = " << bsize;
}
}
GetParam()(block_height, block_width, diff, block_width * 2,
src, block_width * 2, pred, block_width * 2);
for (int r = 0; r < block_height; ++r) {
for (int c = 0; c < block_width; ++c) {
EXPECT_EQ(diff[r * block_width * 2 + c],
(src[r * block_width * 2 + c] -
pred[r * block_width * 2 + c])) << "r = " << r
<< ", c = " << c
<< ", bs = " << bsize;
}
}
}
delete[] diff;
delete[] pred;
delete[] src;
}
}
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_c));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_sse2));
#endif
} // namespace vp9
......@@ -533,6 +533,9 @@ prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
specialize vp9_block_error mmx sse2
vp9_block_error_sse2=vp9_block_error_xmm
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
specialize vp9_subtract_block sse2
#
# Structured Similarity (SSIM)
#
......
......@@ -22,10 +22,10 @@
DECLARE_ALIGNED(16, extern const uint8_t,
vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
void vp9_subtract_block(int rows, int cols,
int16_t *diff_ptr, int diff_stride,
const uint8_t *src_ptr, int src_stride,
const uint8_t *pred_ptr, int pred_stride) {
void vp9_subtract_block_c(int rows, int cols,
int16_t *diff_ptr, ptrdiff_t diff_stride,
const uint8_t *src_ptr, ptrdiff_t src_stride,
const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
int r, c;
for (r = 0; r < rows; r++) {
......
......@@ -42,10 +42,6 @@ void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_xform_quant_sby(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_xform_quant_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_subtract_block(int rows, int cols,
int16_t *diff_ptr, int diff_stride,
const uint8_t *src_ptr, int src_stride,
const uint8_t *pred_ptr, int pred_stride);
void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void vp9_subtract_sb(MACROBLOCK *xd, BLOCK_SIZE_TYPE bsize);
......
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
global sym(vp9_subtract_b_mmx_impl) PRIVATE
sym(vp9_subtract_b_mmx_impl):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rdi, arg(2) ;diff
mov rax, arg(3) ;Predictor
mov rsi, arg(0) ;z
movsxd rdx, dword ptr arg(1);src_stride;
movsxd rcx, dword ptr arg(4);pitch
pxor mm7, mm7
movd mm0, [rsi]
movd mm1, [rax]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
psubw mm0, mm1
movq [rdi], mm0
movd mm0, [rsi+rdx]
movd mm1, [rax+rcx]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
psubw mm0, mm1
movq [rdi+rcx*2],mm0
movd mm0, [rsi+rdx*2]
movd mm1, [rax+rcx*2]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
psubw mm0, mm1
movq [rdi+rcx*4], mm0
lea rsi, [rsi+rdx*2]
lea rcx, [rcx+rcx*2]
movd mm0, [rsi+rdx]
movd mm1, [rax+rcx]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
psubw mm0, mm1
movq [rdi+rcx*2], mm0
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp9_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)
global sym(vp9_subtract_mby_mmx) PRIVATE
sym(vp9_subtract_mby_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
; end prolog
mov rsi, arg(1) ;src
mov rdi, arg(0) ;diff
mov rax, arg(2) ;pred
movsxd rdx, dword ptr arg(3) ;stride
mov rcx, 16
pxor mm0, mm0
.submby_loop:
movq mm1, [rsi]
movq mm3, [rax]
movq mm2, mm1
movq mm4, mm3
punpcklbw mm1, mm0
punpcklbw mm3, mm0
punpckhbw mm2, mm0
punpckhbw mm4, mm0
psubw mm1, mm3
psubw mm2, mm4
movq [rdi], mm1
movq [rdi+8], mm2
movq mm1, [rsi+8]
movq mm3, [rax+8]
movq mm2, mm1
movq mm4, mm3
punpcklbw mm1, mm0
punpcklbw mm3, mm0
punpckhbw mm2, mm0
punpckhbw mm4, mm0
psubw mm1, mm3
psubw mm2, mm4
movq [rdi+16], mm1
movq [rdi+24], mm2
add rdi, 32
add rax, 16
lea rsi, [rsi+rdx]
sub rcx, 1
jnz .submby_loop
pop rdi
pop rsi
; begin epilog
UNSHADOW_ARGS
pop rbp
ret
;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
global sym(vp9_subtract_mbuv_mmx) PRIVATE
sym(vp9_subtract_mbuv_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
;short *udiff = diff + 256;
;short *vdiff = diff + 320;
;unsigned char *upred = pred + 256;
;unsigned char *vpred = pred + 320;
;unsigned char *z = usrc;
;unsigned short *diff = udiff;
;unsigned char *Predictor= upred;
mov rdi, arg(0) ;diff
mov rax, arg(3) ;pred
mov rsi, arg(1) ;z = usrc
add rdi, 256*2 ;diff = diff + 256 (shorts)
add rax, 256 ;Predictor = pred + 256
movsxd rdx, dword ptr arg(4) ;stride;
pxor mm7, mm7
movq mm0, [rsi]
movq mm1, [rax]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi], mm0
movq [rdi+8], mm3
movq mm0, [rsi+rdx]
movq mm1, [rax+8]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+16], mm0
movq [rdi+24], mm3
movq mm0, [rsi+rdx*2]
movq mm1, [rax+16]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+32], mm0
movq [rdi+40], mm3
lea rsi, [rsi+rdx*2]
movq mm0, [rsi+rdx]
movq mm1, [rax+24]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+48], mm0
movq [rdi+56], mm3
add rdi, 64
add rax, 32
lea rsi, [rsi+rdx*2]
movq mm0, [rsi]
movq mm1, [rax]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi], mm0
movq [rdi+8], mm3
movq mm0, [rsi+rdx]
movq mm1, [rax+8]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+16], mm0
movq [rdi+24], mm3
movq mm0, [rsi+rdx*2]
movq mm1, [rax+16]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+32], mm0
movq [rdi+40], mm3
lea rsi, [rsi+rdx*2]
movq mm0, [rsi+rdx]
movq mm1, [rax+24]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+48], mm0
movq [rdi+56], mm3
;unsigned char *z = vsrc;
;unsigned short *diff = vdiff;
;unsigned char *Predictor= vpred;
mov rdi, arg(0) ;diff
mov rax, arg(3) ;pred
mov rsi, arg(2) ;z = usrc
add rdi, 320*2 ;diff = diff + 320 (shorts)
add rax, 320 ;Predictor = pred + 320
movsxd rdx, dword ptr arg(4) ;stride;
pxor mm7, mm7
movq mm0, [rsi]
movq mm1, [rax]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi], mm0
movq [rdi+8], mm3
movq mm0, [rsi+rdx]
movq mm1, [rax+8]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+16], mm0
movq [rdi+24], mm3
movq mm0, [rsi+rdx*2]
movq mm1, [rax+16]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+32], mm0
movq [rdi+40], mm3
lea rsi, [rsi+rdx*2]
movq mm0, [rsi+rdx]
movq mm1, [rax+24]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+48], mm0
movq [rdi+56], mm3
add rdi, 64
add rax, 32
lea rsi, [rsi+rdx*2]
movq mm0, [rsi]
movq mm1, [rax]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi], mm0
movq [rdi+8], mm3
movq mm0, [rsi+rdx]
movq mm1, [rax+8]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+16], mm0
movq [rdi+24], mm3
movq mm0, [rsi+rdx*2]
movq mm1, [rax+16]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+32], mm0
movq [rdi+40], mm3
lea rsi, [rsi+rdx*2]
movq mm0, [rsi+rdx]
movq mm1, [rax+24]
movq mm3, mm0
movq mm4, mm1
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpckhbw mm3, mm7
punpckhbw mm4, mm7
psubw mm0, mm1
psubw mm3, mm4
movq [rdi+48], mm0
movq [rdi+56], mm3
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
......@@ -8,349 +8,121 @@
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
global sym(vp9_subtract_b_sse2_impl) PRIVATE
sym(vp9_subtract_b_sse2_impl):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rdi, arg(2) ;diff
mov rax, arg(3) ;Predictor
mov rsi, arg(0) ;z
movsxd rdx, dword ptr arg(1);src_stride;
movsxd rcx, dword ptr arg(4);pitch
pxor mm7, mm7
movd mm0, [rsi]