Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
7756e989
Commit
7756e989
authored
Jun 21, 2013
by
Ronald S. Bultje
Committed by
Gerrit Code Review
Jun 21, 2013
Browse files
Merge "Add subtract_block SSE2 version and unit test."
parents
9a480482
25c588b1
Changes
9
Hide whitespace changes
Inline
Side-by-side
test/test.mk
View file @
7756e989
...
...
@@ -66,6 +66,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER)
+=
set_roi.cc
LIBVPX_TEST_SRCS-yes
+=
sixtap_predict_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER)
+=
subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER)
+=
variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER)
+=
vp8_decrypt_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER)
+=
vp8_fdct4x4_test.cc
...
...
test/vp9_subtract_test.cc
0 → 100644
View file @
7756e989
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include
"third_party/googletest/src/include/gtest/gtest.h"
#include
"test/acm_random.h"
#include
"test/clear_system_state.h"
#include
"test/register_state_check.h"
extern
"C"
{
#include
"./vpx_config.h"
#include
"./vp9_rtcd.h"
#include
"vp9/common/vp9_blockd.h"
}
// Pointer to a block-subtraction routine with the vp9_subtract_block
// prototype. The test below expects such a routine to write, for every
// r < rows and c < cols,
//   diff_ptr[r * diff_stride + c] =
//       src_ptr[r * src_stride + c] - pred_ptr[r * pred_stride + c].
// Strides are expressed in elements of the respective buffer.
typedef void (*subtract_fn_t)(int rows, int cols,
                              int16_t *diff_ptr, ptrdiff_t diff_stride,
                              const uint8_t *src_ptr, ptrdiff_t src_stride,
                              const uint8_t *pred_ptr, ptrdiff_t pred_stride);
namespace
vp9
{
// Value-parameterised fixture: each instantiation supplies one
// subtract_fn_t implementation, retrieved in the test body via GetParam().
class VP9SubtractBlockTest : public ::testing::TestWithParam<subtract_fn_t> {
 public:
  // Runs after every test case and calls libvpx_test::ClearSystemState().
  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }
};
using libvpx_test::ACMRandom;

// Verifies the implementation returned by GetParam() against the scalar
// expectation diff = src - pred. Every block size from BLOCK_SIZE_AB4X4 up
// to (but excluding) BLOCK_SIZE_TYPES is exercised with 100 random inputs,
// once with tightly packed rows (stride == block_width) and once with
// double-width rows (stride == 2 * block_width).
TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  // FIXME(rbultje) split in its own file
  for (int bs = BLOCK_SIZE_AB4X4; bs < BLOCK_SIZE_TYPES; ++bs) {
    const BLOCK_SIZE_TYPE bsize = static_cast<BLOCK_SIZE_TYPE>(bs);
    const int block_width = 4 << b_width_log2(bsize);
    const int block_height = 4 << b_height_log2(bsize);

    // All three buffers are sized for the wider of the two strides.
    const int wide_stride = block_width * 2;
    const int buffer_size = block_height * wide_stride;
    const int strides[2] = { block_width, wide_stride };

    int16_t *diff = new int16_t[buffer_size];
    uint8_t *pred = new uint8_t[buffer_size];
    uint8_t *src = new uint8_t[buffer_size];

    for (int n = 0; n < 100; ++n) {
      // Fill both inputs completely; src and pred draw alternately from the
      // generator so the random sequence is consumed in a fixed order.
      for (int i = 0; i < buffer_size; ++i) {
        src[i] = rnd.Rand8();
        pred[i] = rnd.Rand8();
      }

      // Same data, two layouts: packed first, then double-width stride.
      for (int pass = 0; pass < 2; ++pass) {
        const int stride = strides[pass];
        GetParam()(block_height, block_width, diff, stride,
                   src, stride, pred, stride);

        for (int r = 0; r < block_height; ++r) {
          for (int c = 0; c < block_width; ++c) {
            const int idx = r * stride + c;
            EXPECT_EQ(diff[idx], (src[idx] - pred[idx]))
                << "r = " << r << ", c = " << c << ", bs = " << bsize;
          }
        }
      }
    }

    delete[] diff;
    delete[] pred;
    delete[] src;
  }
}
// The C implementation is always instantiated.
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
                        ::testing::Values(vp9_subtract_block_c));

// The SSE2 implementation is instantiated only when the build defines
// HAVE_SSE2 to a non-zero value.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
                        ::testing::Values(vp9_subtract_block_sse2));
#endif
}
// namespace vp9
vp9/common/vp9_rtcd_defs.sh
View file @
7756e989
...
...
@@ -533,6 +533,9 @@ prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
specialize vp9_block_error mmx sse2
vp9_block_error_sse2
=
vp9_block_error_xmm
prototype void vp9_subtract_block
"int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
specialize vp9_subtract_block sse2
#
# Structured Similarity (SSIM)
#
...
...
vp9/encoder/vp9_encodemb.c
View file @
7756e989
...
...
@@ -22,10 +22,10 @@
DECLARE_ALIGNED
(
16
,
extern
const
uint8_t
,
vp9_pt_energy_class
[
MAX_ENTROPY_TOKENS
]);
void
vp9_subtract_block
(
int
rows
,
int
cols
,
int16_t
*
diff_ptr
,
in
t
diff_stride
,
const
uint8_t
*
src_ptr
,
in
t
src_stride
,
const
uint8_t
*
pred_ptr
,
in
t
pred_stride
)
{
void
vp9_subtract_block
_c
(
int
rows
,
int
cols
,
int16_t
*
diff_ptr
,
ptrdiff_
t
diff_stride
,
const
uint8_t
*
src_ptr
,
ptrdiff_
t
src_stride
,
const
uint8_t
*
pred_ptr
,
ptrdiff_
t
pred_stride
)
{
int
r
,
c
;
for
(
r
=
0
;
r
<
rows
;
r
++
)
{
...
...
vp9/encoder/vp9_encodemb.h
View file @
7756e989
...
...
@@ -42,10 +42,6 @@ void vp9_encode_sbuv(VP9_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
void
vp9_xform_quant_sby
(
VP9_COMMON
*
cm
,
MACROBLOCK
*
x
,
BLOCK_SIZE_TYPE
bsize
);
void
vp9_xform_quant_sbuv
(
VP9_COMMON
*
cm
,
MACROBLOCK
*
x
,
BLOCK_SIZE_TYPE
bsize
);
void
vp9_subtract_block
(
int
rows
,
int
cols
,
int16_t
*
diff_ptr
,
int
diff_stride
,
const
uint8_t
*
src_ptr
,
int
src_stride
,
const
uint8_t
*
pred_ptr
,
int
pred_stride
);
void
vp9_subtract_sby
(
MACROBLOCK
*
x
,
BLOCK_SIZE_TYPE
bsize
);
void
vp9_subtract_sbuv
(
MACROBLOCK
*
x
,
BLOCK_SIZE_TYPE
bsize
);
void
vp9_subtract_sb
(
MACROBLOCK
*
xd
,
BLOCK_SIZE_TYPE
bsize
);
...
...
vp9/encoder/x86/vp9_subtract_mmx.asm
deleted
100644 → 0
View file @
9a480482
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
;                             short *diff, unsigned char *Predictor,
;                             int pitch);
;
; Subtracts a 4x4 block: for each of four rows, four bytes of z and four
; bytes of Predictor are widened to words (unpacked against zero in mm7),
; subtracted, and the four word results stored to diff.
;   z rows         are src_stride bytes apart,
;   Predictor rows are pitch bytes apart,
;   diff rows      are pitch words (pitch*2 bytes) apart.
; No emms is issued before returning.
global sym(vp9_subtract_b_mmx_impl) PRIVATE
sym(vp9_subtract_b_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                 ; diff
    mov         rax, arg(3)                 ; Predictor
    mov         rsi, arg(0)                 ; z
    movsxd      rdx, dword ptr arg(1)       ; src_stride
    movsxd      rcx, dword ptr arg(4)       ; pitch
    pxor        mm7, mm7                    ; zero, used for byte->word unpack

    ; row 0
    movd        mm0, [rsi]
    movd        mm1, [rax]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi], mm0

    ; row 1
    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi+rcx*2], mm0

    ; row 2
    movd        mm0, [rsi+rdx*2]
    movd        mm1, [rax+rcx*2]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi+rcx*4], mm0

    ; row 3: advance z by two rows and triple rcx so that the same
    ; [base+rdx] / [base+rcx] / [base+rcx*2] forms address the fourth row
    lea         rsi, [rsi+rdx*2]
    lea         rcx, [rcx+rcx*2]

    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi+rcx*2], mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp9_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)
;
; Subtracts a 16x16 block, one 16-pixel row per loop iteration (16
; iterations). Each row is handled as two 8-byte halves; bytes are widened
; to words by unpacking against zero in mm0 before the subtraction.
;   src rows  are stride bytes apart,
;   pred rows are 16 bytes apart,
;   diff rows are 32 bytes (16 words) apart.
; No emms is issued before returning.
global sym(vp9_subtract_mby_mmx) PRIVATE
sym(vp9_subtract_mby_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(1)                 ; src
    mov         rdi, arg(0)                 ; diff
    mov         rax, arg(2)                 ; pred
    movsxd      rdx, dword ptr arg(3)       ; stride

    mov         rcx, 16                     ; row counter
    pxor        mm0, mm0                    ; zero, used for byte->word unpack

.submby_loop:
    ; pixels 0..7 of the row
    movq        mm1, [rsi]
    movq        mm3, [rax]
    movq        mm2, mm1
    movq        mm4, mm3
    punpcklbw   mm1, mm0
    punpcklbw   mm3, mm0
    punpckhbw   mm2, mm0
    punpckhbw   mm4, mm0
    psubw       mm1, mm3
    psubw       mm2, mm4
    movq        [rdi], mm1
    movq        [rdi+8], mm2

    ; pixels 8..15 of the row
    movq        mm1, [rsi+8]
    movq        mm3, [rax+8]
    movq        mm2, mm1
    movq        mm4, mm3
    punpcklbw   mm1, mm0
    punpcklbw   mm3, mm0
    punpckhbw   mm2, mm0
    punpckhbw   mm4, mm0
    psubw       mm1, mm3
    psubw       mm2, mm4
    movq        [rdi+16], mm1
    movq        [rdi+24], mm2

    ; step to the next row of each buffer
    add         rdi, 32
    add         rax, 16
    lea         rsi, [rsi+rdx]

    sub         rcx, 1
    jnz         .submby_loop

    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;
; Subtracts the two 8x8 chroma blocks.
;   U: diff + 256 (shorts) = usrc - (pred + 256)
;   V: diff + 320 (shorts) = vsrc - (pred + 320)
; Source rows are stride bytes apart, predictor rows are 8 bytes apart and
; diff rows are 16 bytes (8 words) apart. No emms is issued before returning.

; Subtract one 8-pixel row: load 8 source bytes from %1 and 8 predictor
; bytes from %2, widen both to words against the zero register mm7, and
; store the low four word differences to %3 and the high four to %4.
%macro SUBTRACT_ROW8_MMX 4
    movq        mm0, %1
    movq        mm1, %2
    movq        mm3, mm0
    movq        mm4, mm1
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    punpckhbw   mm3, mm7
    punpckhbw   mm4, mm7
    psubw       mm0, mm1
    psubw       mm3, mm4
    movq        %3, mm0
    movq        %4, mm3
%endmacro

global sym(vp9_subtract_mbuv_mmx) PRIVATE
sym(vp9_subtract_mbuv_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    ;---------------------------------------------------------------
    ; U plane
    ;---------------------------------------------------------------
    mov         rdi, arg(0)                 ; diff
    mov         rax, arg(3)                 ; pred
    mov         rsi, arg(1)                 ; z = usrc
    add         rdi, 256*2                  ; diff = diff + 256 (shorts)
    add         rax, 256                    ; Predictor = pred + 256
    movsxd      rdx, dword ptr arg(4)       ; stride
    pxor        mm7, mm7

    ; rows 0..3
    SUBTRACT_ROW8_MMX [rsi],       [rax],    [rdi],    [rdi+8]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+8],  [rdi+16], [rdi+24]
    SUBTRACT_ROW8_MMX [rsi+rdx*2], [rax+16], [rdi+32], [rdi+40]
    lea         rsi, [rsi+rdx*2]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+24], [rdi+48], [rdi+56]

    add         rdi, 64
    add         rax, 32
    lea         rsi, [rsi+rdx*2]

    ; rows 4..7
    SUBTRACT_ROW8_MMX [rsi],       [rax],    [rdi],    [rdi+8]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+8],  [rdi+16], [rdi+24]
    SUBTRACT_ROW8_MMX [rsi+rdx*2], [rax+16], [rdi+32], [rdi+40]
    lea         rsi, [rsi+rdx*2]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+24], [rdi+48], [rdi+56]

    ;---------------------------------------------------------------
    ; V plane
    ;---------------------------------------------------------------
    mov         rdi, arg(0)                 ; diff
    mov         rax, arg(3)                 ; pred
    mov         rsi, arg(2)                 ; z = vsrc
    add         rdi, 320*2                  ; diff = diff + 320 (shorts)
    add         rax, 320                    ; Predictor = pred + 320
    movsxd      rdx, dword ptr arg(4)       ; stride
    pxor        mm7, mm7

    ; rows 0..3
    SUBTRACT_ROW8_MMX [rsi],       [rax],    [rdi],    [rdi+8]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+8],  [rdi+16], [rdi+24]
    SUBTRACT_ROW8_MMX [rsi+rdx*2], [rax+16], [rdi+32], [rdi+40]
    lea         rsi, [rsi+rdx*2]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+24], [rdi+48], [rdi+56]

    add         rdi, 64
    add         rax, 32
    lea         rsi, [rsi+rdx*2]

    ; rows 4..7
    SUBTRACT_ROW8_MMX [rsi],       [rax],    [rdi],    [rdi+8]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+8],  [rdi+16], [rdi+24]
    SUBTRACT_ROW8_MMX [rsi+rdx*2], [rax+16], [rdi+32], [rdi+40]
    lea         rsi, [rsi+rdx*2]
    SUBTRACT_ROW8_MMX [rsi+rdx],   [rax+24], [rdi+48], [rdi+56]

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
vp9/encoder/x86/vp9_subtract_sse2.asm
View file @
7756e989
...
...
@@ -8,349 +8,121 @@
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
global
sym
(
vp9_subtract_b_sse2_impl
)
PRIVATE
sym
(
vp9_subtract_b_sse2_impl
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
5
GET_GOT
rbx
push
rsi
push
rdi
; end prolog
mov
rdi
,
arg
(
2
)
;diff
mov
rax
,
arg
(
3
)
;Predictor
mov
rsi
,
arg
(
0
)
;z
movsxd
rdx
,
dword
ptr
arg
(
1
)
;src_stride;
movsxd
rcx
,
dword
ptr
arg
(
4
)
;pitch
pxor
mm7
,
mm7
movd
mm0
,
[
rsi
]