Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
1f19ebba
Commit
1f19ebba
authored
Sep 06, 2014
by
Dmitry Kovalev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Replacing vp9_get_mb_ss_sse2 asm implementation with intrinsics.
Change-Id: Ib4f5dd733eb2939b108070a01e83da5d9990bac0
parent
89963bf5
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
80 additions
and
71 deletions
+80
-71
test/variance_test.cc
test/variance_test.cc
+63
-0
vp9/encoder/vp9_variance.c
vp9/encoder/vp9_variance.c
+2
-1
vp9/encoder/x86/vp9_variance_impl_sse2.asm
vp9/encoder/x86/vp9_variance_impl_sse2.asm
+0
-69
vp9/encoder/x86/vp9_variance_sse2.c
vp9/encoder/x86/vp9_variance_sse2.c
+15
-0
vp9/vp9cx.mk
vp9/vp9cx.mk
+0
-1
No files found.
test/variance_test.cc
View file @
1f19ebba
...
...
@@ -35,6 +35,14 @@ using ::std::tr1::make_tuple;
using
::
std
::
tr1
::
tuple
;
using
libvpx_test
::
ACMRandom
;
// Reference sum of squares: squares each of the 256 int16_t values starting
// at |src| (the multiply is done in int) and returns the accumulated total as
// an unsigned int.
static unsigned int mb_ss_ref(const int16_t *src) {
  const int16_t *const end = src + 256;
  unsigned int total = 0;
  while (src != end) {
    const int sample = *src++;
    total += sample * sample;
  }
  return total;
}
static
unsigned
int
variance_ref
(
const
uint8_t
*
ref
,
const
uint8_t
*
src
,
int
l2w
,
int
l2h
,
unsigned
int
*
sse_ptr
)
{
int
se
=
0
;
...
...
@@ -76,6 +84,50 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
return
sse
-
(((
int64_t
)
se
*
se
)
>>
(
l2w
+
l2h
));
}
// Pointer to a sum-of-squares routine: takes a pointer to int16_t samples and
// returns an unsigned int.
typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
// Value-parameterized fixture; the test parameter is the sum-of-squares
// function to exercise.
class SumOfSquaresTest
    : public ::testing::TestWithParam<SumOfSquaresFunction> {
 public:
  // Captures the function under test from the test parameter.
  SumOfSquaresTest() : func_(GetParam()) {}

  // Calls libvpx_test::ClearSystemState() when the fixture is torn down.
  virtual ~SumOfSquaresTest() { libvpx_test::ClearSystemState(); }

 protected:
  SumOfSquaresFunction func_;  // Implementation being tested.
  ACMRandom rnd_;              // Random source used to build input blocks.

  // Test scenarios, defined out of line.
  void ConstTest();
  void RefTest();
};
void
SumOfSquaresTest
::
ConstTest
()
{
int16_t
mem
[
256
];
unsigned
int
res
;
for
(
int
v
=
0
;
v
<
256
;
++
v
)
{
for
(
int
i
=
0
;
i
<
256
;
++
i
)
{
mem
[
i
]
=
v
;
}
ASM_REGISTER_STATE_CHECK
(
res
=
func_
(
mem
));
EXPECT_EQ
(
256u
*
(
v
*
v
),
res
);
}
}
// Runs 100 rounds on random input: each round fills a 256-sample block with
// the difference of two rnd_.Rand8() draws per sample, then expects func_ to
// return the same value as mb_ss_ref for that block.
void SumOfSquaresTest::RefTest() {
  int16_t mem[256];
  for (int i = 0; i < 100; ++i) {
    for (int j = 0; j < 256; ++j) {
      // Take the two draws in separate statements. Inside one
      // `Rand8() - Rand8()` expression the operands may be evaluated in
      // either order, so the generated block could differ between compilers.
      mem[j] = rnd_.Rand8();
      mem[j] -= rnd_.Rand8();
    }

    const unsigned int expected = mb_ss_ref(mem);
    unsigned int res;
    ASM_REGISTER_STATE_CHECK(res = func_(mem));
    EXPECT_EQ(expected, res);
  }
}
template
<
typename
VarianceFunctionType
>
class
VarianceTest
:
public
::
testing
::
TestWithParam
<
tuple
<
int
,
int
,
VarianceFunctionType
>
>
{
...
...
@@ -362,6 +414,13 @@ INSTANTIATE_TEST_CASE_P(
namespace
vp9
{
#if CONFIG_VP9_ENCODER
// Parameterized test cases: each simply forwards to the matching fixture
// method and runs once per instantiated function.
TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
TEST_P(SumOfSquaresTest, Ref) { RefTest(); }

// Instantiate the suite for the C implementation, under the prefix "C".
INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
                        ::testing::Values(vp9_get_mb_ss_c));
// VP9 fixture aliases: each instantiates a test class template with one of
// the vp9_*_fn_t types.
typedef VarianceTest<vp9_variance_fn_t> VP9VarianceTest;
typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
...
...
@@ -487,6 +546,10 @@ INSTANTIATE_TEST_CASE_P(
#if HAVE_SSE2
#if CONFIG_USE_X86INC
// Instantiate the sum-of-squares suite for the SSE2 implementation, under the
// prefix "SSE2".
// NOTE(review): this registration sits inside the CONFIG_USE_X86INC section;
// confirm vp9_get_mb_ss_sse2 actually depends on x86inc, otherwise it could
// live directly under HAVE_SSE2.
INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
                        ::testing::Values(vp9_get_mb_ss_sse2));

// Named handles for the SSE2 variance functions.
const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
...
...
vp9/encoder/vp9_variance.c
View file @
1f19ebba
...
...
@@ -103,8 +103,9 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
// Returns the sum of the squares of the 256 int16_t values at src_ptr.
// Each square is computed in int and accumulated into an unsigned int.
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int i, sum = 0;

  // A single pass over the block; one loop header only.
  for (i = 0; i < 256; ++i) {
    sum += src_ptr[i] * src_ptr[i];
  }

  return sum;
}
...
...
vp9/encoder/x86/vp9_variance_impl_sse2.asm
deleted
100644 → 0
View file @
89963bf5
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;unsigned int vp9_get_mb_ss_sse2
;(
;    short *src_ptr
;)
; Returns the sum of the squares of the 256 16-bit values at src_ptr.
; The loop runs 8 times and consumes 64 bytes (32 words) per iteration with
; four movdqa loads, so src_ptr must be 16-byte aligned.
global sym(vp9_get_mb_ss_sse2) PRIVATE
sym(vp9_get_mb_ss_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 1
    GET_GOT     rbx
    push        rsi
    push        rdi
    sub         rsp, 16
    ; end prolog

        mov         rax, arg(0)         ;[src_ptr]
        mov         rcx, 8              ; iterations remaining
        pxor        xmm4, xmm4          ; xmm4 = four 32-bit partial sums

.NEXTROW:
        movdqa      xmm0, [rax]
        movdqa      xmm1, [rax+16]
        movdqa      xmm2, [rax+32]
        movdqa      xmm3, [rax+48]

        ; pmaddwd x,x squares each word and adds adjacent pairs into dwords
        pmaddwd     xmm0, xmm0
        pmaddwd     xmm1, xmm1
        pmaddwd     xmm2, xmm2
        pmaddwd     xmm3, xmm3

        paddd       xmm0, xmm1
        paddd       xmm2, xmm3
        paddd       xmm4, xmm0
        paddd       xmm4, xmm2

        add         rax, 0x40
        dec         rcx
        ; dec does not write CF, so branch on ZF alone rather than on a
        ; condition that also reads the carry left by the add above.
        jnz         .NEXTROW

        ; horizontal add: fold the high qword onto the low one, then dword 1
        ; onto dword 0, leaving the total in the low dword of xmm4
        movdqa      xmm3, xmm4
        psrldq      xmm4, 8
        paddd       xmm4, xmm3
        movdqa      xmm3, xmm4
        psrldq      xmm4, 4
        paddd       xmm4, xmm3
        movq        rax, xmm4

    ; begin epilog
    add         rsp, 16
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
vp9/encoder/x86/vp9_variance_sse2.c
View file @
1f19ebba
...
...
@@ -19,6 +19,21 @@ typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
const
unsigned
char
*
ref
,
int
ref_stride
,
unsigned
int
*
sse
,
int
*
sum
);
// SSE2 sum of squares over the 256 int16_t values at src. Loads are
// unaligned (_mm_loadu_si128), so src needs no particular alignment.
unsigned int vp9_get_mb_ss_sse2(const int16_t *src) {
  const int16_t *const end = src + 32 * 8;  // 32 vectors of 8 samples each
  __m128i acc = _mm_setzero_si128();        // four 32-bit partial sums
  __m128i folded;

  while (src != end) {
    const __m128i samples = _mm_loadu_si128((const __m128i *)src);
    // madd squares every sample and adds neighbouring pairs into 32-bit lanes.
    const __m128i pair_sums = _mm_madd_epi16(samples, samples);
    acc = _mm_add_epi32(acc, pair_sums);
    src += 8;
  }

  // Horizontal reduction: fold the upper 8 bytes onto the lower 8, then lane 1
  // onto lane 0, so lane 0 ends up holding the sum of all four lanes.
  folded = _mm_add_epi32(acc, _mm_srli_si128(acc, 8));
  folded = _mm_add_epi32(folded, _mm_srli_si128(folded, 4));
  return _mm_cvtsi128_si32(folded);
}
// Reads 32 bits at p + i * stride and 32 bits at p + (i + 1) * stride and
// interleaves their bytes into the low 8 bytes of an __m128i.
// Every macro parameter is parenthesized so that expression arguments
// (e.g. `row + 1`, `w * 2`) keep their intended grouping.
#define READ64(p, stride, i) \
  _mm_unpacklo_epi8( \
      _mm_cvtsi32_si128(*(const uint32_t *)((p) + (i) * (stride))), \
      _mm_cvtsi32_si128(*(const uint32_t *)((p) + ((i) + 1) * (stride))))
...
...
vp9/vp9cx.mk
View file @
1f19ebba
...
...
@@ -93,7 +93,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
VP9_CX_SRCS-yes
+=
encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes
+=
encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2)
+=
encoder/x86/vp9_sad4d_intrin_avx2.c
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment