Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
2f55beb3
Commit
2f55beb3
authored
May 11, 2016
by
Linfeng Zhang
Committed by
Gerrit Code Review
May 11, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "remove mmx variance functions"
parents
70bdf643
d0ffae82
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
519 deletions
+8
-519
test/variance_test.cc
test/variance_test.cc
+0
-14
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/vpx_dsp_rtcd_defs.pl
+8
-8
vpx_dsp/x86/variance_impl_mmx.asm
vpx_dsp/x86/variance_impl_mmx.asm
+0
-401
vpx_dsp/x86/variance_mmx.c
vpx_dsp/x86/variance_mmx.c
+0
-96
No files found.
test/variance_test.cc
View file @
2f55beb3
...
...
@@ -977,20 +977,6 @@ INSTANTIATE_TEST_CASE_P(
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_MMX
INSTANTIATE_TEST_CASE_P
(
MMX
,
VpxMseTest
,
::
testing
::
Values
(
make_tuple
(
4
,
4
,
&
vpx_mse16x16_mmx
)));
INSTANTIATE_TEST_CASE_P
(
MMX
,
SumOfSquaresTest
,
::
testing
::
Values
(
vpx_get_mb_ss_mmx
));
INSTANTIATE_TEST_CASE_P
(
MMX
,
VpxVarianceTest
,
::
testing
::
Values
(
make_tuple
(
4
,
4
,
&
vpx_variance16x16_mmx
,
0
),
make_tuple
(
4
,
3
,
&
vpx_variance16x8_mmx
,
0
),
make_tuple
(
3
,
4
,
&
vpx_variance8x16_mmx
,
0
),
make_tuple
(
3
,
3
,
&
vpx_variance8x8_mmx
,
0
),
make_tuple
(
2
,
2
,
&
vpx_variance4x4_mmx
,
0
)));
INSTANTIATE_TEST_CASE_P
(
MMX
,
VpxSubpelVarianceTest
,
::
testing
::
Values
(
make_tuple
(
4
,
4
,
&
vpx_sub_pixel_variance16x16_mmx
,
0
),
...
...
vpx_dsp/vpx_dsp_rtcd_defs.pl
View file @
2f55beb3
...
...
@@ -1407,16 +1407,16 @@ add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int sourc
specialize
qw/vpx_variance16x32 sse2 msa/
;
add_proto
qw/unsigned int vpx_variance16x16/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse
";
specialize
qw/vpx_variance16x16
mmx
sse2 avx2 media neon msa/
;
specialize
qw/vpx_variance16x16 sse2 avx2 media neon msa/
;
add_proto
qw/unsigned int vpx_variance16x8/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse
";
specialize
qw/vpx_variance16x8
mmx
sse2 neon msa/
;
specialize
qw/vpx_variance16x8 sse2 neon msa/
;
add_proto
qw/unsigned int vpx_variance8x16/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse
";
specialize
qw/vpx_variance8x16
mmx
sse2 neon msa/
;
specialize
qw/vpx_variance8x16 sse2 neon msa/
;
add_proto
qw/unsigned int vpx_variance8x8/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse
";
specialize
qw/vpx_variance8x8
mmx
sse2 media neon msa/
;
specialize
qw/vpx_variance8x8 sse2 media neon msa/
;
add_proto
qw/unsigned int vpx_variance8x4/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse
";
specialize
qw/vpx_variance8x4 sse2 msa/
;
...
...
@@ -1425,7 +1425,7 @@ add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_
specialize
qw/vpx_variance4x8 sse2 msa/
;
add_proto
qw/unsigned int vpx_variance4x4/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse
";
specialize
qw/vpx_variance4x4
mmx
sse2 msa/
;
specialize
qw/vpx_variance4x4 sse2 msa/
;
#
# Specialty Variance
...
...
@@ -1434,10 +1434,10 @@ add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride,
specialize
qw/vpx_get16x16var sse2 avx2 neon msa/
;
add_proto
qw/void vpx_get8x8var/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum
";
specialize
qw/vpx_get8x8var
mmx
sse2 neon msa/
;
specialize
qw/vpx_get8x8var sse2 neon msa/
;
add_proto
qw/unsigned int vpx_mse16x16/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse
";
specialize
qw/vpx_mse16x16
mmx
sse2 avx2 media neon msa/
;
specialize
qw/vpx_mse16x16 sse2 avx2 media neon msa/
;
add_proto
qw/unsigned int vpx_mse16x8/
,
"
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse
";
specialize
qw/vpx_mse16x8 sse2 msa/
;
...
...
@@ -1449,7 +1449,7 @@ add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stri
specialize
qw/vpx_mse8x8 sse2 msa/
;
add_proto
qw/unsigned int vpx_get_mb_ss/
,
"
const int16_t *
";
specialize
qw/vpx_get_mb_ss
mmx
sse2 msa/
;
specialize
qw/vpx_get_mb_ss sse2 msa/
;
add_proto
qw/unsigned int vpx_get4x4sse_cs/
,
"
const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride
";
specialize
qw/vpx_get4x4sse_cs neon msa/
;
...
...
vpx_dsp/x86/variance_impl_mmx.asm
View file @
2f55beb3
...
...
@@ -13,407 +13,6 @@
%define mmx_filter_shift 7
;unsigned int vpx_get_mb_ss_mmx( short *src_ptr )
global
sym
(
vpx_get_mb_ss_mmx
)
PRIVATE
sym
(
vpx_get_mb_ss_mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
7
GET_GOT
rbx
push
rsi
push
rdi
sub
rsp
,
8
; end prolog
mov
rax
,
arg
(
0
)
;src_ptr
mov
rcx
,
16
pxor
mm4
,
mm4
.NEXTROW:
movq
mm0
,
[
rax
]
movq
mm1
,
[
rax
+
8
]
movq
mm2
,
[
rax
+
16
]
movq
mm3
,
[
rax
+
24
]
pmaddwd
mm0
,
mm0
pmaddwd
mm1
,
mm1
pmaddwd
mm2
,
mm2
pmaddwd
mm3
,
mm3
paddd
mm4
,
mm0
paddd
mm4
,
mm1
paddd
mm4
,
mm2
paddd
mm4
,
mm3
add
rax
,
32
dec
rcx
ja
.NEXTROW
movq
QWORD
PTR
[
rsp
],
mm4
;return sum[0]+sum[1];
movsxd
rax
,
dword
ptr
[
rsp
]
movsxd
rcx
,
dword
ptr
[
rsp
+
4
]
add
rax
,
rcx
; begin epilog
add
rsp
,
8
pop
rdi
pop
rsi
REST
ORE_GOT
UNSHADOW_ARGS
pop
rbp
ret
;void vpx_get8x8var_mmx
;(
; unsigned char *src_ptr,
; int source_stride,
; unsigned char *ref_ptr,
; int recon_stride,
; unsigned int *SSE,
; int *Sum
;)
global
sym
(
vpx_get8x8var_mmx
)
PRIVATE
sym
(
vpx_get8x8var_mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
6
push
rsi
push
rdi
push
rbx
sub
rsp
,
16
; end prolog
pxor
mm5
,
mm5
; Blank mmx6
pxor
mm6
,
mm6
; Blank mmx7
pxor
mm7
,
mm7
; Blank mmx7
mov
rax
,
arg
(
0
)
;[src_ptr] ; Load base addresses
mov
rbx
,
arg
(
2
)
;[ref_ptr]
movsxd
rcx
,
dword
ptr
arg
(
1
)
;[source_stride]
movsxd
rdx
,
dword
ptr
arg
(
3
)
;[recon_stride]
; Row 1
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 2
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 3
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 4
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 5
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
; movq mm4, [rbx + rdx]
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 6
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 7
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movq
mm1
,
[
rbx
]
; Copy eight bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Row 8
movq
mm0
,
[
rax
]
; Copy eight bytes to mm0
movq
mm2
,
mm0
; Take copies
movq
mm3
,
mm1
; Take copies
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
punpckhbw
mm2
,
mm6
; unpack to higher prrcision
punpckhbw
mm3
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
psubsw
mm2
,
mm3
; A-B (high order) to MM2
paddw
mm5
,
mm0
; accumulate differences in mm5
paddw
mm5
,
mm2
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
pmaddwd
mm2
,
mm2
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
paddd
mm7
,
mm0
; accumulate in mm7
paddd
mm7
,
mm2
; accumulate in mm7
; Now accumulate the final results.
movq
QWORD
PTR
[
rsp
+
8
],
mm5
; copy back accumulated results into normal memory
movq
QWORD
PTR
[
rsp
],
mm7
; copy back accumulated results into normal memory
movsx
rdx
,
WORD
PTR
[
rsp
+
8
]
movsx
rcx
,
WORD
PTR
[
rsp
+
10
]
movsx
rbx
,
WORD
PTR
[
rsp
+
12
]
movsx
rax
,
WORD
PTR
[
rsp
+
14
]
add
rdx
,
rcx
add
rbx
,
rax
add
rdx
,
rbx
;XSum
movsxd
rax
,
DWORD
PTR
[
rsp
]
movsxd
rcx
,
DWORD
PTR
[
rsp
+
4
]
add
rax
,
rcx
;XXSum
mov
rsi
,
arg
(
4
)
;SSE
mov
rdi
,
arg
(
5
)
;Sum
mov
dword
ptr
[
rsi
],
eax
mov
dword
ptr
[
rdi
],
edx
xor
rax
,
rax
; return 0
; begin epilog
add
rsp
,
16
pop
rbx
pop
rdi
pop
rsi
UNSHADOW_ARGS
pop
rbp
ret
;void
;vpx_get4x4var_mmx
;(
; unsigned char *src_ptr,
; int source_stride,
; unsigned char *ref_ptr,
; int recon_stride,
; unsigned int *SSE,
; int *Sum
;)
global
sym
(
vpx_get4x4var_mmx
)
PRIVATE
sym
(
vpx_get4x4var_mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
6
push
rsi
push
rdi
push
rbx
sub
rsp
,
16
; end prolog
pxor
mm5
,
mm5
; Blank mmx6
pxor
mm6
,
mm6
; Blank mmx7
pxor
mm7
,
mm7
; Blank mmx7
mov
rax
,
arg
(
0
)
;[src_ptr] ; Load base addresses
mov
rbx
,
arg
(
2
)
;[ref_ptr]
movsxd
rcx
,
dword
ptr
arg
(
1
)
;[source_stride]
movsxd
rdx
,
dword
ptr
arg
(
3
)
;[recon_stride]
; Row 1
movd
mm0
,
[
rax
]
; Copy four bytes to mm0
movd
mm1
,
[
rbx
]
; Copy four bytes to mm1
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
paddw
mm5
,
mm0
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movd
mm1
,
[
rbx
]
; Copy four bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
; Row 2
movd
mm0
,
[
rax
]
; Copy four bytes to mm0
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
paddw
mm5
,
mm0
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movd
mm1
,
[
rbx
]
; Copy four bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
; Row 3
movd
mm0
,
[
rax
]
; Copy four bytes to mm0
punpcklbw
mm0
,
mm6
; unpack to higher precision
punpcklbw
mm1
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
paddw
mm5
,
mm0
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
add
rbx
,
rdx
; Inc pointer into ref data
add
rax
,
rcx
; Inc pointer into the new data
movd
mm1
,
[
rbx
]
; Copy four bytes to mm1
paddd
mm7
,
mm0
; accumulate in mm7
; Row 4
movd
mm0
,
[
rax
]
; Copy four bytes to mm0
punpcklbw
mm0
,
mm6
; unpack to higher prrcision
punpcklbw
mm1
,
mm6
psubsw
mm0
,
mm1
; A-B (low order) to MM0
paddw
mm5
,
mm0
; accumulate differences in mm5
pmaddwd
mm0
,
mm0
; square and accumulate
paddd
mm7
,
mm0
; accumulate in mm7
; Now accumulate the final results.
movq
QWORD
PTR
[
rsp
+
8
],
mm5
; copy back accumulated results into normal memory
movq
QWORD
PTR
[
rsp
],
mm7
; copy back accumulated results into normal memory
movsx
rdx
,
WORD
PTR
[
rsp
+
8
]
movsx
rcx
,
WORD
PTR
[
rsp
+
10
]
movsx
rbx
,
WORD
PTR
[
rsp
+
12
]
movsx
rax
,
WORD
PTR
[
rsp
+
14
]
add
rdx
,
rcx
add
rbx
,
rax
add
rdx
,
rbx
;XSum
movsxd
rax
,
DWORD
PTR
[
rsp
]
movsxd
rcx
,
DWORD
PTR
[
rsp
+
4
]
add
rax
,
rcx
;XXSum
mov
rsi
,
arg
(
4
)
;SSE
mov
rdi
,
arg
(
5
)
;Sum
mov
dword
ptr
[
rsi
],
eax
mov
dword
ptr
[
rdi
],
edx
xor
rax
,
rax
; return 0
; begin epilog
add
rsp
,
16
pop
rbx
pop
rdi
pop
rsi
UNSHADOW_ARGS
pop
rbp
ret
;void vpx_filter_block2d_bil4x4_var_mmx
;(
; unsigned char *ref_ptr,
...
...
vpx_dsp/x86/variance_mmx.c
View file @
2f55beb3
...
...
@@ -23,10 +23,6 @@ DECLARE_ALIGNED(16, static const int16_t, bilinear_filters_mmx[8][8]) = {
{
16
,
16
,
16
,
16
,
112
,
112
,
112
,
112
}
};
extern
void
vpx_get4x4var_mmx
(
const
uint8_t
*
a
,
int
a_stride
,
const
uint8_t
*
b
,
int
b_stride
,
unsigned
int
*
sse
,
int
*
sum
);
extern
void
vpx_filter_block2d_bil4x4_var_mmx
(
const
unsigned
char
*
ref_ptr
,
int
ref_pixels_per_line
,
const
unsigned
char
*
src_ptr
,
...
...
@@ -47,98 +43,6 @@ extern void vpx_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr,
unsigned
int
*
sumsquared
);
unsigned
int
vpx_variance4x4_mmx
(
const
unsigned
char
*
a
,
int
a_stride
,
const
unsigned
char
*
b
,
int
b_stride
,
unsigned
int
*
sse
)
{
unsigned
int
var
;
int
avg
;
vpx_get4x4var_mmx
(
a
,
a_stride
,
b
,
b_stride
,
&
var
,
&
avg
);
*
sse
=
var
;
return
(
var
-
(((
unsigned
int
)
avg
*
avg
)
>>
4
));
}
unsigned
int
vpx_variance8x8_mmx
(
const
unsigned
char
*
a
,
int
a_stride
,
const
unsigned
char
*
b
,
int
b_stride
,
unsigned
int
*
sse
)
{
unsigned
int
var
;
int
avg
;
vpx_get8x8var_mmx
(
a
,
a_stride
,
b
,
b_stride
,
&
var
,
&
avg
);
*
sse
=
var
;
return
(
var
-
(((
unsigned
int
)
avg
*
avg
)
>>
6
));
}
unsigned
int
vpx_mse16x16_mmx
(
const
unsigned
char
*
a
,
int
a_stride
,
const
unsigned
char
*
b
,
int
b_stride
,
unsigned
int
*
sse
)
{
unsigned
int
sse0
,
sse1
,
sse2
,
sse3
,
var
;
int
sum0
,
sum1
,
sum2
,
sum3
;
vpx_get8x8var_mmx
(
a
,
a_stride
,
b
,
b_stride
,
&
sse0
,
&
sum0
);
vpx_get8x8var_mmx
(
a
+
8
,
a_stride
,
b
+
8
,
b_stride
,
&
sse1
,
&
sum1
);
vpx_get8x8var_mmx
(
a
+
8
*
a_stride
,
a_stride
,
b
+
8
*
b_stride
,
b_stride
,
&
sse2
,
&
sum2
);
vpx_get8x8var_mmx
(
a
+
8
*
a_stride
+
8
,
a_stride
,
b
+
8
*
b_stride
+
8
,
b_stride
,
&
sse3
,
&
sum3
);
var
=
sse0
+
sse1
+
sse2
+
sse3
;
*
sse
=
var
;
return
var
;
}
unsigned
int
vpx_variance16x16_mmx
(
const
unsigned
char
*
a
,
int
a_stride
,
const
unsigned
char
*
b
,
int
b_stride
,
unsigned
int
*
sse
)
{
unsigned
int
sse0
,
sse1
,
sse2
,
sse3
,
var
;
int
sum0
,
sum1
,
sum2
,
sum3
,
avg
;
vpx_get8x8var_mmx
(
a
,
a_stride
,
b
,
b_stride
,
&
sse0
,
&
sum0
);
vpx_get8x8var_mmx
(
a
+
8
,
a_stride
,
b
+
8
,
b_stride
,
&
sse1
,
&
sum1
);
vpx_get8x8var_mmx
(
a
+
8
*
a_stride
,
a_stride
,
b
+
8
*
b_stride
,
b_stride
,
&
sse2
,
&
sum2
);
vpx_get8x8var_mmx
(
a
+
8
*
a_stride
+
8
,
a_stride
,
b
+
8
*
b_stride
+
8
,
b_stride
,
&
sse3
,
&
sum3
);
var
=
sse0
+
sse1
+
sse2
+
sse3
;
avg
=
sum0
+
sum1
+
sum2
+
sum3
;
*
sse
=
var
;
return
(
var
-
(((
unsigned
int
)
avg
*
avg
)
>>
8
));
}
unsigned
int
vpx_variance16x8_mmx
(
const
unsigned
char
*
a
,
int
a_stride
,
const
unsigned
char
*
b
,
int
b_stride
,
unsigned
int
*
sse
)
{
unsigned
int
sse0
,
sse1
,
var
;
int
sum0
,
sum1
,
avg
;
vpx_get8x8var_mmx
(
a
,
a_stride
,
b
,
b_stride
,
&
sse0
,
&
sum0
);
vpx_get8x8var_mmx
(
a
+
8
,
a_stride
,
b
+
8
,
b_stride
,
&
sse1
,
&
sum1
);
var
=
sse0
+
sse1
;
avg
=
sum0
+
sum1
;
*
sse
=
var
;
return
(
var
-
(((
unsigned
int
)
avg
*
avg
)
>>
7
));
}
unsigned
int
vpx_variance8x16_mmx
(
const
unsigned
char
*
a
,
int
a_stride
,
const
unsigned
char
*
b
,
int
b_stride
,
unsigned
int
*
sse
)
{
unsigned
int
sse0
,
sse1
,
var
;
int
sum0
,
sum1
,
avg
;
vpx_get8x8var_mmx
(
a
,
a_stride
,
b
,
b_stride
,
&
sse0
,
&
sum0
);
vpx_get8x8var_mmx
(
a
+
8
*
a_stride
,
a_stride
,
b
+
8
*
b_stride
,
b_stride
,
&
sse1
,
&
sum1
);
var
=
sse0
+
sse1
;
avg
=
sum0
+
sum1
;
*
sse
=
var
;
return
(
var
-
(((
unsigned
int
)
avg
*
avg
)
>>
7
));
}
uint32_t
vpx_sub_pixel_variance4x4_mmx
(
const
uint8_t
*
a
,
int
a_stride
,
int
xoffset
,
int
yoffset
,
const
uint8_t
*
b
,
int
b_stride
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment