Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
0ced7014
Commit
0ced7014
authored
Dec 10, 2010
by
Fritz Koenig
Committed by
Code Review
Dec 10, 2010
Browse files
Merge "vp8 fast quantizer sse2 optimizations for eob."
parents
cb969895
e0cf330c
Changes
4
Hide whitespace changes
Inline
Side-by-side
vp8/common/entropy.c
View file @
0ced7014
...
...
@@ -36,6 +36,14 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
7
,
11
,
14
,
15
,
};
DECLARE_ALIGNED
(
16
,
const
short
,
vp8_default_inv_zig_zag
[
16
])
=
{
1
,
2
,
6
,
7
,
3
,
5
,
8
,
13
,
4
,
9
,
12
,
14
,
10
,
11
,
15
,
16
};
DECLARE_ALIGNED
(
16
,
short
,
vp8_default_zig_zag_mask
[
16
]);
const
int
vp8_mb_feature_data_bits
[
MB_LVL_MAX
]
=
{
7
,
6
};
...
...
vp8/common/entropy.h
View file @
0ced7014
...
...
@@ -95,6 +95,7 @@ struct VP8Common;
void
vp8_default_coef_probs
(
struct
VP8Common
*
);
extern
DECLARE_ALIGNED
(
16
,
const
int
,
vp8_default_zig_zag1d
[
16
]);
extern
DECLARE_ALIGNED
(
16
,
const
short
,
vp8_default_inv_zig_zag
[
16
]);
extern
short
vp8_default_zig_zag_mask
[
16
];
extern
const
int
vp8_mb_feature_data_bits
[
MB_LVL_MAX
];
...
...
vp8/encoder/x86/quantize_sse2.asm
View file @
0ced7014
...
...
@@ -253,10 +253,9 @@ rq_zigzag_1c:
pop
rbp
ret
;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
; short *qcoeff_ptr,short *dequant_ptr,
; short *scan_
mask
, short *round_ptr,
; short *
inv_
scan_
order
, short *round_ptr,
; short *quant_ptr, short *dqcoeff_ptr);
global
sym
(
vp8_fast_quantize_b_impl_sse2
)
sym
(
vp8_fast_quantize_b_impl_sse2
):
...
...
@@ -265,32 +264,18 @@ sym(vp8_fast_quantize_b_impl_sse2):
SHADOW_ARGS_TO_STACK
7
push
rsi
push
rdi
push
rbx
; end prolog
ALIGN
_STACK
16
,
rax
%define save_xmm6 0
%define save_xmm7 16
%define vp8_fastquantizeb_stack_size save_xmm7 + 16
sub
rsp
,
vp8_fastquantizeb_stack_size
movdqa
XMMWORD
PTR
[
rsp
+
save_xmm6
],
xmm6
movdqa
XMMWORD
PTR
[
rsp
+
save_xmm7
],
xmm7
mov
rdx
,
arg
(
0
)
;coeff_ptr
mov
rcx
,
arg
(
2
)
;dequant_ptr
mov
rax
,
arg
(
3
)
;scan_mask
mov
rdi
,
arg
(
4
)
;round_ptr
mov
rsi
,
arg
(
5
)
;quant_ptr
movdqa
xmm0
,
XMMWORD
PTR
[
rdx
]
movdqa
xmm4
,
XMMWORD
PTR
[
rdx
+
16
]
movdqa
xmm
6
,
XMMWORD
PTR
[
rdi
]
;round lo
movdqa
xmm
7
,
XMMWORD
PTR
[
rdi
+
16
]
;round hi
movdqa
xmm
2
,
XMMWORD
PTR
[
rdi
]
;round lo
movdqa
xmm
3
,
XMMWORD
PTR
[
rdi
+
16
]
;round hi
movdqa
xmm1
,
xmm0
movdqa
xmm5
,
xmm4
...
...
@@ -303,8 +288,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
psubw
xmm1
,
xmm0
;x = abs(z)
psubw
xmm5
,
xmm4
;x = abs(z)
paddw
xmm1
,
xmm
6
paddw
xmm5
,
xmm
7
paddw
xmm1
,
xmm
2
paddw
xmm5
,
xmm
3
pmulhw
xmm1
,
XMMWORD
PTR
[
rsi
]
pmulhw
xmm5
,
XMMWORD
PTR
[
rsi
+
16
]
...
...
@@ -312,8 +297,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
mov
rdi
,
arg
(
1
)
;qcoeff_ptr
mov
rsi
,
arg
(
6
)
;dqcoeff_ptr
movdqa
xmm
6
,
XMMWORD
PTR
[
rcx
]
movdqa
xmm
7
,
XMMWORD
PTR
[
rcx
+
16
]
movdqa
xmm
2
,
XMMWORD
PTR
[
rcx
]
movdqa
xmm
3
,
XMMWORD
PTR
[
rcx
+
16
]
pxor
xmm1
,
xmm0
pxor
xmm5
,
xmm4
...
...
@@ -323,64 +308,47 @@ sym(vp8_fast_quantize_b_impl_sse2):
movdqa
XMMWORD
PTR
[
rdi
],
xmm1
movdqa
XMMWORD
PTR
[
rdi
+
16
],
xmm5
pmullw
xmm
6
,
xmm1
pmullw
xmm
7
,
xmm5
pmullw
xmm
2
,
xmm1
pmullw
xmm
3
,
xmm5
movdqa
xmm2
,
XMMWORD
PTR
[
rax
]
movdqa
xmm3
,
XMMWORD
PTR
[
rax
+
16
]
;
mov
rdi
,
arg
(
3
)
;inv_scan_order
pxor
xmm4
,
xmm4
;clear all bits
; Start with 16
pxor
xmm4
,
xmm4
;clear all bits
pcmpeqw
xmm1
,
xmm4
pcmpeqw
xmm5
,
xmm4
pcmpeqw
xmm4
,
xmm4
;set all bits
pcmpeqw
xmm4
,
xmm4
;set all bits
pxor
xmm1
,
xmm4
pxor
xmm5
,
xmm4
psrlw
xmm1
,
15
psrlw
xmm5
,
15
pmaddwd
xmm1
,
xmm2
pmaddwd
xmm5
,
xmm3
movq
xmm2
,
xmm1
movq
xmm3
,
xmm5
psrldq
xmm1
,
8
psrldq
xmm5
,
8
pand
xmm1
,
XMMWORD
PTR
[
rdi
]
pand
xmm5
,
XMMWORD
PTR
[
rdi
+
16
]
paddd
xmm1
,
xmm5
paddd
xmm2
,
xmm3
pmaxsw
xmm1
,
xmm5
paddd
xmm1
,
xmm2
movq
xmm5
,
xmm1
; now down to 8
pshufd
xmm5
,
xmm1
,
00001110b
psrldq
xmm1
,
4
paddd
xmm5
,
xmm1
pmaxsw
xmm1
,
xmm5
movq
rcx
,
xmm5
and
rcx
,
0xffff
; only 4 left
pshuflw
xmm5
,
xmm1
,
00001110b
xor
rdx
,
rdx
sub
rdx
,
rcx
pmaxsw
xmm1
,
xmm5
bsr
rax
,
rcx
inc
rax
; okay, just 2!
pshuflw
xmm5
,
xmm1
,
00000001b
sar
rdx
,
31
and
rax
,
rdx
pmaxsw
xmm1
,
xmm5
movd
qa
XMMWORD
PTR
[
rsi
],
xmm6
;store dqcoeff
movdqa
XMMWORD
PTR
[
rsi
+
16
],
xmm7
;store dqcoe
ff
movd
rax
,
xmm1
and
rax
,
0x
ff
movdqa
xmm6
,
XMMWORD
PTR
[
rsp
+
save_xmm6
]
movdqa
xmm7
,
XMMWORD
PTR
[
rsp
+
save_xmm7
]
add
rsp
,
vp8_fastquantizeb_stack_size
pop
rsp
movdqa
XMMWORD
PTR
[
rsi
],
xmm2
;store dqcoeff
movdqa
XMMWORD
PTR
[
rsi
+
16
],
xmm3
;store dqcoeff
; begin epilog
pop
rbx
pop
rdi
pop
rsi
UNSHADOW_ARGS
...
...
vp8/encoder/x86/x86_csystemdependent.c
View file @
0ced7014
...
...
@@ -83,7 +83,7 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
#if HAVE_SSE2
int
vp8_fast_quantize_b_impl_sse2
(
short
*
coeff_ptr
,
short
*
qcoeff_ptr
,
short
*
dequant_ptr
,
short
*
scan_
mask
,
short
*
round_ptr
,
const
short
*
inv_
scan_
order
,
short
*
round_ptr
,
short
*
quant_ptr
,
short
*
dqcoeff_ptr
);
void
vp8_fast_quantize_b_sse2
(
BLOCK
*
b
,
BLOCKD
*
d
)
{
...
...
@@ -99,8 +99,7 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
coeff_ptr
,
qcoeff_ptr
,
dequant_ptr
,
scan_mask
,
vp8_default_inv_zig_zag
,
round_ptr
,
quant_ptr
,
dqcoeff_ptr
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment