Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
b8f43aec
Commit
b8f43aec
authored
Nov 02, 2010
by
Scott LaVarnway
Committed by
Code Review
Nov 02, 2010
Browse files
Merge "SSSE3 version of fast quantizer"
parents
90c505f2
ff4a71f4
Changes
3
Hide whitespace changes
Inline
Side-by-side
vp8/encoder/x86/quantize_ssse3.asm
0 → 100755
View file @
b8f43aec
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
;                                   short *qcoeff_ptr, short *dequant_ptr,
;                                   short *round_ptr,
;                                   short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes 16 signed 16-bit coefficients (two XMM registers' worth) and
; returns the end-of-block position in eax.
;
; Per 16-bit lane, with z = coeff[i]:
;     sz         = z >> 15                    (0 or -1)
;     x          = abs(z) + round[i]
;     y          = (x * quant[i]) >> 16       (pmulhw: signed high word)
;     qcoeff[i]  = (y ^ sz) - sz              (re-apply the sign of z)
;     dqcoeff[i] = qcoeff[i] * dequant[i]     (pmullw: low word)
;
; Return value: 1 + the highest position, in zz_shuf order, whose qcoeff is
; non-zero; 0 when every qcoeff is zero.
;
; Every buffer is accessed with movdqa at offsets 0 and 16, so all six
; pointers must be 16-byte aligned and reference at least 32 bytes.
;
; sym(), arg(), SHADOW_ARGS_TO_STACK, GET_GOT, RESTORE_GOT, UNSHADOW_ARGS and
; GLOBAL() are not defined in this file; presumably they come from
; vpx_ports/x86_abi_support.asm.
;
; Registers written: rax, rcx, rdx, xmm0-xmm5 and flags. rsi and rdi are
; saved and restored with push/pop.
;
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx, arg(0)                 ;coeff_ptr
    mov         rdi, arg(3)                 ;round_ptr
    mov         rsi, arg(4)                 ;quant_ptr

    movdqa      xmm0, [rdx]                 ;z, coefficients 0-7
    movdqa      xmm4, [rdx + 16]            ;z, coefficients 8-15

    movdqa      xmm2, [rdi]                 ;round lo
    movdqa      xmm3, [rdi + 16]            ;round hi

    movdqa      xmm1, xmm0                  ;keep a copy of z for abs()
    movdqa      xmm5, xmm4

    psraw       xmm0, 15                    ;sign of z (aka sz)
    psraw       xmm4, 15                    ;sign of z (aka sz)

    pabsw       xmm1, xmm1                  ;x = abs(z)
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2                  ;x += round
    paddw       xmm5, xmm3

    pmulhw      xmm1, [rsi]                 ;y = (x * quant) >> 16
    pmulhw      xmm5, [rsi + 16]

    ; rdi and rsi are free again: reuse them for the output pointers.
    mov         rdi, arg(1)                 ;qcoeff_ptr
    mov         rcx, arg(2)                 ;dequant_ptr
    mov         rsi, arg(5)                 ;dqcoeff_ptr

    pxor        xmm1, xmm0                  ;(y ^ sz) - sz restores the sign
    pxor        xmm5, xmm4
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rdi], xmm1                 ;store qcoeff
    movdqa      [rdi + 16], xmm5            ;store qcoeff

    movdqa      xmm2, [rcx]                 ;dequant lo
    movdqa      xmm3, [rcx + 16]            ;dequant hi

    pxor        xmm4, xmm4                  ;xmm4 = 0, comparison operand
    pmullw      xmm2, xmm1                  ;dqcoeff = qcoeff * dequant
    pmullw      xmm3, xmm5

    ; pcmpeqw against zero sets a lane to 0xffff where qcoeff == 0, so this
    ; is an "is zero" mask; it is inverted below before the bit scan.
    pcmpeqw     xmm1, xmm4
    pcmpeqw     xmm5, xmm4
    packsswb    xmm1, xmm5                  ;16 word masks -> 16 byte masks
    pshufb      xmm1, [GLOBAL(zz_shuf)]     ;byte i = mask of coeff zz_shuf[i]

    pmovmskb    edx, xmm1                   ;bit i = 1 if that coeff is zero

    ; Scalar loop kept from the original source as a commented-out
    ; alternative to the bsr sequence that follows:
;    xor         ecx, ecx
;    mov         eax, -1
;find_eob_loop:
;    shr         edx, 1
;    jc          fq_skip
;    mov         eax, ecx
;fq_skip:
;    inc         ecx
;    cmp         ecx, 16
;    jne         find_eob_loop

    xor         edi, edi                    ;edi = 0 (only edi is read below)
    mov         eax, -1
    xor         dx, ax                      ;flip the bits for bsr
    bsr         eax, edx                    ;index of last non-zero coeff;
                                            ;result is masked off below when
                                            ;edx == 0

    movdqa      [rsi], xmm2                 ;store dqcoeff
    movdqa      [rsi + 16], xmm3            ;store dqcoeff

    sub         edi, edx                    ;check for all zeros in bit mask
    sar         edi, 31                     ;0 or -1
    add         eax, 1
    and         eax, edi                    ;if the bit mask was all zero,
                                            ;then eob = 0

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
; NOTE(review): SECTION_RODATA is assumed to be a macro supplied by the
; included ABI support file -- confirm against x86_abi_support.asm.
SECTION_RODATA
align 16
; pshufb control for vp8_fast_quantize_b_impl_ssse3: output byte i takes
; source byte zz_shuf[i], which reorders the 16 per-coefficient mask bytes
; before pmovmskb extracts them. 16-byte aligned because it is used as a
; memory operand of pshufb.
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
vp8/encoder/x86/x86_csystemdependent.c
View file @
b8f43aec
...
...
@@ -179,6 +179,25 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSSE3
/* Quantizer routine; the result is stored as the block's eob by the
 * wrapper below. */
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
                                   short *qcoeff_ptr,
                                   short *dequant_ptr,
                                   short *round_ptr,
                                   short *quant_ptr,
                                   short *dqcoeff_ptr);

/* Adapts the (BLOCK, BLOCKD) interface to the flat pointer list taken by
 * vp8_fast_quantize_b_impl_ssse3: coeff, round and quant are read from b;
 * qcoeff, dequant and dqcoeff from d. The returned value is stored in
 * d->eob. */
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
{
    const int eob = vp8_fast_quantize_b_impl_ssse3(b->coeff,
                                                   d->qcoeff,
                                                   d->dequant,
                                                   b->round,
                                                   b->quant,
                                                   d->dqcoeff);

    d->eob = eob;
}
#endif
void
vp8_arch_x86_encoder_init
(
VP8_COMP
*
cpi
)
{
#if CONFIG_RUNTIME_CPU_DETECT
...
...
@@ -333,6 +352,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
cpi
->
rtcd
.
variance
.
sad16x16x3
=
vp8_sad16x16x3_ssse3
;
cpi
->
rtcd
.
variance
.
sad16x8x3
=
vp8_sad16x8x3_ssse3
;
cpi
->
rtcd
.
quantize
.
fastquantb
=
vp8_fast_quantize_b_ssse3
;
}
#endif
...
...
vp8/vp8cx.mk
View file @
b8f43aec
...
...
@@ -109,6 +109,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
# Each line appends one assembly source to the variable named
# VP8_CX_SRCS-<expansion of the feature variable(s)>.
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment