Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
A
aom-rav1e
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Xiph.Org
aom-rav1e
Commits
b8f43aec
Commit
b8f43aec
authored
14 years ago
by
Scott LaVarnway
Committed by
Code Review
14 years ago
Browse files
Options
Downloads
Plain Diff
Merge "SSSE3 version of fast quantizer"
parents
90c505f2
ff4a71f4
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
vp8/encoder/x86/quantize_ssse3.asm
+114
-0
114 additions, 0 deletions
vp8/encoder/x86/quantize_ssse3.asm
vp8/encoder/x86/x86_csystemdependent.c
+22
-0
22 additions, 0 deletions
vp8/encoder/x86/x86_csystemdependent.c
vp8/vp8cx.mk
+1
-0
1 addition, 0 deletions
vp8/vp8cx.mk
with
137 additions
and
0 deletions
vp8/encoder/x86/quantize_ssse3.asm
0 → 100755
+
114
−
0
View file @
b8f43aec
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
;                                    short *qcoeff_ptr, short *dequant_ptr,
;                                    short *round_ptr,
;                                    short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes 16 signed 16-bit coefficients, processed as two 8-lane halves:
;     sz      = z >> 15                           (arithmetic: 0 or -1)
;     x       = ((abs(z) + round) * quant) >> 16  (pmulhw, signed high half)
;     qcoeff  = (x ^ sz) - sz                     (re-applies the sign of z)
;     dqcoeff = qcoeff * dequant                  (pmullw, low 16 bits)
;
; Out:   eax = eob: 0 when every qcoeff is zero, otherwise 1 + the highest
;              position (in zz_shuf order) that holds a non-zero qcoeff.
; Args:  read through arg(n) after SHADOW_ARGS_TO_STACK 6.
; Align: every pointer is accessed with movdqa at offsets 0 and 16, so each
;        must address 32 bytes on a 16-byte boundary.
; Regs:  writes rax, rcx, rdx, xmm0-xmm5 and flags; rsi/rdi are pushed and
;        popped here; GET_GOT rbx / RESTORE_GOT bracket the body.
;-----------------------------------------------------------------------
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx, arg(0)                 ; coeff_ptr
    mov         rdi, arg(3)                 ; round_ptr
    mov         rsi, arg(4)                 ; quant_ptr

    movdqa      xmm0, [rdx]                 ; z, coefficients 0..7
    movdqa      xmm4, [rdx + 16]            ; z, coefficients 8..15

    movdqa      xmm2, [rdi]                 ; round lo
    movdqa      xmm3, [rdi + 16]            ; round hi

    movdqa      xmm1, xmm0                  ; keep a copy of z for abs()
    movdqa      xmm5, xmm4

    psraw       xmm0, 15                    ; sz lo = sign of z (0 or -1)
    psraw       xmm4, 15                    ; sz hi

    pabsw       xmm1, xmm1                  ; x = abs(z)
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2                  ; x += round
    paddw       xmm5, xmm3

    pmulhw      xmm1, [rsi]                 ; x = (x * quant) >> 16
    pmulhw      xmm5, [rsi + 16]

    ; rdi/rsi are free again: reuse them for the output pointers
    mov         rdi, arg(1)                 ; qcoeff_ptr
    mov         rcx, arg(2)                 ; dequant_ptr
    mov         rsi, arg(5)                 ; dqcoeff_ptr

    pxor        xmm1, xmm0                  ; (x ^ sz) - sz negates x where
    pxor        xmm5, xmm4                  ; z was negative
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rdi], xmm1                 ; store qcoeff
    movdqa      [rdi + 16], xmm5

    movdqa      xmm2, [rcx]                 ; dequant lo
    movdqa      xmm3, [rcx + 16]            ; dequant hi

    pxor        xmm4, xmm4                  ; xmm4 = 0 (sz hi no longer needed)
    pmullw      xmm2, xmm1                  ; dqcoeff = qcoeff * dequant
    pmullw      xmm3, xmm5

    pcmpeqw     xmm1, xmm4                  ; 0xFFFF in lanes where qcoeff == 0
    pcmpeqw     xmm5, xmm4                  ; (i.e. a mask of the ZERO lanes)
    packsswb    xmm1, xmm5                  ; 16 word masks -> 16 byte masks
    pshufb      xmm1, [GLOBAL(zz_shuf)]     ; reorder bytes into zz_shuf order

    pmovmskb    edx, xmm1                   ; bit i set => position i is zero;
                                            ; bits 16..31 of edx are cleared

    ; Branch-free replacement for a 16-iteration "find last non-zero" loop:
    ; invert the mask so set bits mark non-zero positions, then bsr yields
    ; the highest one.
    xor         edi, edi                    ; edi = 0, used for the zero test
    xor         edx, 0xFFFF                 ; flip the 16 mask bits for bsr
    bsr         eax, edx                    ; eax = last non-zero position
                                            ; (undefined when edx == 0; that
                                            ; case is masked off below)

    movdqa      [rsi], xmm2                 ; store dqcoeff
    movdqa      [rsi + 16], xmm3            ; store dqcoeff

    sub         edi, edx                    ; edi = -mask: negative iff any
                                            ; position is non-zero
    sar         edi, 31                     ; 0 or -1

    add         eax, 1                      ; position -> count
    and         eax, edi                    ; if the bit mask was all zero,
                                            ; then eob = 0

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
SECTION_RODATA
align 16
; pshufb control bytes used by vp8_fast_quantize_b_impl_ssse3: output byte i
; of the shuffle is taken from source byte zz_shuf[i]. The table is a
; permutation of 0..15 (presumably the 4x4 zig-zag scan order - confirm
; against the encoder's scan table). It is used as a 128-bit memory operand
; of pshufb, hence the 16-byte alignment.
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
This diff is collapsed.
Click to expand it.
vp8/encoder/x86/x86_csystemdependent.c
+
22
−
0
View file @
b8f43aec
...
...
@@ -179,6 +179,25 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSSE3
/* Implemented in vp8/encoder/x86/quantize_ssse3.asm. Returns the value that
 * the wrapper below stores as the block's eob. */
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
                                   short *qcoeff_ptr, short *dequant_ptr,
                                   short *round_ptr,
                                   short *quant_ptr, short *dqcoeff_ptr);

/* Adapter from the (BLOCK *, BLOCKD *) quantizer signature to the flat
 * pointer list taken by the assembly routine: inputs come from b (coeff,
 * round, quant) and d (dequant); outputs go to d (qcoeff, dqcoeff, eob). */
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
{
    const int eob = vp8_fast_quantize_b_impl_ssse3(b->coeff,
                                                   d->qcoeff,
                                                   d->dequant,
                                                   b->round,
                                                   b->quant,
                                                   d->dqcoeff);

    d->eob = eob;
}
#endif
void
vp8_arch_x86_encoder_init
(
VP8_COMP
*
cpi
)
{
#if CONFIG_RUNTIME_CPU_DETECT
...
...
@@ -333,6 +352,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
cpi
->
rtcd
.
variance
.
sad16x16x3
=
vp8_sad16x16x3_ssse3
;
cpi
->
rtcd
.
variance
.
sad16x8x3
=
vp8_sad16x8x3_ssse3
;
cpi
->
rtcd
.
quantize
.
fastquantb
=
vp8_fast_quantize_b_ssse3
;
}
#endif
...
...
This diff is collapsed.
Click to expand it.
vp8/vp8cx.mk
+
1
−
0
View file @
b8f43aec
...
...
@@ -109,6 +109,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
# x86 encoder assembly sources. Each line appends one file to the
# VP8_CX_SRCS-<value> list selected by expanding the capability variable(s)
# in its name.
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment