Guillaume Martres / aom-rav1e · Commits

Commit 1d65b3be, authored May 03, 2014 by Johann, committed by Gerrit Code Review on May 03, 2014

Merge "Remove asm_offsets dependency in quantize_b_ssse3"

Parents: cf2262c4, 570d43c0
Changes: 3 files
vp8/encoder/x86/quantize_ssse3.asm  (deleted, 100644 → 0)
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;

%include "vpx_ports/x86_abi_support.asm"
%include "vp8_asm_enc_offsets.asm"

; void vp8_fast_quantize_b_ssse3 | arg
;  (BLOCK  *b,                   |  0
;   BLOCKD *d)                   |  1
;

global sym(vp8_fast_quantize_b_ssse3) PRIVATE
sym(vp8_fast_quantize_b_ssse3):
    push        rbp
    mov         rbp, rsp
    GET_GOT     rbx

%if ABI_IS_32BIT
    push        rdi
    push        rsi
%else
  %if LIBVPX_YASM_WIN64
    push        rdi
    push        rsi
  %endif
%endif
    ; end prolog

%if ABI_IS_32BIT
    mov         rdi, arg(0)                 ; BLOCK *b
    mov         rsi, arg(1)                 ; BLOCKD *d
%else
  %if LIBVPX_YASM_WIN64
    mov         rdi, rcx                    ; BLOCK *b
    mov         rsi, rdx                    ; BLOCKD *d
  %else
    ;mov        rdi, rdi                    ; BLOCK *b
    ;mov        rsi, rsi                    ; BLOCKD *d
  %endif
%endif

    mov         rax, [rdi + vp8_block_coeff]
    mov         rcx, [rdi + vp8_block_round]
    mov         rdx, [rdi + vp8_block_quant_fast]

    ; coeff
    movdqa      xmm0, [rax]
    movdqa      xmm4, [rax + 16]

    ; round
    movdqa      xmm2, [rcx]
    movdqa      xmm3, [rcx + 16]

    movdqa      xmm1, xmm0
    movdqa      xmm5, xmm4

    ; sz = z >> 15
    psraw       xmm0, 15
    psraw       xmm4, 15

    pabsw       xmm1, xmm1
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2
    paddw       xmm5, xmm3

    ; quant_fast
    pmulhw      xmm1, [rdx]
    pmulhw      xmm5, [rdx + 16]

    mov         rax, [rsi + vp8_blockd_qcoeff]
    mov         rdi, [rsi + vp8_blockd_dequant]
    mov         rcx, [rsi + vp8_blockd_dqcoeff]

    movdqa      xmm2, xmm1                  ; store y for getting eob
    movdqa      xmm3, xmm5

    pxor        xmm1, xmm0
    pxor        xmm5, xmm4
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4
    movdqa      [rax], xmm1
    movdqa      [rax + 16], xmm5

    movdqa      xmm0, [rdi]
    movdqa      xmm4, [rdi + 16]

    pmullw      xmm0, xmm1
    pmullw      xmm4, xmm5

    pxor        xmm1, xmm1
    pcmpgtw     xmm2, xmm1                  ; calculate eob
    pcmpgtw     xmm3, xmm1
    packsswb    xmm2, xmm3
    pshufb      xmm2, [GLOBAL(zz_shuf)]
    pmovmskb    edx, xmm2

    movdqa      [rcx], xmm0                 ; store dqcoeff
    movdqa      [rcx + 16], xmm4            ; store dqcoeff

    mov         rcx, [rsi + vp8_blockd_eob]
    bsr         eax, edx                    ; count 0
    add         eax, 1

    cmp         edx, 0                      ; if all 0, eob = 0
    cmove       eax, edx

    mov         BYTE PTR [rcx], al          ; store eob

    ; begin epilog
%if ABI_IS_32BIT
    pop         rsi
    pop         rdi
%else
  %if LIBVPX_YASM_WIN64
    pop         rsi
    pop         rdi
  %endif
%endif

    RESTORE_GOT
    pop         rbp
    ret

SECTION_RODATA
align 16
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
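The `%include "vp8_asm_enc_offsets.asm"` directive above, together with symbolic constants such as `vp8_block_coeff` and `vp8_blockd_qcoeff`, is the asm_offsets dependency this change removes: the assembly cannot see C struct layouts, so the byte offsets of the BLOCK/BLOCKD members it touches have to be generated from the C headers at build time before the file can be assembled. The sketch below only illustrates the general idea with `offsetof()`; it is not the actual libvpx offset generator (which is not part of this diff), and the `ASM_OFFSET` macro is hypothetical.

#include <stddef.h>              /* offsetof */
#include "vp8/encoder/block.h"   /* BLOCK, BLOCKD */

/* Hypothetical illustration of what an asm_offsets generator provides:
 * member offsets computed in C and later emitted as constants for the
 * generated .asm include. */
#define ASM_OFFSET(name, type, member) \
  static const size_t name = offsetof(type, member)

ASM_OFFSET(vp8_block_coeff,      BLOCK,  coeff);
ASM_OFFSET(vp8_block_round,      BLOCK,  round);
ASM_OFFSET(vp8_block_quant_fast, BLOCK,  quant_fast);
ASM_OFFSET(vp8_blockd_qcoeff,    BLOCKD, qcoeff);
ASM_OFFSET(vp8_blockd_dequant,   BLOCKD, dequant);
ASM_OFFSET(vp8_blockd_dqcoeff,   BLOCKD, dqcoeff);
ASM_OFFSET(vp8_blockd_eob,       BLOCKD, eob);

/* The intrinsics replacement in the next file dereferences b->coeff,
 * d->qcoeff, etc. directly, so none of this generated glue is needed. */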
vp8/encoder/x86/quantize_ssse3.c  (new file, 0 → 100644)
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <tmmintrin.h> /* SSSE3 */

#include "vp8/encoder/block.h"

/* bitscan reverse (bsr) */
#if defined(_MSC_VER)
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
static int bsr(int mask) {
  int eob;
  _BitScanReverse(&eob, mask);
  eob++;
  if (mask == 0)
    eob = 0;
  return eob;
}
#else
static int bsr(int mask) {
  int eob;
  asm volatile("bsr %1, %0" : "=r" (eob) : "r" (mask) : "flags");
  eob++;
  if (mask == 0)
    eob = 0;
  return eob;
}
#endif

void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) {
  int eob, mask;

  __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
  __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
  __m128i round0 = _mm_load_si128((__m128i *)(b->round));
  __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
  __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
  __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
  __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
  __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));

  __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1;

  DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) =
      { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
  __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask);

  /* sign of z: z >> 15 */
  sz0 = _mm_srai_epi16(z0, 15);
  sz1 = _mm_srai_epi16(z1, 15);

  /* x = abs(z) */
  x0 = _mm_abs_epi16(z0);
  x1 = _mm_abs_epi16(z1);

  /* x += round */
  x0 = _mm_add_epi16(x0, round0);
  x1 = _mm_add_epi16(x1, round1);

  /* y = (x * quant) >> 16 */
  y0 = _mm_mulhi_epi16(x0, quant_fast0);
  y1 = _mm_mulhi_epi16(x1, quant_fast1);

  /* ASM saves Y for EOB */
  /* I think we can ignore that because adding the sign doesn't change anything
   * and multiplying 0 by dequant is OK as well */
  abs0 = y0;
  abs1 = y1;

  /* Restore the sign bit. */
  y0 = _mm_xor_si128(y0, sz0);
  y1 = _mm_xor_si128(y1, sz1);
  x0 = _mm_sub_epi16(y0, sz0);
  x1 = _mm_sub_epi16(y1, sz1);

  /* qcoeff = x */
  _mm_store_si128((__m128i *)(d->qcoeff), x0);
  _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);

  /* x * dequant */
  x0 = _mm_mullo_epi16(x0, dequant0);
  x1 = _mm_mullo_epi16(x1, dequant1);

  /* dqcoeff = x * dequant */
  _mm_store_si128((__m128i *)(d->dqcoeff), x0);
  _mm_store_si128((__m128i *)(d->dqcoeff + 8), x1);

  zeros = _mm_setzero_si128();

  x0 = _mm_cmpgt_epi16(abs0, zeros);
  x1 = _mm_cmpgt_epi16(abs1, zeros);

  x = _mm_packs_epi16(x0, x1);

  x = _mm_shuffle_epi8(x, zig_zag);

  mask = _mm_movemask_epi8(x);

  eob = bsr(mask);

  *d->eob = 0xFF & eob;
}
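For readers less familiar with SSE intrinsics, here is a hedged scalar sketch of what the routine above computes for one 4x4 block: quantize the absolute value with the per-coefficient round and quant_fast terms, restore the sign, store the dequantized value, and record the end-of-block position in zig-zag scan order (the bsr-over-movemask trick in the SIMD version). The function name and flat argument list are illustrative only (the real entry point takes BLOCK/BLOCKD), and the sketch ignores the 16-bit lane arithmetic of the SIMD code, so behaviour at extreme coefficient values may differ.

#include <stdlib.h> /* abs() */

/* VP8 zig-zag scan order; the same table as zz_shuf / pshufb_zig_zag_mask. */
static const int zigzag[16] = { 0, 1,  4,  8,  5, 2,  3,  6,
                                9, 12, 13, 10, 7, 11, 14, 15 };

/* Hypothetical scalar equivalent of vp8_fast_quantize_b_ssse3
 * (illustration only, not part of this commit). */
static void fast_quantize_b_scalar(const short *coeff, const short *round,
                                   const short *quant_fast,
                                   const short *dequant,
                                   short *qcoeff, short *dqcoeff, char *eob) {
  int i, last = -1;

  for (i = 0; i < 16; ++i) {
    const int rc = zigzag[i];                  /* raster position of scan index i */
    const int z = coeff[rc];
    /* y = ((abs(z) + round) * quant_fast) >> 16: the quantized magnitude */
    const int y = ((abs(z) + round[rc]) * quant_fast[rc]) >> 16;
    const int q = (z < 0) ? -y : y;            /* restore the sign */

    qcoeff[rc] = (short)q;
    dqcoeff[rc] = (short)(q * dequant[rc]);    /* dequantized coefficient */
    if (y) last = i;                           /* eob counts in scan order */
  }
  *eob = (char)(last + 1);                     /* 0 when the block is all zero */
}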
vp8/vp8cx.mk
@@ -88,6 +88,7 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
 ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
@@ -96,7 +97,6 @@ endif
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
-VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm