Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
ce04b1aa
Commit
ce04b1aa
authored
Aug 27, 2013
by
hkuang
Committed by
Gerrit Code Review
Aug 27, 2013
Browse files
Merge "Add neon optimize vp9_short_idct8x8_1_add."
parents
7b95f9bf
36e9b820
Changes
3
Hide whitespace changes
Inline
Side-by-side
vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
0 → 100644
View file @
ce04b1aa
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
;-----------------------------------------------------------------------
; vp9_short_idct8x8_1_add_neon
;
; Syntax: ARM armasm (ADS).  Target: ARM state with NEON.
;
; Handles an 8x8 block using only input[0]: one rounded value a1 is
; derived from input[0] and added, with clipping to 0..255, to every
; byte of an 8x8 region of dest.
;-----------------------------------------------------------------------

    EXPORT  |vp9_short_idct8x8_1_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
;                                   int dest_stride)
;
; In:
;   r0   int16_t *input      only input[0] is read
;   r1   uint8_t *dest       8 rows of 8 bytes, rows dest_stride apart
;   r2   int      dest_stride
;
; Register roles inside the routine:
;   r0        input[0], then the running scalar, finally a1
;   r12       cospi_16_64 while multiplying, afterwards the store pointer
;   r1        load pointer (post-incremented by r2 after rows 0..6)
;   q0        a1 replicated into eight 16-bit lanes
;   d2-d7,
;   d16-d17   the eight destination rows as loaded
;   q9-q12    widened 16-bit sums dest[x] + a1 (four rows at a time)
;   d2, d3,
;   d30, d31  saturated 8-bit results (four rows at a time)
;
; Clobbers: r0, r1, r12, d0-d7, d16-d17, q9-q12, d30-d31.
;           d8-d15 and the stack are not touched.
;-----------------------------------------------------------------------

|vp9_short_idct8x8_1_add_neon| PROC
    ; ---- scalar part: derive a1 from input[0] ------------------------
    ldrsh            r0, [r0]              ; r0 = input[0], sign-extended

    ; cospi_16_64 = 11585 = 0x2d00 + 0x41, built in two steps
    mov              r12, #0x2d00
    add              r12, r12, #0x41

    ; first pass: out = dct_const_round_shift(input[0] * cospi_16_64)
    mul              r0, r0, r12           ; input[0] * cospi_16_64
    add              r0, r0, #0x2000       ; + (1 << (DCT_CONST_BITS - 1))
    asr              r0, r0, #14           ; >> DCT_CONST_BITS

    ; second pass: out = dct_const_round_shift(out * cospi_16_64)
    mul              r0, r0, r12           ; out * cospi_16_64
    mov              r12, r1               ; constant no longer needed:
                                           ; r12 now keeps the original dest
                                           ; for the stores (presumably placed
                                           ; here to fill the mul latency)
    add              r0, r0, #0x2000       ; + (1 << (DCT_CONST_BITS - 1))
    asr              r0, r0, #14           ; >> DCT_CONST_BITS

    ; a1 = ROUND_POWER_OF_TWO(out, 5)
    add              r0, r0, #16           ; + (1 << (5 - 1))
    asr              r0, r0, #5            ; >> 5

    vdup.s16         q0, r0                ; q0 = a1 in all eight lanes

    ; ---- fetch all eight destination rows ----------------------------
    ; r1 walks down the block; the last load does not advance it.
    vld1.64          {d2},  [r1], r2       ; row 0
    vld1.64          {d3},  [r1], r2       ; row 1
    vld1.64          {d4},  [r1], r2       ; row 2
    vld1.64          {d5},  [r1], r2       ; row 3
    vld1.64          {d6},  [r1], r2       ; row 4
    vld1.64          {d7},  [r1], r2       ; row 5
    vld1.64          {d16}, [r1], r2       ; row 6
    vld1.64          {d17}, [r1]           ; row 7

    ; ---- rows 0..3: widen + add, saturate, write back ----------------
    vaddw.u8         q9,  q0, d2           ; row 0: dest[x] + a1 (16-bit)
    vaddw.u8         q10, q0, d3           ; row 1
    vaddw.u8         q11, q0, d4           ; row 2
    vaddw.u8         q12, q0, d5           ; row 3
    vqmovun.s16      d2,  q9               ; clip_pixel: saturate to u8
    vqmovun.s16      d3,  q10
    vqmovun.s16      d30, q11
    vqmovun.s16      d31, q12
    vst1.64          {d2},  [r12], r2      ; store row 0
    vst1.64          {d3},  [r12], r2      ; store row 1
    vst1.64          {d30}, [r12], r2      ; store row 2
    vst1.64          {d31}, [r12], r2      ; store row 3

    ; ---- rows 4..7: same sequence on the remaining rows --------------
    vaddw.u8         q9,  q0, d6           ; row 4: dest[x] + a1 (16-bit)
    vaddw.u8         q10, q0, d7           ; row 5
    vaddw.u8         q11, q0, d16          ; row 6
    vaddw.u8         q12, q0, d17          ; row 7
    vqmovun.s16      d2,  q9               ; clip_pixel: saturate to u8
    vqmovun.s16      d3,  q10
    vqmovun.s16      d30, q11
    vqmovun.s16      d31, q12
    vst1.64          {d2},  [r12], r2      ; store row 4
    vst1.64          {d3},  [r12], r2      ; store row 5
    vst1.64          {d30}, [r12], r2      ; store row 6
    vst1.64          {d31}, [r12], r2      ; store row 7

    bx               lr
    ENDP             ; |vp9_short_idct8x8_1_add_neon|

    END
vp9/common/vp9_rtcd_defs.sh
View file @
ce04b1aa
...
...
@@ -301,7 +301,7 @@ prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_st
specialize vp9_short_idct4x4_add sse2 neon
prototype void vp9_short_idct8x8_1_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_1_add sse2
specialize vp9_short_idct8x8_1_add sse2
neon
prototype void vp9_short_idct8x8_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_add sse2 neon
...
...
vp9/vp9_common.mk
View file @
ce04b1aa
...
...
@@ -98,6 +98,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_dc_only_idct_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct4x4_1_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct4x4_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct8x8_1_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct8x8_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_short_idct16x16_add_neon
$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_mb_lpf_neon
$(ASM)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment