Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
39f42c87
Commit
39f42c87
authored
Aug 14, 2013
by
hkuang
Committed by
Gerrit Code Review
Aug 14, 2013
Browse files
Merge "Add neon optimize vp9_short_idct16x16_add."
parents
bb072000
cf6beea6
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
vp9/common/arm/neon/vp9_idct16x16_neon.c
0 → 100644
View file @
39f42c87
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include
"./vp9_rtcd.h"
#include
"vp9/common/vp9_common.h"
/*
 * First half of the two-pass 16-point inverse DCT used by
 * vp9_short_idct16x16_add_neon(). The definition is not in this file
 * (presumably the accompanying NEON assembly source -- confirm).
 *
 * input          coefficients to transform; the caller describes this pass
 *                as processing the even elements 0, 2, ..., 14.
 * output         scratch buffer that receives the intermediate ("stage 6")
 *                result, later consumed by the pass2 routine.
 * output_stride  every visible caller passes 8; units are defined by the
 *                implementation -- TODO confirm.
 */
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
                                               int16_t *output,
                                               int output_stride);
/*
 * Second half of the two-pass 16-point inverse DCT used by
 * vp9_short_idct16x16_add_neon(). The definition is not in this file
 * (presumably the accompanying NEON assembly source -- confirm).
 *
 * src          coefficients to transform; callers pass the pass1 input
 *              advanced by one element and describe this pass as handling
 *              the odd elements 1, 3, ..., 15.
 * output       buffer for the final ("stage 7") result when it is not
 *              being added to dest.
 * pass1Output  intermediate result produced by the pass1 routine.
 * skip_adding  callers pass 0 for the row transform (result kept in
 *              output) and 1 for the column transform (result added to
 *              dest).
 *              NOTE(review): that usage reads as the opposite of what the
 *              name suggests -- confirm against the implementation.
 * dest         destination pixels the result is added to.
 * dest_stride  stride of dest (presumably in bytes -- confirm).
 */
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
                                               int16_t *output,
                                               int16_t *pass1Output,
                                               int16_t skip_adding,
                                               uint8_t *dest,
                                               int dest_stride);
/*
 * Saves the d8-d15 register values (per the comment at its call site in
 * this file). Defined outside this file.
 *
 * Declared with an explicit (void) parameter list: an empty "()" is an
 * old-style declarator that tells the compiler nothing about the
 * arguments, so a mistaken call with arguments would go undiagnosed.
 */
extern void save_registers(void);
/*
 * Restores the d8-d15 register values (per the comment at its call site
 * in this file). Defined outside this file.
 *
 * Declared with an explicit (void) parameter list: an empty "()" is an
 * old-style declarator that tells the compiler nothing about the
 * arguments, so a mistaken call with arguments would go undiagnosed.
 */
extern void restore_registers(void);
/*
 * 16x16 inverse DCT whose result is added into dest.
 *
 * The transform is done as four half-block steps, each one a pass1 call
 * (even elements, intermediate result into pass1_output) followed by a
 * pass2 call (odd elements, combined with pass1_output):
 *   1. rows, upper 8  -> row_idct_output
 *   2. rows, lower 8  -> row_idct_output + 8
 *   3. columns, left 8  -> added to dest
 *   4. columns, right 8 -> added to dest + 8
 *
 * input        16x16 block of coefficients.
 * dest         destination pixels the result is added to.
 * dest_stride  stride of dest, forwarded unchanged to the pass2 routine.
 *
 * NOTE(review): how save_registers() preserves d8-d15 is not visible from
 * this file. If it pushes them on the stack and returns, the stack pointer
 * changes underneath this C frame -- confirm against the assembly.
 */
void vp9_short_idct16x16_add_neon(int16_t *input, uint8_t *dest,
                                  int dest_stride) {
  int16_t pass1_output[16 * 16] = {0};
  int16_t row_idct_output[16 * 16] = {0};

  /* Starting points of the second half-block for each direction. */
  int16_t *const lower_rows = input + 8 * 16;
  int16_t *const right_cols = row_idct_output + 8 * 16;

  /* Preserve d8-d15 for the duration of the transform. */
  save_registers();

  /* Row transform, upper 8 rows: result is kept in row_idct_output. */
  vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
  vp9_short_idct16x16_add_neon_pass2(input + 1, row_idct_output,
                                     pass1_output, 0, dest, dest_stride);

  /* Row transform, lower 8 rows: result is kept at row_idct_output + 8. */
  vp9_short_idct16x16_add_neon_pass1(lower_rows, pass1_output, 8);
  vp9_short_idct16x16_add_neon_pass2(lower_rows + 1, row_idct_output + 8,
                                     pass1_output, 0, dest, dest_stride);

  /* Column transform, left 8 columns: result is added to dest. */
  vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
  vp9_short_idct16x16_add_neon_pass2(row_idct_output + 1, row_idct_output,
                                     pass1_output, 1, dest, dest_stride);

  /* Column transform, right 8 columns: result is added to dest + 8. */
  vp9_short_idct16x16_add_neon_pass1(right_cols, pass1_output, 8);
  vp9_short_idct16x16_add_neon_pass2(right_cols + 1, row_idct_output + 8,
                                     pass1_output, 1, dest + 8, dest_stride);

  /* Put d8-d15 back before returning to the caller. */
  restore_registers();
}
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
0 → 100644
View file @
39f42c87
This diff is collapsed.
Click to expand it.
vp9/common/vp9_rtcd_defs.sh
View file @
39f42c87
...
...
@@ -313,7 +313,7 @@ prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int des
 specialize vp9_short_idct16x16_1_add sse2
 prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct16x16_add sse2
+specialize vp9_short_idct16x16_add sse2 neon
 prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
 specialize vp9_short_idct10_16x16_add sse2
...
...
vp9/vp9_common.mk
View file @
39f42c87
...
...
@@ -91,12 +91,14 @@ endif
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct4x4_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
 $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment