Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Guillaume Martres
aom-rav1e
Commits
e54dcfe8
Commit
e54dcfe8
authored
Mar 08, 2011
by
Attila Nagy
Browse files
Add vp8_mse16x16_armv6 function
Change-Id: I77e9f2f521a71089228f96e2db72524189364ffb
parent
a0306ea6
Changes
4
Hide whitespace changes
Inline
Side-by-side
vp8/encoder/arm/arm_csystemdependent.c
View file @
e54dcfe8
...
...
@@ -50,8 +50,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi
->
rtcd
.
variance
.
halfpixvar16x16_v
=
vp8_variance_halfpixvar16x16_v_armv6
;
cpi
->
rtcd
.
variance
.
halfpixvar16x16_hv
=
vp8_variance_halfpixvar16x16_hv_armv6
;
/*
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_
c
;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
cpi
->
rtcd
.
variance
.
mse16x16
=
vp8_mse16x16_
armv6
;
/*
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
/*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
...
...
vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
0 → 100644
View file @
e54dcfe8
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; Assembler preamble (armasm syntax): export the entry point, select the
; ARM (not Thumb) instruction set, and place the code in a read-only
; .text area. ALIGN=2 is a power-of-two exponent, i.e. 4-byte alignment.
    EXPORT  |vp8_mse16x16_armv6|

    ARM

    AREA    ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; vp8_mse16x16_armv6
;
; Accumulates the sum of squared byte differences between a 16x16 block
; at src_ptr and a 16x16 block at ref_ptr, stores it to *sse and also
; returns it in r0.
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int  source_stride
;   r2      unsigned char *ref_ptr
;   r3      int  recon_stride
;   stack   unsigned int *sse
;
; Out:
;   r0      sse (same value that is written to *sse)
;
; Register roles inside the loop:
;   r4      running sse accumulator
;   r5/r6   4 src pixels / 4 ref pixels (r6 is reused for halfword pairs)
;   r7-r9   per-byte difference scratch
;   r12     rows remaining
;   lr      constant zero (operand for sel)
;
; Saves and restores r4-r9 and lr; r12 is used as scratch.
;
;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
;      So, the usad8 partial-sum instructions of the variance routine are
;      omitted here; only the squared differences are accumulated.
;-----------------------------------------------------------------------
|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}

    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     r4, #0              ; initialize sse = 0
    mov     lr, #0              ; constant zero, never modified in the loop

loop
    ; 1st 4 pixels
    ldr     r5, [r0, #0x0]      ; load 4 src pixels
    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

    usub8   r8, r5, r6          ; per-byte src - ref, sets GE flags
    sel     r7, r8, lr          ; keep bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; per-byte ref - src, sets GE flags
    sel     r8, r9, lr          ; keep bytes where ref >= src, else 0
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x4]      ; load next 4 src pixels early

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 2nd 4 pixels
    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r8, r5, r6          ; per-byte src - ref, sets GE flags
    sel     r7, r8, lr          ; keep bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; per-byte ref - src, sets GE flags
    sel     r8, r9, lr          ; keep bytes where ref >= src, else 0
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x8]      ; load next 4 src pixels early

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 3rd 4 pixels
    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r8, r5, r6          ; per-byte src - ref, sets GE flags
    sel     r7, r8, lr          ; keep bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; per-byte ref - src, sets GE flags
    sel     r8, r9, lr          ; keep bytes where ref >= src, else 0
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0xc]      ; load next 4 src pixels early

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 4th 4 pixels
    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r8, r5, r6          ; per-byte src - ref, sets GE flags
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r8, lr          ; keep bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; per-byte ref - src, sets GE flags
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r8, r9, lr          ; keep bytes where ref >= src, else 0
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    subs    r12, r12, #1        ; next row; Z flag is consumed by bne below

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    bne     loop

    ; return stuff
    ldr     r1, [sp, #28]       ; address of sse: above the 7 pushed words
    mov     r0, r4              ; return sse
    str     r4, [r1]            ; store sse
    pop     {r4-r9, pc}

    ENDP
END
vp8/encoder/arm/variance_arm.h
View file @
e54dcfe8
...
...
@@ -20,6 +20,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
extern
prototype_variance
(
vp8_variance_halfpixvar16x16_h_armv6
);
extern
prototype_variance
(
vp8_variance_halfpixvar16x16_v_armv6
);
extern
prototype_variance
(
vp8_variance_halfpixvar16x16_hv_armv6
);
extern
prototype_variance
(
vp8_mse16x16_armv6
);
#if !CONFIG_RUNTIME_CPU_DETECT
...
...
@@ -32,6 +33,9 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_armv6
#undef vp8_variance_halfpixvar16x16_h
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
...
...
vp8/vp8cx_arm.mk
View file @
e54dcfe8
...
...
@@ -36,6 +36,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
# encoder
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_sad16x16_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_variance16x16_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_mse16x16_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/walsh_v6
$(ASM)
#File list for neon
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment