Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
7d8199f0
Commit
7d8199f0
authored
Feb 10, 2011
by
Johann
Committed by
Code Review
Feb 10, 2011
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Adds armv6 optimized variance calculation"
parents
fffa2a61
cb14764f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
242 additions
and
25 deletions
+242
-25
vp8/common/arm/bilinearfilter_arm.c
vp8/common/arm/bilinearfilter_arm.c
+1
-20
vp8/common/arm/bilinearfilter_arm.h
vp8/common/arm/bilinearfilter_arm.h
+35
-0
vp8/encoder/arm/arm_csystemdependent.c
vp8/encoder/arm/arm_csystemdependent.c
+4
-4
vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
+147
-0
vp8/encoder/arm/variance_arm.c
vp8/encoder/arm/variance_arm.c
+34
-0
vp8/encoder/arm/variance_arm.h
vp8/encoder/arm/variance_arm.h
+17
-0
vp8/vp8_common.mk
vp8/vp8_common.mk
+1
-0
vp8/vp8cx_arm.mk
vp8/vp8cx_arm.mk
+3
-1
No files found.
vp8/common/arm/bilinearfilter_arm.c
View file @
7d8199f0
...
...
@@ -12,26 +12,7 @@
#include <math.h>
#include "filter.h"
#include "subpixel.h"
extern
void
vp8_filter_block2d_bil_first_pass_armv6
(
unsigned
char
*
src_ptr
,
unsigned
short
*
dst_ptr
,
unsigned
int
src_pitch
,
unsigned
int
height
,
unsigned
int
width
,
const
short
*
vp8_filter
);
extern
void
vp8_filter_block2d_bil_second_pass_armv6
(
unsigned
short
*
src_ptr
,
unsigned
char
*
dst_ptr
,
int
dst_pitch
,
unsigned
int
height
,
unsigned
int
width
,
const
short
*
vp8_filter
);
#include "arm/bilinearfilter_arm.h"
void
vp8_filter_block2d_bil_armv6
(
...
...
vp8/common/arm/bilinearfilter_arm.h
0 → 100644
View file @
7d8199f0
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef BILINEARFILTER_ARM_H
#define BILINEARFILTER_ARM_H
/*
 * Prototype of the first bilinear filter pass. Only the declaration lives
 * here; the routine is defined elsewhere (presumably ARMv6 assembly, going
 * by the name -- confirm).
 *
 *   src_ptr     input, const unsigned char (not modified through this pointer)
 *   dst_ptr     output, unsigned short elements
 *   src_pitch   presumably the stride between input rows -- confirm against
 *               the implementation
 *   height      row count passed to the routine
 *   width       column count passed to the routine
 *   vp8_filter  const short filter coefficients
 */
extern
void
vp8_filter_block2d_bil_first_pass_armv6
(
const
unsigned
char
*
src_ptr
,
unsigned
short
*
dst_ptr
,
unsigned
int
src_pitch
,
unsigned
int
height
,
unsigned
int
width
,
const
short
*
vp8_filter
);
/*
 * Prototype of the second bilinear filter pass. Only the declaration lives
 * here; the routine is defined elsewhere (presumably ARMv6 assembly, going
 * by the name -- confirm).
 *
 *   src_ptr     input, const unsigned short elements (the element type the
 *               first pass writes)
 *   dst_ptr     output, unsigned char elements
 *   dst_pitch   presumably the stride between output rows -- confirm against
 *               the implementation
 *   height      row count passed to the routine
 *   width       column count passed to the routine
 *   vp8_filter  const short filter coefficients
 */
extern
void
vp8_filter_block2d_bil_second_pass_armv6
(
const
unsigned
short
*
src_ptr
,
unsigned
char
*
dst_ptr
,
int
dst_pitch
,
unsigned
int
height
,
unsigned
int
width
,
const
short
*
vp8_filter
);
#endif
/* BILINEARFILTER_ARM_H */
vp8/encoder/arm/arm_csystemdependent.c
View file @
7d8199f0
...
...
@@ -38,14 +38,14 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
/*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
cpi->rtcd.variance.var16x16 = vp8_variance16x16_
c;*/
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
*/
cpi
->
rtcd
.
variance
.
var16x16
=
vp8_variance16x16_
armv6
;
/*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_
c;*/
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
*/
cpi
->
rtcd
.
variance
.
subpixvar16x16
=
vp8_sub_pixel_variance16x16_
armv6
;
/*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
...
...
vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
0 → 100644
View file @
7d8199f0
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; ARMv6 (ARM state) implementation of the 16x16 variance kernel.
; Syntax: RVCT/armasm -- labels in column 0, everything else indented.

    EXPORT  |vp8_variance16x16_armv6|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_variance16x16_armv6
;
; Walks 16 rows of 16 pixels, accumulating
;   sum = SUM(src[i] - ref[i])          (signed)
;   sse = SUM((src[i] - ref[i])^2)
; then stores sse through the pointer passed on the stack and returns
;   sse - ((sum * sum) >> 8)
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int            source_stride
;   r2      unsigned char *ref_ptr
;   r3      int            recon_stride
;   stack   unsigned int  *sse
; Out:
;   r0      sse - ((sum * sum) >> 8)
;   [sse]   sum of squared differences
;
; Register roles inside the loop:
;   r4, r5      loaded pixel words, then the two partial sums
;   r6, r7, r9  per-byte differences
;   r5, r10     differences widened to halfwords for smlad
;   r8          running sum (signed)
;   r11         running sse
;   r12         rows remaining
;   lr          constant zero
;
; r4-r12 and lr are saved on entry and restored on exit.
; Pixels are fetched with word loads (ldr) at offsets 0/4/8/12 from each
; row pointer. NOTE(review): this presumably relies on 4-byte aligned rows
; or on unaligned word access being enabled -- confirm for the target.
;-----------------------------------------------------------------------
|vp8_variance16x16_armv6| PROC

    stmfd   sp!, {r4-r12, lr}
    mov     r12, #16            ; rows remaining (= block height)
    mov     r8, #0              ; sum = 0
    mov     r11, #0             ; sse = 0
    mov     lr, #0              ; constant zero; never written inside the
                                ; loop, so it is set once here

loop
    ; ---- 1st 4 pixels ----
    ldr     r4, [r0, #0x0]      ; 4 src pixels
    ldr     r5, [r2, #0x0]      ; 4 ref pixels

    usub8   r6, r4, r5          ; per-byte src - ref; sets GE per lane
    sel     r7, r6, lr          ; keep lanes where src >= ref, else 0
    usub8   r9, r5, r4          ; per-byte ref - src; sets GE per lane
    sel     r6, r9, lr          ; keep lanes where ref >= src, else 0
    ; partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative differences (magnitudes)
    orr     r6, r6, r7          ; |src - ref| for all 4 lanes (per lane at
                                ; most one of r6/r7 is non-zero)
    ; total sum; plain add/sub because the flags are not consumed
    add     r8, r8, r4          ; sum += positive differences
    sub     r8, r8, r5          ; sum -= negative differences

    ; sse
    uxtb16  r5, r6              ; lanes 0 and 2 -> halfwords
    uxtb16  r10, r6, ror #8     ; lanes 1 and 3 -> halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2

    ; ---- 2nd 4 pixels ----
    ldr     r4, [r0, #0x4]      ; 4 src pixels
    ldr     r5, [r2, #0x4]      ; 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    sel     r7, r6, lr          ; positive differences
    usub8   r9, r5, r4          ; per-byte ref - src
    sel     r6, r9, lr          ; negative differences (magnitudes)
    ; partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative differences
    orr     r6, r6, r7          ; |src - ref| for all 4 lanes
    ; total sum
    add     r8, r8, r4          ; sum += positive differences
    sub     r8, r8, r5          ; sum -= negative differences

    ; sse
    uxtb16  r5, r6              ; lanes 0 and 2 -> halfwords
    uxtb16  r10, r6, ror #8     ; lanes 1 and 3 -> halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2

    ; ---- 3rd 4 pixels ----
    ldr     r4, [r0, #0x8]      ; 4 src pixels
    ldr     r5, [r2, #0x8]      ; 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    sel     r7, r6, lr          ; positive differences
    usub8   r9, r5, r4          ; per-byte ref - src
    sel     r6, r9, lr          ; negative differences (magnitudes)
    ; partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative differences
    orr     r6, r6, r7          ; |src - ref| for all 4 lanes
    ; total sum
    add     r8, r8, r4          ; sum += positive differences
    sub     r8, r8, r5          ; sum -= negative differences

    ; sse
    uxtb16  r5, r6              ; lanes 0 and 2 -> halfwords
    uxtb16  r10, r6, ror #8     ; lanes 1 and 3 -> halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2

    ; ---- 4th 4 pixels ----
    ldr     r4, [r0, #0xc]      ; 4 src pixels
    ldr     r5, [r2, #0xc]      ; 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    add     r0, r0, r1          ; src_ptr -> next row
    sel     r7, r6, lr          ; positive differences
    usub8   r9, r5, r4          ; per-byte ref - src
    add     r2, r2, r3          ; ref_ptr -> next row
    sel     r6, r9, lr          ; negative differences (magnitudes)
    ; partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative differences
    orr     r6, r6, r7          ; |src - ref| for all 4 lanes
    ; total sum
    add     r8, r8, r4          ; sum += positive differences
    sub     r8, r8, r5          ; sum -= negative differences

    ; sse
    uxtb16  r5, r6              ; lanes 0 and 2 -> halfwords
    uxtb16  r10, r6, ror #8     ; lanes 1 and 3 -> halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3

    subs    r12, r12, #1        ; one row done
    bne     loop

    ; ---- return ----
    ldr     r6, [sp, #0x28]     ; sse pointer: first stack argument, above
                                ; the 10 registers (40 bytes) pushed on entry
    mul     r0, r8, r8          ; sum * sum (low 32 bits)
    str     r11, [r6]           ; *sse = sse
    ; sum * sum can reach 65280^2 = 4261478400, which is above 2^31 but
    ; still fits in 32 bits. It must therefore be shifted as an unsigned
    ; value: an arithmetic shift would sign-extend and inflate the result.
    sub     r0, r11, r0, LSR #8 ; return sse - ((sum * sum) >> 8)

    ldmfd   sp!, {r4-r12, pc}

    ENDP

    END
vp8/encoder/arm/variance_arm.c
View file @
7d8199f0
...
...
@@ -10,6 +10,40 @@
#include "vpx_config.h"
#include "variance.h"
#include "filter.h"
#include "arm/bilinearfilter_arm.h"
#if HAVE_ARMV6
/*
 * 16x16 sub-pixel variance for ARMv6 builds.
 *
 * Runs the two bilinear filter passes over the source block, using the
 * vp8_bilinear_filters rows selected by xoffset (first pass) and yoffset
 * (second pass), then returns whatever vp8_variance16x16_armv6() computes
 * for the filtered block against dst_ptr. sse is handed straight through
 * to vp8_variance16x16_armv6().
 *
 * NOTE(review): xoffset and yoffset index vp8_bilinear_filters without a
 * range check here -- callers presumably guarantee valid indices; confirm.
 */
unsigned int vp8_sub_pixel_variance16x16_armv6
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    /* Scratch buffers for the output of each filter pass. */
    unsigned short pass1_out[36 * 16];
    unsigned char  pass2_out[20 * 16];

    const short *const x_taps = vp8_bilinear_filters[xoffset];
    const short *const y_taps = vp8_bilinear_filters[yoffset];

    /* First pass: height 17, width 16, read with the caller's stride. */
    vp8_filter_block2d_bil_first_pass_armv6(src_ptr, pass1_out,
                                            src_pixels_per_line,
                                            17, 16, x_taps);

    /* Second pass: height 16, width 16, destination pitch 16. */
    vp8_filter_block2d_bil_second_pass_armv6(pass1_out, pass2_out,
                                             16, 16, 16, y_taps);

    /* The filtered block is tightly packed, hence the stride of 16. */
    return vp8_variance16x16_armv6(pass2_out, 16,
                                   dst_ptr, dst_pixels_per_line,
                                   sse);
}
#endif
#if HAVE_ARMV7
...
...
vp8/encoder/arm/variance_arm.h
View file @
7d8199f0
...
...
@@ -12,6 +12,23 @@
#ifndef VARIANCE_ARM_H
#define VARIANCE_ARM_H
/*
 * ARMv6 variance entry points. The prototype_variance and
 * prototype_subpixvariance macros are defined outside this header; they
 * presumably expand to the matching function signatures -- confirm in
 * variance.h.
 */
#if HAVE_ARMV6
extern
prototype_variance
(
vp8_variance16x16_armv6
);
extern
prototype_subpixvariance
(
vp8_sub_pixel_variance16x16_armv6
);
/*
 * Without runtime CPU detection, rebind the 16x16 variance and sub-pixel
 * variance hook macros to the ARMv6 routines at compile time.
 */
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
#endif
/* !CONFIG_RUNTIME_CPU_DETECT */
#endif
/* HAVE_ARMV6 */
#if HAVE_ARMV7
extern
prototype_sad
(
vp8_sad4x4_neon
);
extern
prototype_sad
(
vp8_sad8x8_neon
);
...
...
vp8/vp8_common.mk
View file @
7d8199f0
...
...
@@ -116,6 +116,7 @@ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
# common (c)
VP8_COMMON_SRCS-$(HAVE_ARMV6)
+=
common/arm/bilinearfilter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6)
+=
common/arm/bilinearfilter_arm.h
VP8_COMMON_SRCS-$(HAVE_ARMV6)
+=
common/arm/filter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6)
+=
common/arm/loopfilter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6)
+=
common/arm/reconintra_arm.c
...
...
vp8/vp8cx_arm.mk
View file @
7d8199f0
...
...
@@ -17,9 +17,10 @@ VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
VP8_CX_SRCS-$(ARCH_ARM)
+=
encoder/asm_enc_offsets.c
VP8_CX_SRCS-$(HAVE_ARMV7)
+=
encoder/arm/encodemb_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7)
+=
encoder/arm/variance_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7)
+=
encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7)
+=
encoder/arm/picklpf_arm.c
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/variance_arm.c
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/variance_arm.h
VP8_CX_SRCS-$(HAVE_ARMV5TE)
+=
encoder/arm/boolhuff_arm.c
VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)
+=
encoder/boolhuff.c
...
...
@@ -33,6 +34,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
#File list for armv6
# encoder
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_variance16x16_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/walsh_v6
$(ASM)
#File list for neon
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment