Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Guillaume Martres
aom-rav1e
Commits
5c60a646
Commit
5c60a646
authored
Mar 11, 2011
by
Johann
Committed by
Code Review
Mar 11, 2011
Browse files
Merge "ARMv6 optimized quantization"
parents
75051c8b
7ab08e1f
Changes
5
Hide whitespace changes
Inline
Side-by-side
vp8/encoder/arm/arm_csystemdependent.c
View file @
5c60a646
...
...
@@ -71,8 +71,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_
c;*/
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
*/
cpi
->
rtcd
.
quantize
.
fastquantb
=
vp8_fast_quantize_b_
armv6
;
}
#endif
...
...
vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
0 → 100644
View file @
5c60a646
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
;-----------------------------------------------------------------------
; vp8_fast_quantize_b_armv6
;
; Syntax: ARM assembler (armasm style), ARM state, ARMv6 packed-halfword
;         (SIMD) instructions.
;
; Quantizes and dequantizes one block of 16 signed 16-bit coefficients
; (4 loop iterations x 4 coefficients), then computes the end-of-block
; position and stores it in the BLOCKD structure.
;
; Per coefficient, the loop computes:
;     sz      = (z < 0) ? -1 : 0
;     x       = (z ^ sz) - sz                 ; |z|
;     y       = ((x + round) * quant_fast) >> 16
;     qcoeff  = (y ^ sz) - sz                 ; sign restored
;     dqcoeff = qcoeff * dequant              ; low 16 bits stored
;
; In:    r0 = BLOCK  *b   (reads coeff, quant_fast, round pointers)
;        r1 = BLOCKD *d   (reads qcoeff, dqcoeff, dequant pointers;
;                          writes eob)
; Out:   none in registers; results are written through the pointers above.
; Stack: 10 words pushed (r1, r4-r11, lr) = 40 bytes. r1 is pushed only so
;        that the BLOCKD pointer can be reloaded from [sp, #0] in PART 2.
; Clobb: r0-r3, r12, flags. r4-r11 are restored on exit.
;
; All structure offsets (vp8_block_*, vp8_blockd_*) come from the included
; asm_enc_offsets.asm.
;-----------------------------------------------------------------------
    EXPORT  |vp8_fast_quantize_b_armv6|

    INCLUDE asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

; r0    BLOCK *b
; r1    BLOCKD *d
|vp8_fast_quantize_b_armv6| PROC
    stmfd   sp!, {r1, r4-r11, lr}           ; r1 (BLOCKD *d) lands at [sp, #0]

    ; Register roles inside the loop:
    ;   r3 = coeff ptr      r4 = quant_fast ptr   r5 = round ptr
    ;   r6 = qcoeff ptr     r7 = dqcoeff ptr      r8 = dequant ptr
    ;   r2 = loop counter / flag bit source       r1 = nonzero-pair flags
    ;   r0, r9-r12, lr = scratch
    ldr     r3, [r0, #vp8_block_coeff]      ; coeff
    ldr     r4, [r0, #vp8_block_quant_fast] ; quant_fast
    ldr     r5, [r0, #vp8_block_round]      ; round
    ldr     r6, [r1, #vp8_blockd_qcoeff]    ; qcoeff
    ldr     r7, [r1, #vp8_blockd_dqcoeff]   ; dqcoeff
    ldr     r8, [r1, #vp8_blockd_dequant]   ; dequant

    ldr     r2, loop_count          ; loop_count=0x1000000. 'lsls' instruction
                                    ; is used to update the counter so that
                                    ; it can be used to mark nonzero
                                    ; quantized coefficient pairs.
                                    ; Shifting left by 2 each pass makes the
                                    ; counter reach zero after 4 iterations.

    mov     r1, #0                  ; flags for quantized coeffs (r1 no longer
                                    ; holds BLOCKD *d from here on)

    ; PART 1: quantization and dequantization loop
    ;
    ; Each iteration handles four coefficients as two packed halfword pairs.
    ; Flag bits set in r1 on iteration k (k = 0..3):
    ;   bit 2k   (r2 lsr #24): pair [4k+1 | 4k]   is nonzero
    ;   bit 2k+1 (r2 lsr #23): pair [4k+3 | 4k+2] is nonzero
    ; The two pairs are processed interleaved, so register reuse is tight;
    ; do not reorder without re-checking every live range.
loop
    ldr     r9, [r3], #4            ; [z1 | z0]
    ldr     r10, [r5], #4           ; [r1 | r0]
    ldr     r11, [r4], #4           ; [q1 | q0]

    ssat16  lr, #1, r9              ; [sz1 | sz0]: saturating to 1 signed bit
                                    ; yields 0 for z >= 0 and -1 for z < 0
    eor     r9, r9, lr              ; [z1 ^ sz1 | z0 ^ sz0]
    ssub16  r9, r9, lr              ; x = (z ^ sz) - sz
    sadd16  r9, r9, r10             ; [x1+r1 | x0+r0]

    ldr     r12, [r3], #4           ; [z3 | z2]

    smulbb  r0, r9, r11             ; [(x0+r0)*q0]
    smultt  r9, r9, r11             ; [(x1+r1)*q1]

    ldr     r10, [r5], #4           ; [r3 | r2]

    ssat16  r11, #1, r12            ; [sz3 | sz2]
    eor     r12, r12, r11           ; [z3 ^ sz3 | z2 ^ sz2]
    pkhtb   r0, r9, r0, asr #16     ; [y1 | y0]: keep bits 31:16 of each
                                    ; 32-bit product, i.e. product >> 16
    ldr     r9, [r4], #4            ; [q3 | q2]
    ssub16  r12, r12, r11           ; x = (z ^ sz) - sz

    sadd16  r12, r12, r10           ; [x3+r3 | x2+r2]

    eor     r0, r0, lr              ; [(y1 ^ sz1) | (y0 ^ sz0)]

    smulbb  r10, r12, r9            ; [(x2+r2)*q2]
    smultt  r12, r12, r9            ; [(x3+r3)*q3]

    ssub16  r0, r0, lr              ; x = (y ^ sz) - sz

    cmp     r0, #0                  ; check if zero (either halfword nonzero
                                    ; makes the packed word nonzero)
    orrne   r1, r1, r2, lsr #24     ; add flag for nonzero coeffs

    str     r0, [r6], #4            ; *qcoeff++ = x
    ldr     r9, [r8], #4            ; [dq1 | dq0]

    pkhtb   r10, r12, r10, asr #16  ; [y3 | y2]
    eor     r10, r10, r11           ; [(y3 ^ sz3) | (y2 ^ sz2)]
    ssub16  r10, r10, r11           ; x = (y ^ sz) - sz

    cmp     r10, #0                 ; check if zero
    orrne   r1, r1, r2, lsr #23     ; add flag for nonzero coeffs

    str     r10, [r6], #4           ; *qcoeff++ = x
    ldr     r11, [r8], #4           ; [dq3 | dq2]

    smulbb  r12, r0, r9             ; [x0*dq0]
    smultt  r0, r0, r9              ; [x1*dq1]

    smulbb  r9, r10, r11            ; [x2*dq2]
    smultt  r10, r10, r11           ; [x3*dq3]

    lsls    r2, r2, #2              ; update loop counter; sets Z when the
                                    ; counter reaches zero. None of the
                                    ; following strh/add instructions touch
                                    ; the flags, so 'bne loop' tests this.
    strh    r12, [r7, #0]           ; dqcoeff[0] = [x0*dq0]
    strh    r0, [r7, #2]            ; dqcoeff[1] = [x1*dq1]
    strh    r9, [r7, #4]            ; dqcoeff[2] = [x2*dq2]
    strh    r10, [r7, #6]           ; dqcoeff[3] = [x3*dq3]
    add     r7, r7, #8              ; advance dqcoeff by 8 bytes (4 coeffs)
    bne     loop

    ; PART 2: check position for eob...
    ;
    ; r1 now holds one flag bit per coefficient pair (see PART 1).
    ; In the comments below, 'rc' is the index into qcoeff (byte offset
    ; 2*rc) and 'i' is the scan position; the stored eob is i + 1.
    ; NOTE(review): the rc/i pairing presumably mirrors the VP8 zig-zag
    ; scan table used by the C reference - confirm against that table.
    mov     lr, #0                  ; init eob
    cmp     r1, #0                  ; coeffs after quantization?
    ldr     r11, [sp, #0]           ; restore BLOCKD pointer (ldr leaves the
                                    ; flags from 'cmp' intact)
    beq     end                     ; skip eob calculations if all zero

    ldr     r0, [r11, #vp8_blockd_qcoeff]

    ; check shortcut for nonzero qcoeffs
    ; Pairs are tested in descending order of the highest scan position
    ; they contain, so the first set flag selects the latest entry point
    ; that can still hold the last nonzero coefficient. From there the
    ; code falls through the remaining positions in descending scan order.
    tst    r1, #0x80                ; coeffs 14,15
    bne    quant_coeff_15_14
    tst    r1, #0x20                ; coeffs 10,11
    bne    quant_coeff_13_11
    tst    r1, #0x8                 ; coeffs 6,7
    bne    quant_coeff_12_7
    tst    r1, #0x40                ; coeffs 12,13
    bne    quant_coeff_10_9
    tst    r1, #0x10                ; coeffs 8,9
    bne    quant_coeff_8_3
    tst    r1, #0x2                 ; coeffs 2,3
    bne    quant_coeff_6_5
    tst    r1, #0x4                 ; coeffs 4,5
    bne    quant_coeff_4_2
    b      quant_coeff_1_0          ; only coeffs 0,1 can be nonzero

    ; Each step below loads one coefficient, sets lr to the eob that
    ; applies if that coefficient is nonzero, and exits if it is.
quant_coeff_15_14
    ldrh    r2, [r0, #30]       ; rc=15, i=15
    mov     lr, #16
    cmp     r2, #0
    bne     end

    ldrh    r3, [r0, #28]       ; rc=14, i=14
    mov     lr, #15
    cmp     r3, #0
    bne     end

quant_coeff_13_11
    ldrh    r2, [r0, #22]       ; rc=11, i=13
    mov     lr, #14
    cmp     r2, #0
    bne     end

quant_coeff_12_7
    ldrh    r3, [r0, #14]       ; rc=7,  i=12
    mov     lr, #13
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #20]       ; rc=10, i=11
    mov     lr, #12
    cmp     r2, #0
    bne     end

quant_coeff_10_9
    ldrh    r3, [r0, #26]       ; rc=13, i=10
    mov     lr, #11
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #24]       ; rc=12, i=9
    mov     lr, #10
    cmp     r2, #0
    bne     end

quant_coeff_8_3
    ldrh    r3, [r0, #18]       ; rc=9,  i=8
    mov     lr, #9
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #12]       ; rc=6,  i=7
    mov     lr, #8
    cmp     r2, #0
    bne     end

quant_coeff_6_5
    ldrh    r3, [r0, #6]        ; rc=3,  i=6
    mov     lr, #7
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #4]        ; rc=2,  i=5
    mov     lr, #6
    cmp     r2, #0
    bne     end

quant_coeff_4_2
    ldrh    r3, [r0, #10]       ; rc=5,  i=4
    mov     lr, #5
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #16]       ; rc=8,  i=3
    mov     lr, #4
    cmp     r2, #0
    bne     end

    ldrh    r3, [r0, #8]        ; rc=4,  i=2
    mov     lr, #3
    cmp     r3, #0
    bne     end

quant_coeff_1_0
    ldrh    r2, [r0, #2]        ; rc=1,  i=1
    mov     lr, #2
    cmp     r2, #0
    bne     end

    mov     lr, #1              ; rc=0,  i=0 (reached only when r1 != 0, so
                                ; coefficient 0 must be the nonzero one)

end
    str     lr, [r11, #vp8_blockd_eob]      ; d->eob = eob
    ldmfd   sp!, {r1, r4-r11, pc}           ; restore and return (pops pc)

    ENDP

; Initial loop counter / flag seed, loaded PC-relative at function entry.
loop_count
    DCD     0x1000000

    END
vp8/encoder/arm/quantize_arm.h
View file @
5c60a646
...
...
@@ -12,6 +12,16 @@
#ifndef QUANTIZE_ARM_H
#define QUANTIZE_ARM_H
#if HAVE_ARMV6
extern
prototype_quantize_block
(
vp8_fast_quantize_b_armv6
);
#undef vp8_quantize_fastquantb
#define vp8_quantize_fastquantb vp8_fast_quantize_b_armv6
#endif
/* HAVE_ARMV6 */
#if HAVE_ARMV7
extern
prototype_quantize_block
(
vp8_fast_quantize_b_neon
);
...
...
vp8/encoder/asm_enc_offsets.c
View file @
5c60a646
...
...
@@ -65,6 +65,17 @@ DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
DEFINE
(
vp8_common_mb_rows
,
offsetof
(
VP8_COMMON
,
mb_rows
));
// offsets from BLOCK structure
DEFINE
(
vp8_block_coeff
,
offsetof
(
BLOCK
,
coeff
));
DEFINE
(
vp8_block_quant_fast
,
offsetof
(
BLOCK
,
quant_fast
));
DEFINE
(
vp8_block_round
,
offsetof
(
BLOCK
,
round
));
// offsets from BLOCKD structure
DEFINE
(
vp8_blockd_qcoeff
,
offsetof
(
BLOCKD
,
qcoeff
));
DEFINE
(
vp8_blockd_dqcoeff
,
offsetof
(
BLOCKD
,
dqcoeff
));
DEFINE
(
vp8_blockd_dequant
,
offsetof
(
BLOCKD
,
dequant
));
DEFINE
(
vp8_blockd_eob
,
offsetof
(
BLOCKD
,
eob
));
// These two sizes are used in vp8cx_pack_tokens. They are hard coded
// so if the size changes this will have to be adjusted.
#if HAVE_ARMV5TE
...
...
vp8/vp8cx_arm.mk
View file @
5c60a646
...
...
@@ -34,6 +34,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
#File list for armv6
# encoder
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_fast_quantize_b_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_sad16x16_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/vp8_variance16x16_armv6
$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/armv6/walsh_v6
$(ASM)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment