Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
52490354
Commit
52490354
authored
May 06, 2011
by
Johann
Committed by
Code Review
May 06, 2011
Browse files
Merge "neon fast quantizer updated"
parents
eeb81173
33fa7c4e
Changes
8
Hide whitespace changes
Inline
Side-by-side
build/make/ads2gas.pl
View file @
52490354
...
...
@@ -23,6 +23,9 @@ print "\t.equ DO1STROUNDING, 0\n";
while
(
<
STDIN
>
)
{
# Load and store alignment
s/@/,:/g
;
# Comment character
s/;/@/g
;
...
...
@@ -114,8 +117,8 @@ while (<STDIN>)
# put the colon at the end of the line in the macro
s/^([a-zA-Z_0-9\$]+)/$1:/
if
!
/EQU/
;
#
Strip ALIGN
s/
\s
ALIGN/
@ ALIGN
/g
;
#
ALIGN directive
s/ALIGN/
.balign
/g
;
# Strip ARM
s/\sARM/@ ARM/g
;
...
...
build/make/ads2gas_apple.pl
View file @
52490354
...
...
@@ -41,6 +41,9 @@ sub trim($)
while
(
<
STDIN
>
)
{
# Load and store alignment
s/@/,:/g
;
# Comment character
s/;/@/g
;
...
...
@@ -137,8 +140,8 @@ while (<STDIN>)
# put the colon at the end of the line in the macro
s/^([a-zA-Z_0-9\$]+)/$1:/
if
!
/EQU/
;
#
Strip ALIGN
s/
\s
ALIGN/
@ ALIGN
/g
;
#
ALIGN directive
s/ALIGN/
.balign
/g
;
# Strip ARM
s/\sARM/@ ARM/g
;
...
...
vp8/encoder/arm/arm_csystemdependent.c
View file @
52490354
...
...
@@ -121,12 +121,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi
->
rtcd
.
encodemb
.
submby
=
vp8_subtract_mby_neon
;
cpi
->
rtcd
.
encodemb
.
submbuv
=
vp8_subtract_mbuv_neon
;
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
/* The neon quantizer has not been updated to match the new exact
* quantizer introduced in commit e04e2935
*/
/*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;*/
cpi
->
rtcd
.
quantize
.
fastquantb
=
vp8_fast_quantize_b_neon
;
}
#endif
...
...
vp8/encoder/arm/neon/fastquantizeb_neon.asm
View file @
52490354
;
; Copyright (c) 201
0
The WebM project authors. All Rights Reserved.
; Copyright (c) 201
1
The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
...
...
@@ -9,110 +9,122 @@
;
EXPORT
|
vp8_fast_quantize_b_neon_func
|
EXPORT
|
vp8_fast_quantize_b_neon
|
INCLUDE
asm_enc_offsets.asm
ARM
REQUIRE8
PRESERVE8
AREA
||
.text
||
,
CODE
,
READONLY
,
AL
IGN
=
2
AREA
||
.text
||
,
CODE
,
READONLY
,
AL
IGN
=
4
; r0 short *coeff_ptr
; r1 short *zbin_ptr
; r2 short *qcoeff_ptr
; r3 short *dqcoeff_ptr
; stack short *dequant_ptr
; stack short *scan_mask
; stack short *round_ptr
; stack short *quant_ptr
; return int * eob
|
vp8_fast_quantize_b_neon_func
|
PROC
vld1.16
{
q0
,
q1
}
,
[
r0
]
;load z
vld1.16
{
q10
,
q11
}
,
[
r1
]
;load zbin
;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
vp8_fast_quantize_b_neon
|
PROC
vabs.s16
q4
,
q0
;calculate x = abs(z)
vabs.s16
q5
,
q1
stmfd
sp
!
,
{
r4
-
r7
}
vcge.s16
q10
,
q4
,
q10
;x>=zbin
vcge.s16
q11
,
q5
,
q11
ldr
r3
,
[
r0
,
#
vp8_block_coeff
]
ldr
r4
,
[
r0
,
#
vp8_block_quant_fast
]
ldr
r5
,
[
r0
,
#
vp8_block_round
]
;if x<zbin (q10 & q11 are all 0), go to zero_output
vorr.s16
q6
,
q10
,
q11
vorr.s16
d12
,
d12
,
d13
vmov
r0
,
r1
,
d12
orr
r0
,
r0
,
r1
cmp
r0
,
#
0
beq
zero_output
vld1.16
{
q0
,
q1
}
,
[
r3@128
]
; load z
vorr.s16
q14
,
q0
,
q1
; check if all zero (step 1)
ldr
r6
,
[
r1
,
#
vp8_blockd_qcoeff
]
ldr
r7
,
[
r1
,
#
vp8_blockd_dqcoeff
]
vorr.s16
d28
,
d28
,
d29
; check if all zero (step 2)
ldr
r0
,
[
sp
,
#
8
]
;load round_ptr
ldr
r12
,
[
sp
,
#
12
]
;load quant_ptr
vabs.s16
q12
,
q0
; calculate x = abs(z)
vabs.s16
q13
,
q1
;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
vshr.s16
q2
,
q0
,
#
15
; sz
vshr.s16
q2
,
q0
,
#
15
; sz
vmov
r2
,
r3
,
d28
; check if all zero (step 3)
vshr.s16
q3
,
q1
,
#
15
vld1.s16
{
q
6
,
q
7
}
,
[
r
0
]
;
load round_ptr [0-15]
vld1.s16
{
q8
,
q9
}
,
[
r
12
]
;
load quant_ptr [0-15]
vld1.s16
{
q
14
,
q
15
}
,
[
r
5@128
]
;
load round_ptr [0-15]
vld1.s16
{
q8
,
q9
}
,
[
r
4@128
]
;
load quant_ptr [0-15]
vadd.s16
q
4
,
q
6
;x + Round
vadd.s16
q
5
,
q
7
vadd.s16
q
12
,
q
14
;
x + Round
vadd.s16
q
13
,
q
15
ldr
r0
,
[
sp
,
#
4
]
;load rvsplus1_scan_order ptr
ldr
r0
,
_inv_zig_zag_
; load ptr of inverse zigzag table
vqdmulh.s16
q
4
,
q8
;y = ((Round
+
abs(z)) * Quant) >> 16
vqdmulh.s16
q
5
,
q9
vqdmulh.s16
q
12
,
q8
;
y = ((Round
+
abs(z)) * Quant) >> 16
vqdmulh.s16
q
13
,
q9
vld1.16
{
q0
,
q1
}
,
[
r0
]
;load rvsplus1_scan_order
vceq.s16
q8
,
q8
;set q8 to all 1
vld1.16
{
q10
,
q11
}
,
[
r0@128
]
; load inverse scan order
vshr.s16
q4
,
#
1
;right shift 1 after vqdmulh
vshr.s16
q5
,
#
1
vceq.s16
q8
,
q8
; set q8 to all 1
;modify data to have its original sign
veor.s16
q4
,
q2
; y^sz
veor.s16
q5
,
q3
ldr
r4
,
[
r1
,
#
vp8_blockd_dequant
]
vshr.s16
q12
,
#
1
; right shift 1 after vqdmulh
vshr.s16
q13
,
#
1
ldr
r12
,
[
sp
]
;load dequant_ptr
orr
r2
,
r2
,
r3
; check if all zero (step 4)
cmp
r2
,
#
0
; check if all zero (step 5)
beq
zero_output
; check if all zero (step 6)
;modify data to have its original sign
veor.s16
q12
,
q2
; y^sz
veor.s16
q13
,
q3
vsub.s16
q
4
,
q2
; x1
=
(y^sz)
-
sz = (y^sz)
-
(-1) (
two
's complement)
vsub.s16
q
5
,
q3
vsub.s16
q
12
,
q2
; x1
=
(y^sz)
-
sz = (y^sz)
-
(-1) (
2
's complement)
vsub.s16
q
13
,
q3
vand.s16
q4
,
q10
;mask off x1 elements
vand.s16
q5
,
q11
vld1.s16
{
q2
,
q3
}
,
[
r4@128
]
; load dequant_ptr[i]
vld1.s16
{
q6
,
q7
}
,
[
r12
]
;load dequant_ptr[i]
vtst.16
q14
,
q12
,
q8
; now find eob
vtst.16
q15
,
q13
,
q8
; non-zero element is set to all 1
vtst.16
q14
,
q4
,
q8
;now find eob
vtst.16
q15
,
q5
,
q8
;non-zero element is set to all 1 in q4, q5
vst1.s16
{
q12
,
q13
}
,
[
r6@128
]
; store: qcoeff = x1
vst1.s16
{
q4
,
q5
}
,
[
r2
]
;store: qcoeff = x1
vand
q10
,
q10
,
q14
; get all valid numbers from scan array
vand
q11
,
q11
,
q15
vand
q0
,
q0
,
q14
;get all valid number from rvsplus1_scan_order array
vand
q1
,
q1
,
q15
vmax.u16
q0
,
q0
,
q1
;find maximum value in q0, q1
vmax.u16
q0
,
q
1
0
,
q1
1
;
find maximum value in q10, q11
vmax.u16
d0
,
d0
,
d1
vmovl.u16
q0
,
d0
vmul.s16
q
6
,
q
4
;x * Dequant
vmul.s16
q
7
,
q
5
vmul.s16
q
2
,
q
12
;
x * Dequant
vmul.s16
q
3
,
q
13
vmax.u32
d0
,
d0
,
d1
vpmax.u32
d0
,
d0
,
d0
vst1.s16
{
q
6
,
q
7
}
,
[
r
3
]
;
store dqcoeff = x * Dequant
vst1.s16
{
q
2
,
q
3
}
,
[
r
7@128
]
;
store dqcoeff = x * Dequant
vmov.32
r0
,
d0
[
0
]
vmov.32
r0
,
d0
[
0
]
; this instruction takes 1+13 cycles
; if we have vfp, we could use
; vstr s0, [r1, #vp8_blockd_eob]
str
r0
,
[
r1
,
#
vp8_blockd_eob
]
ldmfd
sp
!
,
{
r4
-
r7
}
bx
lr
zero_output
v
st
1.s16
{
q10
,
q11
}
,
[
r2
]
; qcoeff = 0
vst1.s16
{
q
1
0
,
q1
1
}
,
[
r
3
]
;
d
qcoeff = 0
mov
r0
,
#
0
st
r
r2
,
[
r1
,
#
vp8_blockd_eob
]
vst1.s16
{
q0
,
q1
}
,
[
r
6@128
]
; qcoeff = 0
vst1.s16
{
q0
,
q1
}
,
[
r7@128
]
; dqcoeff =
0
ldmfd
sp
!
,
{
r4
-
r7
}
bx
lr
ENDP
; default inverse zigzag table is defined in vp8/common/entropy.c
_inv_zig_zag_
DCD
inv_zig_zag
ALIGN
16
; enable use of @128 bit aligned loads
inv_zig_zag
DCW
0x0001
,
0x0002
,
0x0006
,
0x0007
DCW
0x0003
,
0x0005
,
0x0008
,
0x000d
DCW
0x0004
,
0x0009
,
0x000c
,
0x000e
DCW
0x000a
,
0x000b
,
0x000f
,
0x0010
END
vp8/encoder/arm/quantize_arm.c
deleted
100644 → 0
View file @
eeb81173
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include
<math.h>
#include
"vpx_mem/vpx_mem.h"
#include
"vp8/encoder/quantize.h"
#include
"vp8/common/entropy.h"
/*
 * Per-coefficient lookup table handed to vp8_fast_quantize_b_neon_func as its
 * scan_mask argument (declared through DECLARE_ALIGNED with an alignment
 * argument of 16).
 *
 * The reference C implementation kept in the comment at the end of this file
 * takes the largest table[i] over all coefficients i that quantize to a
 * non-zero value and uses it as the end-of-block (eob) result.
 * NOTE(review): judging by the name, entry i is presumably the zig-zag scan
 * position of raster index i plus one -- confirm against
 * vp8/common/entropy.c.
 */
DECLARE_ALIGNED(16, const short, vp8_rvsplus1_default_zig_zag1d[16]) =
{
     1,  2,  6,  7,
     3,  5,  8, 13,
     4,  9, 12, 14,
    10, 11, 15, 16
};
/*
 * Prototype of the fast-quantizer routine that is defined outside this file
 * (NOTE(review): presumably the NEON assembly in
 * vp8/encoder/arm/neon/fastquantizeb_neon.asm -- confirm).
 *
 * Arguments, in call order:
 *   coeff_ptr    input coefficients
 *   zbin_ptr     zero-bin thresholds
 *   qcoeff_ptr   output: quantized coefficients
 *   dqcoeff_ptr  output: dequantized coefficients
 *   dequant_ptr  dequantization factors
 *   scan_mask    per-coefficient table used to derive the end-of-block value
 *   round_ptr    rounding values
 *   quant_ptr    quantizer multipliers
 *
 * The int result is what the C wrapper in this file stores into d->eob.
 */
extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr,
                                         short *zbin_ptr,
                                         short *qcoeff_ptr,
                                         short *dqcoeff_ptr,
                                         short *dequant_ptr,
                                         const short *scan_mask,
                                         short *round_ptr,
                                         short *quant_ptr);
/*
 * Block-level entry point for the NEON fast quantizer.
 *
 * Pulls the individual table/buffer pointers out of `b` (coeff, zbin, round,
 * quant_fast) and `d` (qcoeff, dqcoeff, dequant), forwards them together with
 * vp8_rvsplus1_default_zig_zag1d to vp8_fast_quantize_b_neon_func, and
 * records the value that routine returns in d->eob.
 */
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
{
    const int end_of_block =
        vp8_fast_quantize_b_neon_func(b->coeff,
                                      b->zbin,
                                      d->qcoeff,
                                      d->dqcoeff,
                                      d->dequant,
                                      vp8_rvsplus1_default_zig_zag1d,
                                      b->round,
                                      b->quant_fast);

    d->eob = end_of_block;
}
/*
//neon code is written according to the following rewritten c code
void vp8_fast_quantize_b_neon(BLOCK *b,BLOCKD *d)
{
int i, rc, eob;
int zbin;
int x, x1, y, z, sz;
short *coeff_ptr = &b->Coeff[0];
short *zbin_ptr = &b->Zbin[0][0];
short *round_ptr = &b->Round[0][0];
short *quant_ptr = &b->Quant[0][0];
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr= d->dqcoeff;
short *dequant_ptr= &d->Dequant[0][0];
eob = 0;
for(i=0;i<16;i++)
{
z = coeff_ptr[i];
zbin = zbin_ptr[i] ;
x = abs(z); // x = abs(z)
if(x>=zbin)
{
sz = (z>>31); // sign of z
y = ((x+round_ptr[i])*quant_ptr[i])>>16; // quantize (x)
x1 = (y^sz) - sz; // get the sign back
qcoeff_ptr[i] = x1; // write to destination
dqcoeff_ptr[i] = x1 * dequant_ptr[i]; // dequantized value
if(y)
{
if(eob<vp8_rvsplus1_default_zig_zag1d[i])
eob=(int)vp8_rvsplus1_default_zig_zag1d[i]; // last nonzero coeffs
}
}else
{
qcoeff_ptr[i] = 0; // write to destination
dqcoeff_ptr[i] = 0; // dequantized value
}
}
d->eob = eob;
}
*/
vp8/encoder/arm/quantize_arm.h
View file @
52490354
...
...
@@ -23,14 +23,13 @@ extern prototype_quantize_block(vp8_fast_quantize_b_armv6);
#if HAVE_ARMV7
extern
prototype_quantize_block
(
vp8_fast_quantize_b_neon
);
/* The neon quantizer has not been updated to match the new exact
* quantizer introduced in commit e04e2935
*/
//#undef vp8_quantize_fastquantb
//#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
#undef vp8_quantize_fastquantb
#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
#endif
#endif
/* HAVE_ARMV7 */
#endif
vp8/encoder/quantize.h
View file @
52490354
...
...
@@ -17,6 +17,7 @@
#define prototype_quantize_block(sym) \
void (sym)(BLOCK *b,BLOCKD *d)
#if ARCH_X86 || ARCH_X86_64
#include
"x86/quantize_x86.h"
#endif
...
...
@@ -41,6 +42,7 @@ typedef struct
prototype_quantize_block
(
*
fastquantb
);
}
vp8_quantize_rtcd_vtable_t
;
#if CONFIG_RUNTIME_CPU_DETECT
#define QUANTIZE_INVOKE(ctx,fn) (ctx)->fn
#else
...
...
vp8/vp8cx_arm.mk
View file @
52490354
...
...
@@ -15,7 +15,6 @@
# encoder
VP8_CX_SRCS-$(ARCH_ARM)
+=
encoder/arm/arm_csystemdependent.c
VP8_CX_SRCS-$(HAVE_ARMV7)
+=
encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7)
+=
encoder/arm/picklpf_arm.c
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/dct_arm.c
VP8_CX_SRCS-$(HAVE_ARMV6)
+=
encoder/arm/variance_arm.c
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment