Xiph.Org / aom-rav1e / Commits

Commit 412eaaf0, authored Nov 04, 2014 by Johann, committed by Gerrit Code Review on Nov 04, 2014

Merge "vp8 quantization -> intrinsics"

Parents: 9f9e30d7, 7ae75c3d
Changes: 4 files
vp8/common/rtcd_defs.pl
...
...
@@ -454,16 +454,14 @@ add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
 specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
 
 add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
 $vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
-$vp8_fast_quantize_b_neon_asm=vp8_fast_quantize_b_neon;
 
 add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
 # no asm yet
 
 add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
-specialize qw/vp8_fast_quantize_b_pair neon_asm/;
-$vp8_fast_quantize_b_pair_neon_asm=vp8_fast_quantize_b_pair_neon;
+specialize qw/vp8_fast_quantize_b_pair neon/;
 
 add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
 specialize qw/vp8_quantize_mb neon/;
...
...
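For context: rtcd_defs.pl feeds libvpx's run-time CPU detection. add_proto declares a dispatchable function and specialize lists the ISA-specific versions it may resolve to; the neon_asm flavor needed the explicit $vp8_fast_quantize_b_neon_asm = vp8_fast_quantize_b_neon alias, which becomes unnecessary once the intrinsics file below defines vp8_fast_quantize_b_neon under the plain neon name. A minimal sketch of the generated dispatch, with stub types and a stub CPU probe (illustrative names and flag value, not the verbatim vp8_rtcd.h output):

#include <stdio.h>

struct block;
struct blockd;

static void vp8_fast_quantize_b_c(struct block *b, struct blockd *d) {
  (void)b; (void)d; puts("C fallback");
}
static void vp8_fast_quantize_b_neon(struct block *b, struct blockd *d) {
  (void)b; (void)d; puts("NEON version");
}

#define HAS_NEON 0x8                                /* hypothetical flag value */
static int arm_cpu_caps(void) { return HAS_NEON; }  /* stub runtime probe */

void (*vp8_fast_quantize_b)(struct block *, struct blockd *);

static void setup_rtcd_internal(void) {
  const int flags = arm_cpu_caps();
  vp8_fast_quantize_b = vp8_fast_quantize_b_c;      /* default to C */
  if (flags & HAS_NEON) vp8_fast_quantize_b = vp8_fast_quantize_b_neon;
}

int main(void) {
  setup_rtcd_internal();
  vp8_fast_quantize_b(0, 0);  /* dispatches to the NEON stub here */
  return 0;
}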
vp8/encoder/arm/neon/fastquantizeb_neon.asm (deleted, 100644 → 0)
;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vp8_fast_quantize_b_neon|
    EXPORT  |vp8_fast_quantize_b_pair_neon|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=4

;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2);
|vp8_fast_quantize_b_pair_neon| PROC

    stmfd           sp!, {r4-r9}
    vstmdb          sp!, {q4-q7}

    ldr             r4, [r0, #vp8_block_coeff]
    ldr             r5, [r0, #vp8_block_quant_fast]
    ldr             r6, [r0, #vp8_block_round]

    vld1.16         {q0, q1}, [r4@128]  ; load z

    ldr             r7, [r2, #vp8_blockd_qcoeff]

    vabs.s16        q4, q0              ; calculate x = abs(z)
    vabs.s16        q5, q1

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15         ; sz
    vshr.s16        q3, q1, #15

    vld1.s16        {q6, q7}, [r6@128]  ; load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r5@128]  ; load quant_ptr [0-15]

    ldr             r4, [r1, #vp8_block_coeff]

    vadd.s16        q4, q6              ; x + Round
    vadd.s16        q5, q7

    vld1.16         {q0, q1}, [r4@128]  ; load z2

    vqdmulh.s16     q4, q8              ; y = ((Round+abs(z)) * Quant) >> 16
    vqdmulh.s16     q5, q9

    vabs.s16        q10, q0             ; calculate x2 = abs(z_2)
    vabs.s16        q11, q1
    vshr.s16        q12, q0, #15        ; sz2
    vshr.s16        q13, q1, #15

    ;modify data to have its original sign
    veor.s16        q4, q2              ; y^sz
    veor.s16        q5, q3

    vadd.s16        q10, q6             ; x2 + Round
    vadd.s16        q11, q7

    ldr             r8, [r2, #vp8_blockd_dequant]

    vqdmulh.s16     q10, q8             ; y2 = ((Round+abs(z)) * Quant) >> 16
    vqdmulh.s16     q11, q9

    vshr.s16        q4, #1              ; right shift 1 after vqdmulh
    vshr.s16        q5, #1

    vld1.s16        {q6, q7}, [r8@128]  ; load dequant_ptr[i]

    vsub.s16        q4, q2              ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
    vsub.s16        q5, q3

    vshr.s16        q10, #1             ; right shift 1 after vqdmulh
    vshr.s16        q11, #1

    ldr             r9, [r2, #vp8_blockd_dqcoeff]

    veor.s16        q10, q12            ; y2^sz2
    veor.s16        q11, q13

    vst1.s16        {q4, q5}, [r7]      ; store: qcoeff = x1

    vsub.s16        q10, q12            ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement)
    vsub.s16        q11, q13

    ldr             r6, [r3, #vp8_blockd_qcoeff]

    vmul.s16        q2, q6, q4          ; x * Dequant
    vmul.s16        q3, q7, q5

    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table

    vceq.s16        q8, q8              ; set q8 to all 1

    vst1.s16        {q10, q11}, [r6]    ; store: qcoeff = x2

    vmul.s16        q12, q6, q10        ; x2 * Dequant
    vmul.s16        q13, q7, q11

    vld1.16         {q6, q7}, [r0@128]  ; load inverse scan order

    vtst.16         q14, q4, q8         ; now find eob
    vtst.16         q15, q5, q8         ; non-zero element is set to all 1

    vst1.s16        {q2, q3}, [r9]      ; store dqcoeff = x * Dequant

    ldr             r7, [r3, #vp8_blockd_dqcoeff]

    vand            q0, q6, q14         ; get all valid numbers from scan array
    vand            q1, q7, q15

    vst1.s16        {q12, q13}, [r7]    ; store dqcoeff = x * Dequant

    vtst.16         q2, q10, q8         ; now find eob
    vtst.16         q3, q11, q8         ; non-zero element is set to all 1

    vmax.u16        q0, q0, q1          ; find maximum value in q0, q1

    vand            q10, q6, q2         ; get all valid numbers from scan array
    vand            q11, q7, q3
    vmax.u16        q10, q10, q11       ; find maximum value in q10, q11

    vmax.u16        d0, d0, d1
    vmax.u16        d20, d20, d21
    vmovl.u16       q0, d0
    vmovl.u16       q10, d20
    vmax.u32        d0, d0, d1
    vmax.u32        d20, d20, d21
    vpmax.u32       d0, d0, d0
    vpmax.u32       d20, d20, d20

    ldr             r4, [r2, #vp8_blockd_eob]
    ldr             r5, [r3, #vp8_blockd_eob]

    vst1.8          {d0[0]}, [r4]       ; store eob
    vst1.8          {d20[0]}, [r5]      ; store eob

    vldmia          sp!, {q4-q7}
    ldmfd           sp!, {r4-r9}
    bx              lr

    ENDP

;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|vp8_fast_quantize_b_neon| PROC

    stmfd           sp!, {r4-r7}

    ldr             r3, [r0, #vp8_block_coeff]
    ldr             r4, [r0, #vp8_block_quant_fast]
    ldr             r5, [r0, #vp8_block_round]

    vld1.16         {q0, q1}, [r3@128]  ; load z

    vorr.s16        q14, q0, q1         ; check if all zero (step 1)

    ldr             r6, [r1, #vp8_blockd_qcoeff]
    ldr             r7, [r1, #vp8_blockd_dqcoeff]

    vorr.s16        d28, d28, d29       ; check if all zero (step 2)

    vabs.s16        q12, q0             ; calculate x = abs(z)
    vabs.s16        q13, q1

    ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative
    vshr.s16        q2, q0, #15         ; sz
    vmov            r2, r3, d28         ; check if all zero (step 3)
    vshr.s16        q3, q1, #15

    vld1.s16        {q14, q15}, [r5@128]; load round_ptr [0-15]
    vld1.s16        {q8, q9}, [r4@128]  ; load quant_ptr [0-15]

    vadd.s16        q12, q14            ; x + Round
    vadd.s16        q13, q15

    adr             r0, inv_zig_zag     ; load ptr of inverse zigzag table

    vqdmulh.s16     q12, q8             ; y = ((Round+abs(z)) * Quant) >> 16
    vqdmulh.s16     q13, q9

    vld1.16         {q10, q11}, [r0@128]; load inverse scan order

    vceq.s16        q8, q8              ; set q8 to all 1

    ldr             r4, [r1, #vp8_blockd_dequant]

    vshr.s16        q12, #1             ; right shift 1 after vqdmulh
    vshr.s16        q13, #1

    ldr             r5, [r1, #vp8_blockd_eob]

    orr             r2, r2, r3          ; check if all zero (step 4)
    cmp             r2, #0              ; check if all zero (step 5)
    beq             zero_output         ; check if all zero (step 6)

    ;modify data to have its original sign
    veor.s16        q12, q2             ; y^sz
    veor.s16        q13, q3

    vsub.s16        q12, q2             ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement)
    vsub.s16        q13, q3

    vld1.s16        {q2, q3}, [r4@128]  ; load dequant_ptr[i]

    vtst.16         q14, q12, q8        ; now find eob
    vtst.16         q15, q13, q8        ; non-zero element is set to all 1

    vst1.s16        {q12, q13}, [r6@128]; store: qcoeff = x1

    vand            q10, q10, q14       ; get all valid numbers from scan array
    vand            q11, q11, q15

    vmax.u16        q0, q10, q11        ; find maximum value in q0, q1
    vmax.u16        d0, d0, d1
    vmovl.u16       q0, d0

    vmul.s16        q2, q12             ; x * Dequant
    vmul.s16        q3, q13

    vmax.u32        d0, d0, d1
    vpmax.u32       d0, d0, d0

    vst1.s16        {q2, q3}, [r7@128]  ; store dqcoeff = x * Dequant

    vst1.8          {d0[0]}, [r5]       ; store eob

    ldmfd           sp!, {r4-r7}
    bx              lr

zero_output
    strb            r2, [r5]            ; store eob
    vst1.s16        {q0, q1}, [r6@128]  ; qcoeff = 0
    vst1.s16        {q0, q1}, [r7@128]  ; dqcoeff = 0

    ldmfd           sp!, {r4-r7}
    bx              lr

    ENDP

; default inverse zigzag table is defined in vp8/common/entropy.c
    ALIGN 16    ; enable use of @128 bit aligned loads
inv_zig_zag
    DCW 0x0001, 0x0002, 0x0006, 0x0007
    DCW 0x0003, 0x0005, 0x0008, 0x000d
    DCW 0x0004, 0x0009, 0x000c, 0x000e
    DCW 0x000a, 0x000b, 0x000f, 0x0010

    END
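A note on the vqdmulh/vshr pairs in the listing above: vqdmulh computes the saturating high half of a doubling multiply, (2*a*b) >> 16, so the right shift by 1 that follows it recovers the plain (a*b) >> 16 the quantizer wants. A scalar model (a sketch; saturation is omitted because x = abs(z) + Round is non-negative, so the -32768 * -32768 corner cannot occur):

#include <stdint.h>

/* Scalar model of the vqdmulh.s16 + vshr.s16 #1 sequence: under
 * arithmetic shifts, ((2*a*b) >> 16) >> 1 == (a*b) >> 16. */
static int16_t quantize_mulhi(int16_t x_plus_round, int16_t quant) {
  const int32_t doubled = ((int32_t)x_plus_round * quant) << 1; /* vqdmulh, saturation omitted */
  const int16_t y = (int16_t)(doubled >> 16);                   /* high half */
  return (int16_t)(y >> 1);                   /* vshr #1: undo the doubling */
}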
vp8/encoder/arm/neon/fastquantizeb_neon.c (new file, 0 → 100644)
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "vp8/encoder/block.h"
#include "vpx_mem/vpx_mem.h"

static const uint16_t inv_zig_zag[16] = {
  0x0001, 0x0002, 0x0006, 0x0007,
  0x0003, 0x0005, 0x0008, 0x000d,
  0x0004, 0x0009, 0x000c, 0x000e,
  0x000a, 0x000b, 0x000f, 0x0010
};

void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
  const int16x8_t one_q = vdupq_n_s16(0xff),
                  z0 = vld1q_s16(b->coeff),
                  z1 = vld1q_s16(b->coeff + 8),
                  round0 = vld1q_s16(b->round),
                  round1 = vld1q_s16(b->round + 8),
                  quant0 = vld1q_s16(b->quant_fast),
                  quant1 = vld1q_s16(b->quant_fast + 8),
                  dequant0 = vld1q_s16(d->dequant),
                  dequant1 = vld1q_s16(d->dequant + 8);
  const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag),
                   zig_zag1 = vld1q_u16(inv_zig_zag + 8);

  int16x8_t x0, x1, sz0, sz1, y0, y1;
  uint16x8_t eob0, eob1;
  uint16x4_t eob_d16;
  uint32x2_t eob_d32;
  uint32x4_t eob_q32;

  /* sign of z: z >> 15 */
  sz0 = vshrq_n_s16(z0, 15);
  sz1 = vshrq_n_s16(z1, 15);

  /* x = abs(z) */
  x0 = vabsq_s16(z0);
  x1 = vabsq_s16(z1);

  /* x += round */
  x0 = vaddq_s16(x0, round0);
  x1 = vaddq_s16(x1, round1);

  /* y = 2 * (x * quant) >> 16 */
  y0 = vqdmulhq_s16(x0, quant0);
  y1 = vqdmulhq_s16(x1, quant1);

  /* Compensate for doubling in vqdmulhq */
  y0 = vshrq_n_s16(y0, 1);
  y1 = vshrq_n_s16(y1, 1);

  /* Restore sign bit */
  y0 = veorq_s16(y0, sz0);
  y1 = veorq_s16(y1, sz1);
  x0 = vsubq_s16(y0, sz0);
  x1 = vsubq_s16(y1, sz1);

  /* find non-zero elements */
  eob0 = vtstq_s16(x0, one_q);
  eob1 = vtstq_s16(x1, one_q);

  /* mask zig zag */
  eob0 = vandq_u16(eob0, zig_zag0);
  eob1 = vandq_u16(eob1, zig_zag1);

  /* select the largest value */
  eob0 = vmaxq_u16(eob0, eob1);
  eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0));
  eob_q32 = vmovl_u16(eob_d16);
  eob_d32 = vmax_u32(vget_low_u32(eob_q32), vget_high_u32(eob_q32));
  eob_d32 = vpmax_u32(eob_d32, eob_d32);

  /* qcoeff = x */
  vst1q_s16(d->qcoeff, x0);
  vst1q_s16(d->qcoeff + 8, x1);

  /* dqcoeff = x * dequant */
  vst1q_s16(d->dqcoeff, vmulq_s16(dequant0, x0));
  vst1q_s16(d->dqcoeff + 8, vmulq_s16(dequant1, x1));

  vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
}

void vp8_fast_quantize_b_pair_neon(BLOCK *b0, BLOCK *b1,
                                   BLOCKD *d0, BLOCKD *d1) {
  const int16x8_t one_q = vdupq_n_s16(0xff),
                  b0_z0 = vld1q_s16(b0->coeff),
                  b0_z1 = vld1q_s16(b0->coeff + 8),
                  b0_round0 = vld1q_s16(b0->round),
                  b0_round1 = vld1q_s16(b0->round + 8),
                  b0_quant0 = vld1q_s16(b0->quant_fast),
                  b0_quant1 = vld1q_s16(b0->quant_fast + 8),
                  d0_dequant0 = vld1q_s16(d0->dequant),
                  d0_dequant1 = vld1q_s16(d0->dequant + 8),
                  b1_z0 = vld1q_s16(b1->coeff),
                  b1_z1 = vld1q_s16(b1->coeff + 8),
                  b1_round0 = vld1q_s16(b1->round),
                  b1_round1 = vld1q_s16(b1->round + 8),
                  b1_quant0 = vld1q_s16(b1->quant_fast),
                  b1_quant1 = vld1q_s16(b1->quant_fast + 8),
                  d1_dequant0 = vld1q_s16(d1->dequant),
                  d1_dequant1 = vld1q_s16(d1->dequant + 8);
  const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag),
                   zig_zag1 = vld1q_u16(inv_zig_zag + 8);

  int16x8_t b0_x0, b0_x1, b0_sz0, b0_sz1, b0_y0, b0_y1,
            b1_x0, b1_x1, b1_sz0, b1_sz1, b1_y0, b1_y1;
  uint16x8_t b0_eob0, b0_eob1, b1_eob0, b1_eob1;
  uint16x4_t b0_eob_d16, b1_eob_d16;
  uint32x2_t b0_eob_d32, b1_eob_d32;
  uint32x4_t b0_eob_q32, b1_eob_q32;

  /* sign of z: z >> 15 */
  b0_sz0 = vshrq_n_s16(b0_z0, 15);
  b0_sz1 = vshrq_n_s16(b0_z1, 15);
  b1_sz0 = vshrq_n_s16(b1_z0, 15);
  b1_sz1 = vshrq_n_s16(b1_z1, 15);

  /* x = abs(z) */
  b0_x0 = vabsq_s16(b0_z0);
  b0_x1 = vabsq_s16(b0_z1);
  b1_x0 = vabsq_s16(b1_z0);
  b1_x1 = vabsq_s16(b1_z1);

  /* x += round */
  b0_x0 = vaddq_s16(b0_x0, b0_round0);
  b0_x1 = vaddq_s16(b0_x1, b0_round1);
  b1_x0 = vaddq_s16(b1_x0, b1_round0);
  b1_x1 = vaddq_s16(b1_x1, b1_round1);

  /* y = 2 * (x * quant) >> 16 */
  b0_y0 = vqdmulhq_s16(b0_x0, b0_quant0);
  b0_y1 = vqdmulhq_s16(b0_x1, b0_quant1);
  b1_y0 = vqdmulhq_s16(b1_x0, b1_quant0);
  b1_y1 = vqdmulhq_s16(b1_x1, b1_quant1);

  /* Compensate for doubling in vqdmulhq */
  b0_y0 = vshrq_n_s16(b0_y0, 1);
  b0_y1 = vshrq_n_s16(b0_y1, 1);
  b1_y0 = vshrq_n_s16(b1_y0, 1);
  b1_y1 = vshrq_n_s16(b1_y1, 1);

  /* Restore sign bit */
  b0_y0 = veorq_s16(b0_y0, b0_sz0);
  b0_y1 = veorq_s16(b0_y1, b0_sz1);
  b0_x0 = vsubq_s16(b0_y0, b0_sz0);
  b0_x1 = vsubq_s16(b0_y1, b0_sz1);
  b1_y0 = veorq_s16(b1_y0, b1_sz0);
  b1_y1 = veorq_s16(b1_y1, b1_sz1);
  b1_x0 = vsubq_s16(b1_y0, b1_sz0);
  b1_x1 = vsubq_s16(b1_y1, b1_sz1);

  /* find non-zero elements */
  b0_eob0 = vtstq_s16(b0_x0, one_q);
  b0_eob1 = vtstq_s16(b0_x1, one_q);
  b1_eob0 = vtstq_s16(b1_x0, one_q);
  b1_eob1 = vtstq_s16(b1_x1, one_q);

  /* mask zig zag */
  b0_eob0 = vandq_u16(b0_eob0, zig_zag0);
  b0_eob1 = vandq_u16(b0_eob1, zig_zag1);
  b1_eob0 = vandq_u16(b1_eob0, zig_zag0);
  b1_eob1 = vandq_u16(b1_eob1, zig_zag1);

  /* select the largest value */
  b0_eob0 = vmaxq_u16(b0_eob0, b0_eob1);
  b0_eob_d16 = vmax_u16(vget_low_u16(b0_eob0), vget_high_u16(b0_eob0));
  b0_eob_q32 = vmovl_u16(b0_eob_d16);
  b0_eob_d32 = vmax_u32(vget_low_u32(b0_eob_q32), vget_high_u32(b0_eob_q32));
  b0_eob_d32 = vpmax_u32(b0_eob_d32, b0_eob_d32);

  b1_eob0 = vmaxq_u16(b1_eob0, b1_eob1);
  b1_eob_d16 = vmax_u16(vget_low_u16(b1_eob0), vget_high_u16(b1_eob0));
  b1_eob_q32 = vmovl_u16(b1_eob_d16);
  b1_eob_d32 = vmax_u32(vget_low_u32(b1_eob_q32), vget_high_u32(b1_eob_q32));
  b1_eob_d32 = vpmax_u32(b1_eob_d32, b1_eob_d32);

  /* qcoeff = x */
  vst1q_s16(d0->qcoeff, b0_x0);
  vst1q_s16(d0->qcoeff + 8, b0_x1);
  vst1q_s16(d1->qcoeff, b1_x0);
  vst1q_s16(d1->qcoeff + 8, b1_x1);

  /* dqcoeff = x * dequant */
  vst1q_s16(d0->dqcoeff, vmulq_s16(d0_dequant0, b0_x0));
  vst1q_s16(d0->dqcoeff + 8, vmulq_s16(d0_dequant1, b0_x1));
  vst1q_s16(d1->dqcoeff, vmulq_s16(d1_dequant0, b1_x0));
  vst1q_s16(d1->dqcoeff + 8, vmulq_s16(d1_dequant1, b1_x1));

  vst1_lane_s8((int8_t *)d0->eob, vreinterpret_s8_u32(b0_eob_d32), 0);
  vst1_lane_s8((int8_t *)d1->eob, vreinterpret_s8_u32(b1_eob_d32), 0);

  return;
}
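For reference, here is a scalar sketch of what the intrinsics above compute per 16-coefficient block, following the comments in the code (illustrative; this mirrors the intent of the vp8_fast_quantize_b_c reference, not its verbatim source). The eob trick: each non-zero quantized coefficient is tagged with its 1-based position in scan order via the inverse zigzag table, and the maximum tag is the end-of-block marker.

#include <stdint.h>
#include <stdlib.h>

/* Scalar model of the fast quantizer (names are illustrative). */
static void fast_quantize_b_scalar(const int16_t coeff[16],   /* z */
                                   const int16_t round[16],
                                   const int16_t quant[16],   /* Q16 scale */
                                   const int16_t dequant[16],
                                   int16_t qcoeff[16], int16_t dqcoeff[16],
                                   int8_t *eob) {
  /* same values as inv_zig_zag above: 1-based scan positions */
  static const uint16_t scan_pos[16] = { 1, 2,  6,  7,  3,  5,  8, 13,
                                         4, 9, 12, 14, 10, 11, 15, 16 };
  uint16_t max_pos = 0;
  int i;
  for (i = 0; i < 16; ++i) {
    const int16_t z = coeff[i];
    const int16_t sz = z >> 15;                    /* 0 or -1 */
    int16_t x = (int16_t)(abs(z) + round[i]);      /* x = abs(z) + round */
    const int16_t y =
        (int16_t)(((int32_t)x * quant[i]) >> 16);  /* y = (x * quant) >> 16 */
    x = (int16_t)((y ^ sz) - sz);                  /* restore the sign of z */
    qcoeff[i] = x;
    dqcoeff[i] = (int16_t)(x * dequant[i]);
    if (x != 0 && scan_pos[i] > max_pos) max_pos = scan_pos[i];
  }
  *eob = (int8_t)max_pos;                          /* end of block */
}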
vp8/vp8cx_arm.mk
...
...
@@ -35,9 +35,8 @@ VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM)
 #File list for neon
 # encoder
-VP8_CX_SRCS-$(HAVE_NEON_ASM) += encoder/arm/neon/fastquantizeb_neon$(ASM)
 VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
+VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon.c
 VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c
 VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c
 VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_mse16x16_neon.c
...
...