Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
7feae8e8
Commit
7feae8e8
authored
Aug 11, 2016
by
clang-format
Committed by
James Zern
Aug 12, 2016
Browse files
vp10/common: apply clang-format
Change-Id: I01d8241eba3ccaf4d06c00a51df2d17c126f6f9d
parent
26777fca
Changes
88
Expand all
Hide whitespace changes
Inline
Side-by-side
vp10/common/alloccommon.c
View file @
7feae8e8
...
...
@@ -35,8 +35,7 @@ static int alloc_seg_map(VP10_COMMON *cm, int seg_map_size) {
for
(
i
=
0
;
i
<
NUM_PING_PONG_BUFFERS
;
++
i
)
{
cm
->
seg_map_array
[
i
]
=
(
uint8_t
*
)
vpx_calloc
(
seg_map_size
,
1
);
if
(
cm
->
seg_map_array
[
i
]
==
NULL
)
return
1
;
if
(
cm
->
seg_map_array
[
i
]
==
NULL
)
return
1
;
}
cm
->
seg_map_alloc_size
=
seg_map_size
;
...
...
@@ -91,7 +90,7 @@ void vp10_free_context_buffers(VP10_COMMON *cm) {
int
i
;
cm
->
free_mi
(
cm
);
free_seg_map
(
cm
);
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
i
++
)
{
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
i
++
)
{
vpx_free
(
cm
->
above_context
[
i
]);
cm
->
above_context
[
i
]
=
NULL
;
}
...
...
@@ -110,15 +109,13 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
new_mi_size
=
cm
->
mi_stride
*
calc_mi_size
(
cm
->
mi_rows
);
if
(
cm
->
mi_alloc_size
<
new_mi_size
)
{
cm
->
free_mi
(
cm
);
if
(
cm
->
alloc_mi
(
cm
,
new_mi_size
))
goto
fail
;
if
(
cm
->
alloc_mi
(
cm
,
new_mi_size
))
goto
fail
;
}
if
(
cm
->
seg_map_alloc_size
<
cm
->
mi_rows
*
cm
->
mi_cols
)
{
// Create the segmentation map structure and set to 0.
free_seg_map
(
cm
);
if
(
alloc_seg_map
(
cm
,
cm
->
mi_rows
*
cm
->
mi_cols
))
goto
fail
;
if
(
alloc_seg_map
(
cm
,
cm
->
mi_rows
*
cm
->
mi_cols
))
goto
fail
;
}
if
(
cm
->
above_context_alloc_cols
<
cm
->
mi_cols
)
{
...
...
@@ -129,7 +126,7 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
ALIGN_POWER_OF_TWO
(
cm
->
mi_cols
,
MAX_MIB_SIZE_LOG2
);
int
i
;
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
i
++
)
{
for
(
i
=
0
;
i
<
MAX_MB_PLANE
;
i
++
)
{
vpx_free
(
cm
->
above_context
[
i
]);
cm
->
above_context
[
i
]
=
(
ENTROPY_CONTEXT
*
)
vpx_calloc
(
2
*
aligned_mi_cols
,
sizeof
(
*
cm
->
above_context
[
0
]));
...
...
@@ -153,7 +150,7 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
return
0
;
fail:
fail:
vp10_free_context_buffers
(
cm
);
return
1
;
}
...
...
vp10/common/alloccommon.h
View file @
7feae8e8
...
...
@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_ALLOCCOMMON_H_
#define VP10_COMMON_ALLOCCOMMON_H_
...
...
vp10/common/ans.h
View file @
7feae8e8
...
...
@@ -23,20 +23,18 @@
#if ANS_DIVIDE_BY_MULTIPLY
#include
"vp10/common/divide.h"
#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
do { \
quotient = fastdiv(dividend, divisor); \
remainder = dividend - quotient * divisor; \
do {
\
quotient = fastdiv(dividend, divisor);
\
remainder = dividend - quotient * divisor;
\
} while (0)
#define ANS_DIV(dividend, divisor) \
fastdiv(dividend, divisor)
#define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor)
#else
#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
do { \
quotient = dividend / divisor; \
remainder = dividend % divisor; \
do {
\
quotient = dividend / divisor;
\
remainder = dividend % divisor;
\
} while (0)
#define ANS_DIV(dividend, divisor) \
((dividend) / (divisor))
#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
#endif
#ifdef __cplusplus
...
...
@@ -245,8 +243,7 @@ static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) {
// TODO(aconverse): Investigate ways to read/write literals faster,
// e.g. 8-bit chunks.
for
(
bit
=
bits
-
1
;
bit
>=
0
;
bit
--
)
literal
|=
uabs_read_bit
(
ans
)
<<
bit
;
for
(
bit
=
bits
-
1
;
bit
>=
0
;
bit
--
)
literal
|=
uabs_read_bit
(
ans
)
<<
bit
;
return
literal
;
}
...
...
@@ -257,8 +254,7 @@ static INLINE int uabs_read_tree(struct AnsDecoder *ans,
const
AnsP8
*
probs
)
{
vpx_tree_index
i
=
0
;
while
((
i
=
tree
[
i
+
uabs_read
(
ans
,
probs
[
i
>>
1
])])
>
0
)
continue
;
while
((
i
=
tree
[
i
+
uabs_read
(
ans
,
probs
[
i
>>
1
])])
>
0
)
continue
;
return
-
i
;
}
...
...
@@ -288,8 +284,7 @@ static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
assert
(
cdf_tab
[
i
-
1
]
==
rans_precision
);
}
static
INLINE
int
ans_find_largest
(
const
AnsP10
*
const
pdf_tab
,
int
num_syms
)
{
static
INLINE
int
ans_find_largest
(
const
AnsP10
*
const
pdf_tab
,
int
num_syms
)
{
int
largest_idx
=
-
1
;
int
largest_p
=
-
1
;
int
i
;
...
...
@@ -365,8 +360,7 @@ static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
out
->
cum_prob
=
(
AnsP10
)
cdf
[
i
-
1
];
}
static
INLINE
int
rans_read
(
struct
AnsDecoder
*
ans
,
const
rans_dec_lut
tab
)
{
static
INLINE
int
rans_read
(
struct
AnsDecoder
*
ans
,
const
rans_dec_lut
tab
)
{
unsigned
rem
;
unsigned
quo
;
struct
rans_dec_sym
sym
;
...
...
@@ -381,8 +375,7 @@ static INLINE int rans_read(struct AnsDecoder *ans,
}
static
INLINE
int
ans_read_init
(
struct
AnsDecoder
*
const
ans
,
const
uint8_t
*
const
buf
,
int
offset
)
{
const
uint8_t
*
const
buf
,
int
offset
)
{
unsigned
x
;
if
(
offset
<
1
)
return
1
;
ans
->
buf
=
buf
;
...
...
@@ -403,8 +396,7 @@ static INLINE int ans_read_init(struct AnsDecoder *const ans,
return
1
;
}
ans
->
state
+=
l_base
;
if
(
ans
->
state
>=
l_base
*
io_base
)
return
1
;
if
(
ans
->
state
>=
l_base
*
io_base
)
return
1
;
return
0
;
}
...
...
vp10/common/arm/neon/iht4x4_add_neon.c
View file @
7feae8e8
...
...
@@ -23,226 +23,211 @@ static int16_t cospi_8_64 = 0x3b21;
static
int16_t
cospi_16_64
=
0x2d41
;
static
int16_t
cospi_24_64
=
0x187e
;
static
INLINE
void
TRANSPOSE4X4
(
int16x8_t
*
q8s16
,
int16x8_t
*
q9s16
)
{
int32x4_t
q8s32
,
q9s32
;
int16x4x2_t
d0x2s16
,
d1x2s16
;
int32x4x2_t
q0x2s32
;
d0x2s16
=
vtrn_s16
(
vget_low_s16
(
*
q8s16
),
vget_high_s16
(
*
q8s16
));
d1x2s16
=
vtrn_s16
(
vget_low_s16
(
*
q9s16
),
vget_high_s16
(
*
q9s16
));
q8s32
=
vreinterpretq_s32_s16
(
vcombine_s16
(
d0x2s16
.
val
[
0
],
d0x2s16
.
val
[
1
]));
q9s32
=
vreinterpretq_s32_s16
(
vcombine_s16
(
d1x2s16
.
val
[
0
],
d1x2s16
.
val
[
1
]));
q0x2s32
=
vtrnq_s32
(
q8s32
,
q9s32
);
*
q8s16
=
vreinterpretq_s16_s32
(
q0x2s32
.
val
[
0
]);
*
q9s16
=
vreinterpretq_s16_s32
(
q0x2s32
.
val
[
1
]);
return
;
static
INLINE
void
TRANSPOSE4X4
(
int16x8_t
*
q8s16
,
int16x8_t
*
q9s16
)
{
int32x4_t
q8s32
,
q9s32
;
int16x4x2_t
d0x2s16
,
d1x2s16
;
int32x4x2_t
q0x2s32
;
d0x2s16
=
vtrn_s16
(
vget_low_s16
(
*
q8s16
),
vget_high_s16
(
*
q8s16
));
d1x2s16
=
vtrn_s16
(
vget_low_s16
(
*
q9s16
),
vget_high_s16
(
*
q9s16
));
q8s32
=
vreinterpretq_s32_s16
(
vcombine_s16
(
d0x2s16
.
val
[
0
],
d0x2s16
.
val
[
1
]));
q9s32
=
vreinterpretq_s32_s16
(
vcombine_s16
(
d1x2s16
.
val
[
0
],
d1x2s16
.
val
[
1
]));
q0x2s32
=
vtrnq_s32
(
q8s32
,
q9s32
);
*
q8s16
=
vreinterpretq_s16_s32
(
q0x2s32
.
val
[
0
]);
*
q9s16
=
vreinterpretq_s16_s32
(
q0x2s32
.
val
[
1
]);
return
;
}
static
INLINE
void
GENERATE_COSINE_CONSTANTS
(
int16x4_t
*
d0s16
,
int16x4_t
*
d1s16
,
int16x4_t
*
d2s16
)
{
*
d0s16
=
vdup_n_s16
(
cospi_8_64
);
*
d1s16
=
vdup_n_s16
(
cospi_16_64
);
*
d2s16
=
vdup_n_s16
(
cospi_24_64
);
return
;
static
INLINE
void
GENERATE_COSINE_CONSTANTS
(
int16x4_t
*
d0s16
,
int16x4_t
*
d1s16
,
int16x4_t
*
d2s16
)
{
*
d0s16
=
vdup_n_s16
(
cospi_8_64
);
*
d1s16
=
vdup_n_s16
(
cospi_16_64
);
*
d2s16
=
vdup_n_s16
(
cospi_24_64
);
return
;
}
static
INLINE
void
GENERATE_SINE_CONSTANTS
(
int16x4_t
*
d3s16
,
int16x4_t
*
d4s16
,
int16x4_t
*
d5s16
,
int16x8_t
*
q3s16
)
{
*
d3s16
=
vdup_n_s16
(
sinpi_1_9
);
*
d4s16
=
vdup_n_s16
(
sinpi_2_9
);
*
q3s16
=
vdupq_n_s16
(
sinpi_3_9
);
*
d5s16
=
vdup_n_s16
(
sinpi_4_9
);
return
;
static
INLINE
void
GENERATE_SINE_CONSTANTS
(
int16x4_t
*
d3s16
,
int16x4_t
*
d4s16
,
int16x4_t
*
d5s16
,
int16x8_t
*
q3s16
)
{
*
d3s16
=
vdup_n_s16
(
sinpi_1_9
);
*
d4s16
=
vdup_n_s16
(
sinpi_2_9
);
*
q3s16
=
vdupq_n_s16
(
sinpi_3_9
);
*
d5s16
=
vdup_n_s16
(
sinpi_4_9
);
return
;
}
static
INLINE
void
IDCT4x4_1D
(
int16x4_t
*
d0s16
,
int16x4_t
*
d1s16
,
int16x4_t
*
d2s16
,
int16x8_t
*
q8s16
,
int16x8_t
*
q9s16
)
{
int16x4_t
d16s16
,
d17s16
,
d18s16
,
d19s16
,
d23s16
,
d24s16
;
int16x4_t
d26s16
,
d27s16
,
d28s16
,
d29s16
;
int32x4_t
q10s32
,
q13s32
,
q14s32
,
q15s32
;
int16x8_t
q13s16
,
q14s16
;
d16s16
=
vget_low_s16
(
*
q8s16
);
d17s16
=
vget_high_s16
(
*
q8s16
);
d18s16
=
vget_low_s16
(
*
q9s16
);
d19s16
=
vget_high_s16
(
*
q9s16
);
d23s16
=
vadd_s16
(
d16s16
,
d18s16
);
d24s16
=
vsub_s16
(
d16s16
,
d18s16
);
q15s32
=
vmull_s16
(
d17s16
,
*
d2s16
);
q10s32
=
vmull_s16
(
d17s16
,
*
d0s16
);
q13s32
=
vmull_s16
(
d23s16
,
*
d1s16
);
q14s32
=
vmull_s16
(
d24s16
,
*
d1s16
);
q15s32
=
vmlsl_s16
(
q15s32
,
d19s16
,
*
d0s16
);
q10s32
=
vmlal_s16
(
q10s32
,
d19s16
,
*
d2s16
);
d26s16
=
vqrshrn_n_s32
(
q13s32
,
14
);
d27s16
=
vqrshrn_n_s32
(
q14s32
,
14
);
d29s16
=
vqrshrn_n_s32
(
q15s32
,
14
);
d28s16
=
vqrshrn_n_s32
(
q10s32
,
14
);
q13s16
=
vcombine_s16
(
d26s16
,
d27s16
);
q14s16
=
vcombine_s16
(
d28s16
,
d29s16
);
*
q8s16
=
vaddq_s16
(
q13s16
,
q14s16
);
*
q9s16
=
vsubq_s16
(
q13s16
,
q14s16
);
*
q9s16
=
vcombine_s16
(
vget_high_s16
(
*
q9s16
),
vget_low_s16
(
*
q9s16
));
// vswp
return
;
static
INLINE
void
IDCT4x4_1D
(
int16x4_t
*
d0s16
,
int16x4_t
*
d1s16
,
int16x4_t
*
d2s16
,
int16x8_t
*
q8s16
,
int16x8_t
*
q9s16
)
{
int16x4_t
d16s16
,
d17s16
,
d18s16
,
d19s16
,
d23s16
,
d24s16
;
int16x4_t
d26s16
,
d27s16
,
d28s16
,
d29s16
;
int32x4_t
q10s32
,
q13s32
,
q14s32
,
q15s32
;
int16x8_t
q13s16
,
q14s16
;
d16s16
=
vget_low_s16
(
*
q8s16
);
d17s16
=
vget_high_s16
(
*
q8s16
);
d18s16
=
vget_low_s16
(
*
q9s16
);
d19s16
=
vget_high_s16
(
*
q9s16
);
d23s16
=
vadd_s16
(
d16s16
,
d18s16
);
d24s16
=
vsub_s16
(
d16s16
,
d18s16
);
q15s32
=
vmull_s16
(
d17s16
,
*
d2s16
);
q10s32
=
vmull_s16
(
d17s16
,
*
d0s16
);
q13s32
=
vmull_s16
(
d23s16
,
*
d1s16
);
q14s32
=
vmull_s16
(
d24s16
,
*
d1s16
);
q15s32
=
vmlsl_s16
(
q15s32
,
d19s16
,
*
d0s16
);
q10s32
=
vmlal_s16
(
q10s32
,
d19s16
,
*
d2s16
);
d26s16
=
vqrshrn_n_s32
(
q13s32
,
14
);
d27s16
=
vqrshrn_n_s32
(
q14s32
,
14
);
d29s16
=
vqrshrn_n_s32
(
q15s32
,
14
);
d28s16
=
vqrshrn_n_s32
(
q10s32
,
14
);
q13s16
=
vcombine_s16
(
d26s16
,
d27s16
);
q14s16
=
vcombine_s16
(
d28s16
,
d29s16
);
*
q8s16
=
vaddq_s16
(
q13s16
,
q14s16
);
*
q9s16
=
vsubq_s16
(
q13s16
,
q14s16
);
*
q9s16
=
vcombine_s16
(
vget_high_s16
(
*
q9s16
),
vget_low_s16
(
*
q9s16
));
// vswp
return
;
}
static
INLINE
void
IADST4x4_1D
(
int16x4_t
*
d3s16
,
int16x4_t
*
d4s16
,
int16x4_t
*
d5s16
,
int16x8_t
*
q3s16
,
int16x8_t
*
q8s16
,
int16x8_t
*
q9s16
)
{
int16x4_t
d6s16
,
d16s16
,
d17s16
,
d18s16
,
d19s16
;
int32x4_t
q8s32
,
q9s32
,
q10s32
,
q11s32
,
q12s32
,
q13s32
,
q14s32
,
q15s32
;
d6s16
=
vget_low_s16
(
*
q3s16
);
d16s16
=
vget_low_s16
(
*
q8s16
);
d17s16
=
vget_high_s16
(
*
q8s16
);
d18s16
=
vget_low_s16
(
*
q9s16
);
d19s16
=
vget_high_s16
(
*
q9s16
);
q10s32
=
vmull_s16
(
*
d3s16
,
d16s16
);
q11s32
=
vmull_s16
(
*
d4s16
,
d16s16
);
q12s32
=
vmull_s16
(
d6s16
,
d17s16
);
q13s32
=
vmull_s16
(
*
d5s16
,
d18s16
);
q14s32
=
vmull_s16
(
*
d3s16
,
d18s16
);
q15s32
=
vmovl_s16
(
d16s16
);
q15s32
=
vaddw_s16
(
q15s32
,
d19s16
);
q8s32
=
vmull_s16
(
*
d4s16
,
d19s16
);
q15s32
=
vsubw_s16
(
q15s32
,
d18s16
);
q9s32
=
vmull_s16
(
*
d5s16
,
d19s16
);
q10s32
=
vaddq_s32
(
q10s32
,
q13s32
);
q10s32
=
vaddq_s32
(
q10s32
,
q8s32
);
q11s32
=
vsubq_s32
(
q11s32
,
q14s32
);
q8s32
=
vdupq_n_s32
(
sinpi_3_9
);
q11s32
=
vsubq_s32
(
q11s32
,
q9s32
);
q15s32
=
vmulq_s32
(
q15s32
,
q8s32
);
q13s32
=
vaddq_s32
(
q10s32
,
q12s32
);
q10s32
=
vaddq_s32
(
q10s32
,
q11s32
);
q14s32
=
vaddq_s32
(
q11s32
,
q12s32
);
q10s32
=
vsubq_s32
(
q10s32
,
q12s32
);
d16s16
=
vqrshrn_n_s32
(
q13s32
,
14
);
d17s16
=
vqrshrn_n_s32
(
q14s32
,
14
);
d18s16
=
vqrshrn_n_s32
(
q15s32
,
14
);
d19s16
=
vqrshrn_n_s32
(
q10s32
,
14
);
*
q8s16
=
vcombine_s16
(
d16s16
,
d17s16
);
*
q9s16
=
vcombine_s16
(
d18s16
,
d19s16
);
return
;
static
INLINE
void
IADST4x4_1D
(
int16x4_t
*
d3s16
,
int16x4_t
*
d4s16
,
int16x4_t
*
d5s16
,
int16x8_t
*
q3s16
,
int16x8_t
*
q8s16
,
int16x8_t
*
q9s16
)
{
int16x4_t
d6s16
,
d16s16
,
d17s16
,
d18s16
,
d19s16
;
int32x4_t
q8s32
,
q9s32
,
q10s32
,
q11s32
,
q12s32
,
q13s32
,
q14s32
,
q15s32
;
d6s16
=
vget_low_s16
(
*
q3s16
);
d16s16
=
vget_low_s16
(
*
q8s16
);
d17s16
=
vget_high_s16
(
*
q8s16
);
d18s16
=
vget_low_s16
(
*
q9s16
);
d19s16
=
vget_high_s16
(
*
q9s16
);
q10s32
=
vmull_s16
(
*
d3s16
,
d16s16
);
q11s32
=
vmull_s16
(
*
d4s16
,
d16s16
);
q12s32
=
vmull_s16
(
d6s16
,
d17s16
);
q13s32
=
vmull_s16
(
*
d5s16
,
d18s16
);
q14s32
=
vmull_s16
(
*
d3s16
,
d18s16
);
q15s32
=
vmovl_s16
(
d16s16
);
q15s32
=
vaddw_s16
(
q15s32
,
d19s16
);
q8s32
=
vmull_s16
(
*
d4s16
,
d19s16
);
q15s32
=
vsubw_s16
(
q15s32
,
d18s16
);
q9s32
=
vmull_s16
(
*
d5s16
,
d19s16
);
q10s32
=
vaddq_s32
(
q10s32
,
q13s32
);
q10s32
=
vaddq_s32
(
q10s32
,
q8s32
);
q11s32
=
vsubq_s32
(
q11s32
,
q14s32
);
q8s32
=
vdupq_n_s32
(
sinpi_3_9
);
q11s32
=
vsubq_s32
(
q11s32
,
q9s32
);
q15s32
=
vmulq_s32
(
q15s32
,
q8s32
);
q13s32
=
vaddq_s32
(
q10s32
,
q12s32
);
q10s32
=
vaddq_s32
(
q10s32
,
q11s32
);
q14s32
=
vaddq_s32
(
q11s32
,
q12s32
);
q10s32
=
vsubq_s32
(
q10s32
,
q12s32
);
d16s16
=
vqrshrn_n_s32
(
q13s32
,
14
);
d17s16
=
vqrshrn_n_s32
(
q14s32
,
14
);
d18s16
=
vqrshrn_n_s32
(
q15s32
,
14
);
d19s16
=
vqrshrn_n_s32
(
q10s32
,
14
);
*
q8s16
=
vcombine_s16
(
d16s16
,
d17s16
);
*
q9s16
=
vcombine_s16
(
d18s16
,
d19s16
);
return
;
}
void
vp10_iht4x4_16_add_neon
(
const
tran_low_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
,
int
tx_type
)
{
uint8x8_t
d26u8
,
d27u8
;
int16x4_t
d0s16
,
d1s16
,
d2s16
,
d3s16
,
d4s16
,
d5s16
;
uint32x2_t
d26u32
,
d27u32
;
int16x8_t
q3s16
,
q8s16
,
q9s16
;
uint16x8_t
q8u16
,
q9u16
;
d26u32
=
d27u32
=
vdup_n_u32
(
0
);
q8s16
=
vld1q_s16
(
input
);
q9s16
=
vld1q_s16
(
input
+
8
);
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
switch
(
tx_type
)
{
case
0
:
// idct_idct is not supported. Fall back to C
vp10_iht4x4_16_add_c
(
input
,
dest
,
dest_stride
,
tx_type
);
return
;
break
;
case
1
:
// iadst_idct
// generate constants
GENERATE_COSINE_CONSTANTS
(
&
d0s16
,
&
d1s16
,
&
d2s16
);
GENERATE_SINE_CONSTANTS
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
);
// first transform rows
IDCT4x4_1D
(
&
d0s16
,
&
d1s16
,
&
d2s16
,
&
q8s16
,
&
q9s16
);
// transpose the matrix
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
// then transform columns
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
break
;
case
2
:
// idct_iadst
// generate constants
GENERATE_COSINE_CONSTANTS
(
&
d0s16
,
&
d1s16
,
&
d2s16
);
GENERATE_SINE_CONSTANTS
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
);
// first transform rows
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
// transpose the matrix
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
// then transform columns
IDCT4x4_1D
(
&
d0s16
,
&
d1s16
,
&
d2s16
,
&
q8s16
,
&
q9s16
);
break
;
case
3
:
// iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
);
// first transform rows
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
// transpose the matrix
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
// then transform columns
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
break
;
default:
// unexpected tx_type: not one of the cases handled above
assert
(
0
);
break
;
}
q8s16
=
vrshrq_n_s16
(
q8s16
,
4
);
q9s16
=
vrshrq_n_s16
(
q9s16
,
4
);
d26u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d26u32
,
0
);
dest
+=
dest_stride
;
d26u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d26u32
,
1
);
dest
+=
dest_stride
;
d27u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d27u32
,
0
);
dest
+=
dest_stride
;
d27u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d27u32
,
1
);
q8u16
=
vaddw_u8
(
vreinterpretq_u16_s16
(
q8s16
),
vreinterpret_u8_u32
(
d26u32
));
q9u16
=
vaddw_u8
(
vreinterpretq_u16_s16
(
q9s16
),
vreinterpret_u8_u32
(
d27u32
));
d26u8
=
vqmovun_s16
(
vreinterpretq_s16_u16
(
q8u16
));
d27u8
=
vqmovun_s16
(
vreinterpretq_s16_u16
(
q9u16
));
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d27u8
),
1
);
dest
-=
dest_stride
;
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d27u8
),
0
);
dest
-=
dest_stride
;
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d26u8
),
1
);
dest
-=
dest_stride
;
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d26u8
),
0
);
return
;
int
dest_stride
,
int
tx_type
)
{
uint8x8_t
d26u8
,
d27u8
;
int16x4_t
d0s16
,
d1s16
,
d2s16
,
d3s16
,
d4s16
,
d5s16
;
uint32x2_t
d26u32
,
d27u32
;
int16x8_t
q3s16
,
q8s16
,
q9s16
;
uint16x8_t
q8u16
,
q9u16
;
d26u32
=
d27u32
=
vdup_n_u32
(
0
);
q8s16
=
vld1q_s16
(
input
);
q9s16
=
vld1q_s16
(
input
+
8
);
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
switch
(
tx_type
)
{
case
0
:
// idct_idct is not supported. Fall back to C
vp10_iht4x4_16_add_c
(
input
,
dest
,
dest_stride
,
tx_type
);
return
;
break
;
case
1
:
// iadst_idct
// generate constants
GENERATE_COSINE_CONSTANTS
(
&
d0s16
,
&
d1s16
,
&
d2s16
);
GENERATE_SINE_CONSTANTS
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
);
// first transform rows
IDCT4x4_1D
(
&
d0s16
,
&
d1s16
,
&
d2s16
,
&
q8s16
,
&
q9s16
);
// transpose the matrix
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
// then transform columns
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
break
;
case
2
:
// idct_iadst
// generate constants
GENERATE_COSINE_CONSTANTS
(
&
d0s16
,
&
d1s16
,
&
d2s16
);
GENERATE_SINE_CONSTANTS
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
);
// first transform rows
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
// transpose the matrix
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
// then transform columns
IDCT4x4_1D
(
&
d0s16
,
&
d1s16
,
&
d2s16
,
&
q8s16
,
&
q9s16
);
break
;
case
3
:
// iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
);
// first transform rows
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
// transpose the matrix
TRANSPOSE4X4
(
&
q8s16
,
&
q9s16
);
// then transform columns
IADST4x4_1D
(
&
d3s16
,
&
d4s16
,
&
d5s16
,
&
q3s16
,
&
q8s16
,
&
q9s16
);
break
;
default:
// unexpected tx_type: not one of the cases handled above
assert
(
0
);
break
;
}
q8s16
=
vrshrq_n_s16
(
q8s16
,
4
);
q9s16
=
vrshrq_n_s16
(
q9s16
,
4
);
d26u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d26u32
,
0
);
dest
+=
dest_stride
;
d26u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d26u32
,
1
);
dest
+=
dest_stride
;
d27u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d27u32
,
0
);
dest
+=
dest_stride
;
d27u32
=
vld1_lane_u32
((
const
uint32_t
*
)
dest
,
d27u32
,
1
);
q8u16
=
vaddw_u8
(
vreinterpretq_u16_s16
(
q8s16
),
vreinterpret_u8_u32
(
d26u32
));
q9u16
=
vaddw_u8
(
vreinterpretq_u16_s16
(
q9s16
),
vreinterpret_u8_u32
(
d27u32
));
d26u8
=
vqmovun_s16
(
vreinterpretq_s16_u16
(
q8u16
));
d27u8
=
vqmovun_s16
(
vreinterpretq_s16_u16
(
q9u16
));
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d27u8
),
1
);
dest
-=
dest_stride
;
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d27u8
),
0
);
dest
-=
dest_stride
;
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d26u8
),
1
);
dest
-=
dest_stride
;
vst1_lane_u32
((
uint32_t
*
)
dest
,
vreinterpret_u32_u8
(
d26u8
),
0
);
return
;
}
vp10/common/arm/neon/iht8x8_add_neon.c
View file @
7feae8e8
This diff is collapsed.
Click to expand it.
vp10/common/blockd.c
View file @
7feae8e8
...
...
@@ -15,10 +15,9 @@
#include
"vp10/common/blockd.h"
PREDICTION_MODE
vp10_left_block_mode
(
const
MODE_INFO
*
cur_mi
,
const
MODE_INFO
*
left_mi
,
int
b
)
{
const
MODE_INFO
*
left_mi
,
int
b
)
{
if
(
b
==
0
||
b
==
2
)
{
if
(
!
left_mi
||
is_inter_block
(
&
left_mi
->
mbmi
))
return
DC_PRED
;
if
(
!
left_mi
||
is_inter_block
(
&
left_mi
->
mbmi
))
return
DC_PRED
;
return
get_y_mode
(
left_mi
,
b
+
1
);
}
else
{
...
...
@@ -28,10 +27,9 @@ PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
}
PREDICTION_MODE
vp10_above_block_mode
(
const
MODE_INFO
*
cur_mi
,
const
MODE_INFO
*
above_mi
,
int
b
)
{