Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Guillaume Martres
aom-rav1e
Commits
419c3f6f
Commit
419c3f6f
authored
Oct 10, 2013
by
Dmitry Kovalev
Committed by
Gerrit Code Review
Oct 10, 2013
Browse files
Merge "Giving consistent names to IDCT 16x16 functions."
parents
5bcc11b1
b096c5a3
Changes
9
Hide whitespace changes
Inline
Side-by-side
test/dct16x16_test.cc
View file @
419c3f6f
...
...
@@ -21,7 +21,7 @@
extern
"C"
{
#include
"vp9/common/vp9_entropy.h"
#include
"./vp9_rtcd.h"
void
vp9_
short_
idct16x16_add_c
(
int16_t
*
input
,
uint8_t
*
output
,
int
pitch
);
void
vp9_idct16x16_
256_
add_c
(
int16_t
*
input
,
uint8_t
*
output
,
int
pitch
);
}
#include
"vpx/vpx_integer.h"
...
...
@@ -496,7 +496,7 @@ using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P
(
C
,
Trans16x16DCT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_short_fdct16x16_c
,
&
vp9_
short_
idct16x16_add_c
,
0
)));
make_tuple
(
&
vp9_short_fdct16x16_c
,
&
vp9_idct16x16_
256_
add_c
,
0
)));
INSTANTIATE_TEST_CASE_P
(
C
,
Trans16x16HT
,
::
testing
::
Values
(
...
...
@@ -510,7 +510,7 @@ INSTANTIATE_TEST_CASE_P(
SSE2
,
Trans16x16DCT
,
::
testing
::
Values
(
make_tuple
(
&
vp9_short_fdct16x16_sse2
,
&
vp9_
short_
idct16x16_add_sse2
,
0
)));
&
vp9_idct16x16_
256_
add_sse2
,
0
)));
INSTANTIATE_TEST_CASE_P
(
SSE2
,
Trans16x16HT
,
::
testing
::
Values
(
...
...
vp9/common/arm/neon/vp9_idct16x16_neon.c
View file @
419c3f6f
...
...
@@ -11,19 +11,19 @@
#include
"./vp9_rtcd.h"
#include
"vp9/common/vp9_common.h"
extern
void
vp9_
short_
idct16x16_add_neon_pass1
(
int16_t
*
input
,
extern
void
vp9_idct16x16_
256_
add_neon_pass1
(
int16_t
*
input
,
int16_t
*
output
,
int
output_stride
);
extern
void
vp9_
short_
idct16x16_add_neon_pass2
(
int16_t
*
src
,
extern
void
vp9_idct16x16_
256_
add_neon_pass2
(
int16_t
*
src
,
int16_t
*
output
,
int16_t
*
pass1Output
,
int16_t
skip_adding
,
uint8_t
*
dest
,
int
dest_stride
);
extern
void
vp9_
short_
idct16x16_10_add_neon_pass1
(
int16_t
*
input
,
extern
void
vp9_idct16x16_10_add_neon_pass1
(
int16_t
*
input
,
int16_t
*
output
,
int
output_stride
);
extern
void
vp9_
short_
idct16x16_10_add_neon_pass2
(
int16_t
*
src
,
extern
void
vp9_idct16x16_10_add_neon_pass2
(
int16_t
*
src
,
int16_t
*
output
,
int16_t
*
pass1Output
,
int16_t
skip_adding
,
...
...
@@ -34,7 +34,7 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
extern
void
vp9_push_neon
(
int64_t
*
store
);
extern
void
vp9_pop_neon
(
int64_t
*
store
);
void
vp9_
short_
idct16x16_add_neon
(
int16_t
*
input
,
void
vp9_idct16x16_
256_
add_neon
(
int16_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
)
{
int64_t
store_reg
[
8
];
int16_t
pass1_output
[
16
*
16
]
=
{
0
};
...
...
@@ -46,12 +46,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_add_neon_pass1
(
input
,
pass1_output
,
8
);
vp9_idct16x16_
256_
add_neon_pass1
(
input
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
vp9_
short_
idct16x16_add_neon_pass2
(
input
+
1
,
vp9_idct16x16_
256_
add_neon_pass2
(
input
+
1
,
row_idct_output
,
pass1_output
,
0
,
...
...
@@ -61,12 +61,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the lower 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_add_neon_pass1
(
input
+
8
*
16
,
pass1_output
,
8
);
vp9_idct16x16_
256_
add_neon_pass1
(
input
+
8
*
16
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
vp9_
short_
idct16x16_add_neon_pass2
(
input
+
8
*
16
+
1
,
vp9_idct16x16_
256_
add_neon_pass2
(
input
+
8
*
16
+
1
,
row_idct_output
+
8
,
pass1_output
,
0
,
...
...
@@ -76,12 +76,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_add_neon_pass1
(
row_idct_output
,
pass1_output
,
8
);
vp9_idct16x16_
256_
add_neon_pass1
(
row_idct_output
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_
short_
idct16x16_add_neon_pass2
(
row_idct_output
+
1
,
vp9_idct16x16_
256_
add_neon_pass2
(
row_idct_output
+
1
,
row_idct_output
,
pass1_output
,
1
,
...
...
@@ -91,12 +91,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_add_neon_pass1
(
row_idct_output
+
8
*
16
,
pass1_output
,
8
);
vp9_idct16x16_
256_
add_neon_pass1
(
row_idct_output
+
8
*
16
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_
short_
idct16x16_add_neon_pass2
(
row_idct_output
+
8
*
16
+
1
,
vp9_idct16x16_
256_
add_neon_pass2
(
row_idct_output
+
8
*
16
+
1
,
row_idct_output
+
8
,
pass1_output
,
1
,
...
...
@@ -109,7 +109,7 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
return
;
}
void
vp9_
short_
idct16x16_10_add_neon
(
int16_t
*
input
,
void
vp9_idct16x16_10_add_neon
(
int16_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
)
{
int64_t
store_reg
[
8
];
int16_t
pass1_output
[
16
*
16
]
=
{
0
};
...
...
@@ -121,12 +121,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_10_add_neon_pass1
(
input
,
pass1_output
,
8
);
vp9_idct16x16_10_add_neon_pass1
(
input
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
vp9_
short_
idct16x16_10_add_neon_pass2
(
input
+
1
,
vp9_idct16x16_10_add_neon_pass2
(
input
+
1
,
row_idct_output
,
pass1_output
,
0
,
...
...
@@ -138,12 +138,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_add_neon_pass1
(
row_idct_output
,
pass1_output
,
8
);
vp9_idct16x16_
256_
add_neon_pass1
(
row_idct_output
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_
short_
idct16x16_add_neon_pass2
(
row_idct_output
+
1
,
vp9_idct16x16_
256_
add_neon_pass2
(
row_idct_output
+
1
,
row_idct_output
,
pass1_output
,
1
,
...
...
@@ -153,12 +153,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_
short_
idct16x16_add_neon_pass1
(
row_idct_output
+
8
*
16
,
pass1_output
,
8
);
vp9_idct16x16_
256_
add_neon_pass1
(
row_idct_output
+
8
*
16
,
pass1_output
,
8
);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_
short_
idct16x16_add_neon_pass2
(
row_idct_output
+
8
*
16
+
1
,
vp9_idct16x16_
256_
add_neon_pass2
(
row_idct_output
+
8
*
16
+
1
,
row_idct_output
+
8
,
pass1_output
,
1
,
...
...
vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
View file @
419c3f6f
...
...
@@ -8,21 +8,21 @@
;
EXPORT
|
vp9_
short_
idct16x16_1_add_neon
|
EXPORT
|
vp9_idct16x16_1_add_neon
|
ARM
REQUIRE8
PRESERVE8
AREA
||
.text
||
,
CODE
,
READONLY
,
AL
IGN
=
2
;void vp9_
short_
idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|
vp9_
short_
idct16x16_1_add_neon
|
PROC
|
vp9_idct16x16_1_add_neon
|
PROC
ldrsh
r0
,
[
r0
]
; generate cospi_16_64 = 11585
...
...
@@ -193,6 +193,6 @@
vst1.64
{
d31
}
,
[
r12
],
r2
bx
lr
ENDP
; |vp9_
short_
idct16x16_1_add_neon|
ENDP
; |vp9_idct16x16_1_add_neon|
END
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
View file @
419c3f6f
...
...
@@ -8,10 +8,10 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT
|
vp9_
short_
idct16x16_add_neon_pass1
|
EXPORT
|
vp9_
short_
idct16x16_add_neon_pass2
|
EXPORT
|
vp9_
short_
idct16x16_10_add_neon_pass1
|
EXPORT
|
vp9_
short_
idct16x16_10_add_neon_pass2
|
EXPORT
|
vp9_idct16x16_
256_
add_neon_pass1
|
EXPORT
|
vp9_idct16x16_
256_
add_neon_pass2
|
EXPORT
|
vp9_idct16x16_10_add_neon_pass1
|
EXPORT
|
vp9_idct16x16_10_add_neon_pass2
|
ARM
REQUIRE8
PRESERVE8
...
...
@@ -36,7 +36,7 @@
MEND
AREA
Bl
ock
,
CODE
,
READONLY
; name this block of code
;void |vp9_
short_
idct16x16_add_neon_pass1|(int16_t *input,
;void |vp9_idct16x16_
256_
add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
...
...
@@ -46,7 +46,7 @@
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
|
vp9_
short_
idct16x16_add_neon_pass1
|
PROC
|
vp9_idct16x16_
256_
add_neon_pass1
|
PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
...
...
@@ -273,9 +273,9 @@
vst1.64
{
d31
}
,
[
r1
],
r2
bx
lr
ENDP
; |vp9_
short_
idct16x16_add_neon_pass1|
ENDP
; |vp9_idct16x16_
256_
add_neon_pass1|
;void vp9_
short_
idct16x16_add_neon_pass2(int16_t *src,
;void vp9_idct16x16_
256_
add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
...
...
@@ -292,7 +292,7 @@
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
|
vp9_
short_
idct16x16_add_neon_pass2
|
PROC
|
vp9_idct16x16_
256_
add_neon_pass2
|
PROC
push
{
r3
-
r9
}
; TODO(hkuang): Find a better way to load the elements.
...
...
@@ -784,9 +784,9 @@ skip_adding_dest
end_idct16x16_pass2
pop
{
r3
-
r9
}
bx
lr
ENDP
; |vp9_
short_
idct16x16_add_neon_pass2|
ENDP
; |vp9_idct16x16_
256_
add_neon_pass2|
;void |vp9_
short_
idct16x16_10_add_neon_pass1|(int16_t *input,
;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
...
...
@@ -796,7 +796,7 @@ end_idct16x16_pass2
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
|
vp9_
short_
idct16x16_10_add_neon_pass1
|
PROC
|
vp9_idct16x16_10_add_neon_pass1
|
PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
...
...
@@ -905,9 +905,9 @@ end_idct16x16_pass2
vst1.64
{
d31
}
,
[
r1
],
r2
bx
lr
ENDP
; |vp9_
short_
idct16x16_10_add_neon_pass1|
ENDP
; |vp9_idct16x16_10_add_neon_pass1|
;void vp9_
short_
idct16x16_10_add_neon_pass2(int16_t *src,
;void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
...
...
@@ -924,7 +924,7 @@ end_idct16x16_pass2
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
|
vp9_
short_
idct16x16_10_add_neon_pass2
|
PROC
|
vp9_idct16x16_10_add_neon_pass2
|
PROC
push
{
r3
-
r9
}
; TODO(hkuang): Find a better way to load the elements.
...
...
@@ -1175,5 +1175,5 @@ end_idct16x16_pass2
end_idct10_16x16_pass2
pop
{
r3
-
r9
}
bx
lr
ENDP
; |vp9_
short_
idct16x16_10_add_neon_pass2|
ENDP
; |vp9_idct16x16_10_add_neon_pass2|
END
vp9/common/vp9_idct.c
View file @
419c3f6f
...
...
@@ -611,7 +611,7 @@ static void idct16_1d(int16_t *input, int16_t *output) {
output
[
15
]
=
step2
[
0
]
-
step2
[
15
];
}
void
vp9_
short_
idct16x16_add_c
(
int16_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
)
{
void
vp9_idct16x16_
256_
add_c
(
int16_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
)
{
int16_t
out
[
16
*
16
];
int16_t
*
outptr
=
out
;
int
i
,
j
;
...
...
@@ -838,7 +838,7 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
+
dest
[
j
*
dest_stride
+
i
]);
}
}
void
vp9_
short_
idct16x16_10_add_c
(
int16_t
*
input
,
uint8_t
*
dest
,
void
vp9_idct16x16_10_add_c
(
int16_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
)
{
int16_t
out
[
16
*
16
]
=
{
0
};
int16_t
*
outptr
=
out
;
...
...
@@ -864,7 +864,7 @@ void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
}
}
void
vp9_
short_
idct16x16_1_add_c
(
int16_t
*
input
,
uint8_t
*
dest
,
void
vp9_idct16x16_1_add_c
(
int16_t
*
input
,
uint8_t
*
dest
,
int
dest_stride
)
{
int
i
,
j
;
int
a1
;
...
...
@@ -1333,17 +1333,17 @@ void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
}
void
vp9_idct
_add_
16x16
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
)
{
void
vp9_idct16x16
_add
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
)
{
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
if
(
eob
)
{
if
(
eob
==
1
)
/* DC only DCT coefficient. */
vp9_
short_
idct16x16_1_add
(
input
,
dest
,
stride
);
vp9_idct16x16_1_add
(
input
,
dest
,
stride
);
else
if
(
eob
<=
10
)
vp9_
short_
idct16x16_10_add
(
input
,
dest
,
stride
);
vp9_idct16x16_10_add
(
input
,
dest
,
stride
);
else
vp9_
short_
idct16x16_add
(
input
,
dest
,
stride
);
vp9_idct16x16_
256_
add
(
input
,
dest
,
stride
);
}
}
...
...
@@ -1379,7 +1379,7 @@ void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
void
vp9_iht_add_16x16
(
TX_TYPE
tx_type
,
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
)
{
if
(
tx_type
==
DCT_DCT
)
{
vp9_idct
_add_
16x16
(
input
,
dest
,
stride
,
eob
);
vp9_idct16x16
_add
(
input
,
dest
,
stride
,
eob
);
}
else
{
if
(
eob
>
0
)
{
vp9_short_iht16x16_add
(
input
,
dest
,
stride
,
tx_type
);
...
...
vp9/common/vp9_idct.h
View file @
419c3f6f
...
...
@@ -91,7 +91,7 @@ typedef struct {
void
vp9_idct4x4_add
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
);
void
vp9_iwht4x4_add
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
);
void
vp9_idct8x8_add
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
);
void
vp9_idct
_add_
16x16
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
);
void
vp9_idct16x16
_add
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
);
void
vp9_idct_add_32x32
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
eob
);
void
vp9_iht_add
(
TX_TYPE
tx_type
,
int16_t
*
input
,
uint8_t
*
dest
,
...
...
vp9/common/vp9_rtcd_defs.sh
View file @
419c3f6f
...
...
@@ -282,14 +282,14 @@ specialize vp9_idct8x8_64_add sse2 neon
prototype void vp9_idct8x8_10_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_10_add sse2 neon
prototype void vp9_
short_
idct16x16_1_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_
short_
idct16x16_1_add sse2 neon
prototype void vp9_idct16x16_1_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_1_add sse2 neon
prototype void vp9_
short_
idct16x16_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_
short_
idct16x16_add sse2 neon
prototype void vp9_idct16x16_
256_
add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_
256_
add sse2 neon
prototype void vp9_
short_
idct16x16_10_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_
short_
idct16x16_10_add sse2 neon
prototype void vp9_idct16x16_10_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_10_add sse2 neon
prototype void vp9_short_idct32x32_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct32x32_add sse2 neon
...
...
vp9/common/x86/vp9_idct_intrin_sse2.c
View file @
419c3f6f
...
...
@@ -1263,7 +1263,7 @@ void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
stp2_10, stp2_13, stp2_11, stp2_12) \
}
void
vp9_
short_
idct16x16_add_sse2
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_idct16x16_
256_
add_sse2
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
)
{
const
__m128i
rounding
=
_mm_set1_epi32
(
DCT_CONST_ROUNDING
);
const
__m128i
final_rounding
=
_mm_set1_epi16
(
1
<<
5
);
const
__m128i
zero
=
_mm_setzero_si128
();
...
...
@@ -1470,7 +1470,7 @@ void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
}
}
void
vp9_
short_
idct16x16_1_add_sse2
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_idct16x16_1_add_sse2
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
)
{
__m128i
dc_value
;
const
__m128i
zero
=
_mm_setzero_si128
();
int
a
,
i
;
...
...
@@ -2456,7 +2456,7 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride,
write_buffer_8x16
(
dest
,
in1
,
stride
);
}
void
vp9_
short_
idct16x16_10_add_sse2
(
int16_t
*
input
,
uint8_t
*
dest
,
void
vp9_idct16x16_10_add_sse2
(
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
)
{
const
__m128i
rounding
=
_mm_set1_epi32
(
DCT_CONST_ROUNDING
);
const
__m128i
final_rounding
=
_mm_set1_epi16
(
1
<<
5
);
...
...
vp9/encoder/vp9_encodemb.c
View file @
419c3f6f
...
...
@@ -454,7 +454,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_short_idct32x32_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
break
;
case
TX_16X16
:
vp9_idct
_add_
16x16
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
,
pd
->
eobs
[
block
]);
vp9_idct16x16
_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
,
pd
->
eobs
[
block
]);
break
;
case
TX_8X8
:
vp9_idct8x8_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
,
pd
->
eobs
[
block
]);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment