Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
7e491de1
Commit
7e491de1
authored
Nov 05, 2014
by
Yaowu Xu
Committed by
Gerrit Code Review
Nov 05, 2014
Browse files
Merge "Fix visual studio 2013 compiler warnings"
parents
92440e82
2c4fee17
Changes
10
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_idct.c
View file @
7e491de1
...
...
@@ -34,7 +34,7 @@
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
#else
#define WRAPLOW(x, bd) (x)
#define WRAPLOW(x, bd)
((int32_t)
(x)
)
#endif // CONFIG_EMULATE_HARDWARE
#if CONFIG_VP9_HIGHBITDEPTH
...
...
@@ -367,14 +367,14 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) {
}
// stage 1
s0
=
cospi_2_64
*
x0
+
cospi_30_64
*
x1
;
s1
=
cospi_30_64
*
x0
-
cospi_2_64
*
x1
;
s2
=
cospi_10_64
*
x2
+
cospi_22_64
*
x3
;
s3
=
cospi_22_64
*
x2
-
cospi_10_64
*
x3
;
s4
=
cospi_18_64
*
x4
+
cospi_14_64
*
x5
;
s5
=
cospi_14_64
*
x4
-
cospi_18_64
*
x5
;
s6
=
cospi_26_64
*
x6
+
cospi_6_64
*
x7
;
s7
=
cospi_6_64
*
x6
-
cospi_26_64
*
x7
;
s0
=
(
int
)(
cospi_2_64
*
x0
+
cospi_30_64
*
x1
)
;
s1
=
(
int
)(
cospi_30_64
*
x0
-
cospi_2_64
*
x1
)
;
s2
=
(
int
)(
cospi_10_64
*
x2
+
cospi_22_64
*
x3
)
;
s3
=
(
int
)(
cospi_22_64
*
x2
-
cospi_10_64
*
x3
)
;
s4
=
(
int
)(
cospi_18_64
*
x4
+
cospi_14_64
*
x5
)
;
s5
=
(
int
)(
cospi_14_64
*
x4
-
cospi_18_64
*
x5
)
;
s6
=
(
int
)(
cospi_26_64
*
x6
+
cospi_6_64
*
x7
)
;
s7
=
(
int
)(
cospi_6_64
*
x6
-
cospi_26_64
*
x7
)
;
x0
=
WRAPLOW
(
dct_const_round_shift
(
s0
+
s4
),
8
);
x1
=
WRAPLOW
(
dct_const_round_shift
(
s1
+
s5
),
8
);
...
...
@@ -386,14 +386,14 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) {
x7
=
WRAPLOW
(
dct_const_round_shift
(
s3
-
s7
),
8
);
// stage 2
s0
=
x0
;
s1
=
x1
;
s2
=
x2
;
s3
=
x3
;
s4
=
cospi_8_64
*
x4
+
cospi_24_64
*
x5
;
s5
=
cospi_24_64
*
x4
-
cospi_8_64
*
x5
;
s6
=
-
cospi_24_64
*
x6
+
cospi_8_64
*
x7
;
s7
=
cospi_8_64
*
x6
+
cospi_24_64
*
x7
;
s0
=
(
int
)
x0
;
s1
=
(
int
)
x1
;
s2
=
(
int
)
x2
;
s3
=
(
int
)
x3
;
s4
=
(
int
)(
cospi_8_64
*
x4
+
cospi_24_64
*
x5
)
;
s5
=
(
int
)(
cospi_24_64
*
x4
-
cospi_8_64
*
x5
)
;
s6
=
(
int
)(
-
cospi_24_64
*
x6
+
cospi_8_64
*
x7
)
;
s7
=
(
int
)(
cospi_8_64
*
x6
+
cospi_24_64
*
x7
)
;
x0
=
WRAPLOW
(
s0
+
s2
,
8
);
x1
=
WRAPLOW
(
s1
+
s3
,
8
);
...
...
@@ -405,10 +405,10 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) {
x7
=
WRAPLOW
(
dct_const_round_shift
(
s5
-
s7
),
8
);
// stage 3
s2
=
cospi_16_64
*
(
x2
+
x3
);
s3
=
cospi_16_64
*
(
x2
-
x3
);
s6
=
cospi_16_64
*
(
x6
+
x7
);
s7
=
cospi_16_64
*
(
x6
-
x7
);
s2
=
(
int
)(
cospi_16_64
*
(
x2
+
x3
)
)
;
s3
=
(
int
)(
cospi_16_64
*
(
x2
-
x3
)
)
;
s6
=
(
int
)(
cospi_16_64
*
(
x6
+
x7
)
)
;
s7
=
(
int
)(
cospi_16_64
*
(
x6
-
x7
)
)
;
x2
=
WRAPLOW
(
dct_const_round_shift
(
s2
),
8
);
x3
=
WRAPLOW
(
dct_const_round_shift
(
s3
),
8
);
...
...
vp9/common/vp9_idct.h
View file @
7e491de1
...
...
@@ -29,10 +29,12 @@ extern "C" {
#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
#define pair_set_epi16(a, b) \
_mm_set_epi16(b, a, b, a, b, a, b, a)
_mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
(int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
#define dual_set_epi16(a, b) \
_mm_set_epi16(b, b, b, b, a, a, a, a)
_mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \
(int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a))
// Constants:
// for (int i = 1; i< 32; ++i)
...
...
vp9/common/x86/vp9_idct_intrin_sse2.c
View file @
7e491de1
...
...
@@ -216,7 +216,7 @@ static void iadst4_sse2(__m128i *in) {
const
__m128i
k__sinpi_p03_p02
=
pair_set_epi16
(
sinpi_3_9
,
sinpi_2_9
);
const
__m128i
k__sinpi_p02_m01
=
pair_set_epi16
(
sinpi_2_9
,
-
sinpi_1_9
);
const
__m128i
k__sinpi_p03_m04
=
pair_set_epi16
(
sinpi_3_9
,
-
sinpi_4_9
);
const
__m128i
k__sinpi_p03_p03
=
_mm_set1_epi16
(
sinpi_3_9
);
const
__m128i
k__sinpi_p03_p03
=
_mm_set1_epi16
(
(
int16_t
)
sinpi_3_9
);
const
__m128i
kZero
=
_mm_set1_epi16
(
0
);
const
__m128i
k__DCT_CONST_ROUNDING
=
_mm_set1_epi32
(
DCT_CONST_ROUNDING
);
__m128i
u
[
8
],
v
[
8
],
in7
;
...
...
@@ -641,7 +641,7 @@ static void iadst8_sse2(__m128i *in) {
const
__m128i
k__cospi_p24_m08
=
pair_set_epi16
(
cospi_24_64
,
-
cospi_8_64
);
const
__m128i
k__cospi_m24_p08
=
pair_set_epi16
(
-
cospi_24_64
,
cospi_8_64
);
const
__m128i
k__cospi_p16_m16
=
pair_set_epi16
(
cospi_16_64
,
-
cospi_16_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
cospi_16_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
(
int16_t
)
cospi_16_64
);
const
__m128i
k__const_0
=
_mm_set1_epi16
(
0
);
const
__m128i
k__DCT_CONST_ROUNDING
=
_mm_set1_epi32
(
DCT_CONST_ROUNDING
);
...
...
@@ -1530,8 +1530,8 @@ static void iadst16_8col(__m128i *in) {
const
__m128i
k__cospi_p08_p24
=
pair_set_epi16
(
cospi_8_64
,
cospi_24_64
);
const
__m128i
k__cospi_p24_m08
=
pair_set_epi16
(
cospi_24_64
,
-
cospi_8_64
);
const
__m128i
k__cospi_m24_p08
=
pair_set_epi16
(
-
cospi_24_64
,
cospi_8_64
);
const
__m128i
k__cospi_m16_m16
=
_mm_set1_epi16
(
-
cospi_16_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
cospi_16_64
);
const
__m128i
k__cospi_m16_m16
=
_mm_set1_epi16
(
(
int16_t
)
-
cospi_16_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
(
int16_t
)
cospi_16_64
);
const
__m128i
k__cospi_p16_m16
=
pair_set_epi16
(
cospi_16_64
,
-
cospi_16_64
);
const
__m128i
k__cospi_m16_p16
=
pair_set_epi16
(
-
cospi_16_64
,
cospi_16_64
);
const
__m128i
k__DCT_CONST_ROUNDING
=
_mm_set1_epi32
(
DCT_CONST_ROUNDING
);
...
...
@@ -1985,7 +1985,7 @@ static void idct16_8col(__m128i *in) {
const
__m128i
k__cospi_p04_p28
=
pair_set_epi16
(
cospi_4_64
,
cospi_28_64
);
const
__m128i
k__cospi_p12_m20
=
pair_set_epi16
(
cospi_12_64
,
-
cospi_20_64
);
const
__m128i
k__cospi_p20_p12
=
pair_set_epi16
(
cospi_20_64
,
cospi_12_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
cospi_16_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
(
int16_t
)
cospi_16_64
);
const
__m128i
k__cospi_p16_m16
=
pair_set_epi16
(
cospi_16_64
,
-
cospi_16_64
);
const
__m128i
k__cospi_p24_m08
=
pair_set_epi16
(
cospi_24_64
,
-
cospi_8_64
);
const
__m128i
k__cospi_p08_p24
=
pair_set_epi16
(
cospi_8_64
,
cospi_24_64
);
...
...
vp9/common/x86/vp9_idct_intrin_ssse3.c
View file @
7e491de1
...
...
@@ -36,7 +36,7 @@ static void idct16_8col(__m128i *in, int round) {
const
__m128i
k__cospi_m24_m08
=
pair_set_epi16
(
-
cospi_24_64
,
-
cospi_8_64
);
const
__m128i
k__DCT_CONST_ROUNDING
=
_mm_set1_epi32
(
DCT_CONST_ROUNDING
);
const
__m128i
k__cospi_p16_p16_x2
=
pair_set_epi16
(
23170
,
23170
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
cospi_16_64
);
const
__m128i
k__cospi_p16_p16
=
_mm_set1_epi16
(
(
int16_t
)
cospi_16_64
);
const
__m128i
k__cospi_m16_p16
=
pair_set_epi16
(
-
cospi_16_64
,
cospi_16_64
);
__m128i
v
[
16
],
u
[
16
],
s
[
16
],
t
[
16
];
...
...
vp9/encoder/vp9_dct.c
View file @
7e491de1
...
...
@@ -37,12 +37,12 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) {
temp1
=
(
step
[
0
]
+
step
[
1
])
*
cospi_16_64
;
temp2
=
(
step
[
0
]
-
step
[
1
])
*
cospi_16_64
;
output
[
0
]
=
fdct_round_shift
(
temp1
);
output
[
2
]
=
fdct_round_shift
(
temp2
);
output
[
0
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
output
[
2
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step
[
2
]
*
cospi_24_64
+
step
[
3
]
*
cospi_8_64
;
temp2
=
-
step
[
2
]
*
cospi_8_64
+
step
[
3
]
*
cospi_24_64
;
output
[
1
]
=
fdct_round_shift
(
temp1
);
output
[
3
]
=
fdct_round_shift
(
temp2
);
output
[
1
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
output
[
3
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
}
void
vp9_fdct4x4_1_c
(
const
int16_t
*
input
,
tran_low_t
*
output
,
int
stride
)
{
...
...
@@ -98,12 +98,12 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
step
[
3
]
=
input
[
0
]
-
input
[
3
];
temp1
=
(
step
[
0
]
+
step
[
1
])
*
cospi_16_64
;
temp2
=
(
step
[
0
]
-
step
[
1
])
*
cospi_16_64
;
out
[
0
]
=
fdct_round_shift
(
temp1
);
out
[
2
]
=
fdct_round_shift
(
temp2
);
out
[
0
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
2
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step
[
2
]
*
cospi_24_64
+
step
[
3
]
*
cospi_8_64
;
temp2
=
-
step
[
2
]
*
cospi_8_64
+
step
[
3
]
*
cospi_24_64
;
out
[
1
]
=
fdct_round_shift
(
temp1
);
out
[
3
]
=
fdct_round_shift
(
temp2
);
out
[
1
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
3
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
// Do next column (which is a transposed row in second/horizontal pass)
in_pass0
++
;
in
++
;
...
...
@@ -157,10 +157,10 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) {
s3
=
x2
-
x0
+
x3
;
// 1-D transform scaling factor is sqrt(2).
output
[
0
]
=
fdct_round_shift
(
s0
);
output
[
1
]
=
fdct_round_shift
(
s1
);
output
[
2
]
=
fdct_round_shift
(
s2
);
output
[
3
]
=
fdct_round_shift
(
s3
);
output
[
0
]
=
(
tran_low_t
)
fdct_round_shift
(
s0
);
output
[
1
]
=
(
tran_low_t
)
fdct_round_shift
(
s1
);
output
[
2
]
=
(
tran_low_t
)
fdct_round_shift
(
s2
);
output
[
3
]
=
(
tran_low_t
)
fdct_round_shift
(
s3
);
}
static
const
transform_2d
FHT_4
[]
=
{
...
...
@@ -227,16 +227,16 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
t1
=
(
x0
-
x1
)
*
cospi_16_64
;
t2
=
x2
*
cospi_24_64
+
x3
*
cospi_8_64
;
t3
=
-
x2
*
cospi_8_64
+
x3
*
cospi_24_64
;
output
[
0
]
=
fdct_round_shift
(
t0
);
output
[
2
]
=
fdct_round_shift
(
t2
);
output
[
4
]
=
fdct_round_shift
(
t1
);
output
[
6
]
=
fdct_round_shift
(
t3
);
output
[
0
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
output
[
2
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
output
[
4
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
output
[
6
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
// Stage 2
t0
=
(
s6
-
s5
)
*
cospi_16_64
;
t1
=
(
s6
+
s5
)
*
cospi_16_64
;
t2
=
fdct_round_shift
(
t0
);
t3
=
fdct_round_shift
(
t1
);
t2
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
t3
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
// Stage 3
x0
=
s4
+
t2
;
...
...
@@ -249,10 +249,10 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
t1
=
x1
*
cospi_12_64
+
x2
*
cospi_20_64
;
t2
=
x2
*
cospi_12_64
+
x1
*
-
cospi_20_64
;
t3
=
x3
*
cospi_28_64
+
x0
*
-
cospi_4_64
;
output
[
1
]
=
fdct_round_shift
(
t0
);
output
[
3
]
=
fdct_round_shift
(
t2
);
output
[
5
]
=
fdct_round_shift
(
t1
);
output
[
7
]
=
fdct_round_shift
(
t3
);
output
[
1
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
output
[
3
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
output
[
5
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
output
[
7
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
}
void
vp9_fdct8x8_1_c
(
const
int16_t
*
input
,
tran_low_t
*
output
,
int
stride
)
{
...
...
@@ -298,10 +298,10 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
t1
=
(
x0
-
x1
)
*
cospi_16_64
;
t2
=
x2
*
cospi_24_64
+
x3
*
cospi_8_64
;
t3
=
-
x2
*
cospi_8_64
+
x3
*
cospi_24_64
;
output
[
0
*
8
]
=
fdct_round_shift
(
t0
);
output
[
2
*
8
]
=
fdct_round_shift
(
t2
);
output
[
4
*
8
]
=
fdct_round_shift
(
t1
);
output
[
6
*
8
]
=
fdct_round_shift
(
t3
);
output
[
0
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
output
[
2
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
output
[
4
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
output
[
6
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
// Stage 2
t0
=
(
s6
-
s5
)
*
cospi_16_64
;
...
...
@@ -320,10 +320,10 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
t1
=
x1
*
cospi_12_64
+
x2
*
cospi_20_64
;
t2
=
x2
*
cospi_12_64
+
x1
*
-
cospi_20_64
;
t3
=
x3
*
cospi_28_64
+
x0
*
-
cospi_4_64
;
output
[
1
*
8
]
=
fdct_round_shift
(
t0
);
output
[
3
*
8
]
=
fdct_round_shift
(
t2
);
output
[
5
*
8
]
=
fdct_round_shift
(
t1
);
output
[
7
*
8
]
=
fdct_round_shift
(
t3
);
output
[
1
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
output
[
3
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
output
[
5
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
output
[
7
*
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
input
++
;
output
++
;
}
...
...
@@ -434,10 +434,10 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
t1
=
(
x0
-
x1
)
*
cospi_16_64
;
t2
=
x3
*
cospi_8_64
+
x2
*
cospi_24_64
;
t3
=
x3
*
cospi_24_64
-
x2
*
cospi_8_64
;
out
[
0
]
=
fdct_round_shift
(
t0
);
out
[
4
]
=
fdct_round_shift
(
t2
);
out
[
8
]
=
fdct_round_shift
(
t1
);
out
[
12
]
=
fdct_round_shift
(
t3
);
out
[
0
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
out
[
4
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
out
[
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
out
[
12
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
// Stage 2
t0
=
(
s6
-
s5
)
*
cospi_16_64
;
...
...
@@ -456,10 +456,10 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
t1
=
x1
*
cospi_12_64
+
x2
*
cospi_20_64
;
t2
=
x2
*
cospi_12_64
+
x1
*
-
cospi_20_64
;
t3
=
x3
*
cospi_28_64
+
x0
*
-
cospi_4_64
;
out
[
2
]
=
fdct_round_shift
(
t0
);
out
[
6
]
=
fdct_round_shift
(
t2
);
out
[
10
]
=
fdct_round_shift
(
t1
);
out
[
14
]
=
fdct_round_shift
(
t3
);
out
[
2
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
out
[
6
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
out
[
10
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
out
[
14
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
}
// Work on the next eight values; step1 -> odd_results
{
...
...
@@ -502,20 +502,20 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
// step 6
temp1
=
step1
[
0
]
*
cospi_30_64
+
step1
[
7
]
*
cospi_2_64
;
temp2
=
step1
[
1
]
*
cospi_14_64
+
step1
[
6
]
*
cospi_18_64
;
out
[
1
]
=
fdct_round_shift
(
temp1
);
out
[
9
]
=
fdct_round_shift
(
temp2
);
out
[
1
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
9
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step1
[
2
]
*
cospi_22_64
+
step1
[
5
]
*
cospi_10_64
;
temp2
=
step1
[
3
]
*
cospi_6_64
+
step1
[
4
]
*
cospi_26_64
;
out
[
5
]
=
fdct_round_shift
(
temp1
);
out
[
13
]
=
fdct_round_shift
(
temp2
);
out
[
5
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
13
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step1
[
3
]
*
-
cospi_26_64
+
step1
[
4
]
*
cospi_6_64
;
temp2
=
step1
[
2
]
*
-
cospi_10_64
+
step1
[
5
]
*
cospi_22_64
;
out
[
3
]
=
fdct_round_shift
(
temp1
);
out
[
11
]
=
fdct_round_shift
(
temp2
);
out
[
3
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
11
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step1
[
1
]
*
-
cospi_18_64
+
step1
[
6
]
*
cospi_14_64
;
temp2
=
step1
[
0
]
*
-
cospi_2_64
+
step1
[
7
]
*
cospi_30_64
;
out
[
7
]
=
fdct_round_shift
(
temp1
);
out
[
15
]
=
fdct_round_shift
(
temp2
);
out
[
7
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
15
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
}
// Do next column (which is a transposed row in second/horizontal pass)
in
++
;
...
...
@@ -589,14 +589,14 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
x6
=
fdct_round_shift
(
s6
);
x7
=
fdct_round_shift
(
s7
);
output
[
0
]
=
x0
;
output
[
1
]
=
-
x4
;
output
[
2
]
=
x6
;
output
[
3
]
=
-
x2
;
output
[
4
]
=
x3
;
output
[
5
]
=
-
x7
;
output
[
6
]
=
x5
;
output
[
7
]
=
-
x1
;
output
[
0
]
=
(
tran_low_t
)
x0
;
output
[
1
]
=
(
tran_low_t
)
-
x4
;
output
[
2
]
=
(
tran_low_t
)
x6
;
output
[
3
]
=
(
tran_low_t
)
-
x2
;
output
[
4
]
=
(
tran_low_t
)
x3
;
output
[
5
]
=
(
tran_low_t
)
-
x7
;
output
[
6
]
=
(
tran_low_t
)
x5
;
output
[
7
]
=
(
tran_low_t
)
-
x1
;
}
static
const
transform_2d
FHT_8
[]
=
{
...
...
@@ -659,10 +659,10 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
c1
=
e1
-
c1
;
a1
-=
c1
;
d1
+=
b1
;
op
[
0
]
=
a1
;
op
[
4
]
=
c1
;
op
[
8
]
=
d1
;
op
[
12
]
=
b1
;
op
[
0
]
=
(
tran_low_t
)
a1
;
op
[
4
]
=
(
tran_low_t
)
c1
;
op
[
8
]
=
(
tran_low_t
)
d1
;
op
[
12
]
=
(
tran_low_t
)
b1
;
ip_pass0
++
;
op
++
;
...
...
@@ -683,10 +683,10 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
c1
=
e1
-
c1
;
a1
-=
c1
;
d1
+=
b1
;
op
[
0
]
=
a1
*
UNIT_QUANT_FACTOR
;
op
[
1
]
=
c1
*
UNIT_QUANT_FACTOR
;
op
[
2
]
=
d1
*
UNIT_QUANT_FACTOR
;
op
[
3
]
=
b1
*
UNIT_QUANT_FACTOR
;
op
[
0
]
=
(
tran_low_t
)(
a1
*
UNIT_QUANT_FACTOR
)
;
op
[
1
]
=
(
tran_low_t
)(
c1
*
UNIT_QUANT_FACTOR
)
;
op
[
2
]
=
(
tran_low_t
)(
d1
*
UNIT_QUANT_FACTOR
)
;
op
[
3
]
=
(
tran_low_t
)(
b1
*
UNIT_QUANT_FACTOR
)
;
ip
+=
4
;
op
+=
4
;
...
...
@@ -745,10 +745,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
t1
=
(
x0
-
x1
)
*
cospi_16_64
;
t2
=
x3
*
cospi_8_64
+
x2
*
cospi_24_64
;
t3
=
x3
*
cospi_24_64
-
x2
*
cospi_8_64
;
out
[
0
]
=
fdct_round_shift
(
t0
);
out
[
4
]
=
fdct_round_shift
(
t2
);
out
[
8
]
=
fdct_round_shift
(
t1
);
out
[
12
]
=
fdct_round_shift
(
t3
);
out
[
0
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
out
[
4
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
out
[
8
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
out
[
12
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
// Stage 2
t0
=
(
s6
-
s5
)
*
cospi_16_64
;
...
...
@@ -767,10 +767,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
t1
=
x1
*
cospi_12_64
+
x2
*
cospi_20_64
;
t2
=
x2
*
cospi_12_64
+
x1
*
-
cospi_20_64
;
t3
=
x3
*
cospi_28_64
+
x0
*
-
cospi_4_64
;
out
[
2
]
=
fdct_round_shift
(
t0
);
out
[
6
]
=
fdct_round_shift
(
t2
);
out
[
10
]
=
fdct_round_shift
(
t1
);
out
[
14
]
=
fdct_round_shift
(
t3
);
out
[
2
]
=
(
tran_low_t
)
fdct_round_shift
(
t0
);
out
[
6
]
=
(
tran_low_t
)
fdct_round_shift
(
t2
);
out
[
10
]
=
(
tran_low_t
)
fdct_round_shift
(
t1
);
out
[
14
]
=
(
tran_low_t
)
fdct_round_shift
(
t3
);
}
// step 2
...
...
@@ -816,23 +816,23 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
// step 6
temp1
=
step1
[
0
]
*
cospi_30_64
+
step1
[
7
]
*
cospi_2_64
;
temp2
=
step1
[
1
]
*
cospi_14_64
+
step1
[
6
]
*
cospi_18_64
;
out
[
1
]
=
fdct_round_shift
(
temp1
);
out
[
9
]
=
fdct_round_shift
(
temp2
);
out
[
1
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
9
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step1
[
2
]
*
cospi_22_64
+
step1
[
5
]
*
cospi_10_64
;
temp2
=
step1
[
3
]
*
cospi_6_64
+
step1
[
4
]
*
cospi_26_64
;
out
[
5
]
=
fdct_round_shift
(
temp1
);
out
[
13
]
=
fdct_round_shift
(
temp2
);
out
[
5
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
13
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step1
[
3
]
*
-
cospi_26_64
+
step1
[
4
]
*
cospi_6_64
;
temp2
=
step1
[
2
]
*
-
cospi_10_64
+
step1
[
5
]
*
cospi_22_64
;
out
[
3
]
=
fdct_round_shift
(
temp1
);
out
[
11
]
=
fdct_round_shift
(
temp2
);
out
[
3
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
11
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
temp1
=
step1
[
1
]
*
-
cospi_18_64
+
step1
[
6
]
*
cospi_14_64
;
temp2
=
step1
[
0
]
*
-
cospi_2_64
+
step1
[
7
]
*
cospi_30_64
;
out
[
7
]
=
fdct_round_shift
(
temp1
);
out
[
15
]
=
fdct_round_shift
(
temp2
);
out
[
7
]
=
(
tran_low_t
)
fdct_round_shift
(
temp1
);
out
[
15
]
=
(
tran_low_t
)
fdct_round_shift
(
temp2
);
}
static
void
fadst16
(
const
tran_low_t
*
input
,
tran_low_t
*
output
)
{
...
...
@@ -980,22 +980,22 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
x14
=
fdct_round_shift
(
s14
);
x15
=
fdct_round_shift
(
s15
);
output
[
0
]
=
x0
;
output
[
1
]
=
-
x8
;
output
[
2
]
=
x12
;
output
[
3
]
=
-
x4
;
output
[
4
]
=
x6
;
output
[
5
]
=
x14
;
output
[
6
]
=
x10
;
output
[
7
]
=
x2
;
output
[
8
]
=
x3
;
output
[
9
]
=
x11
;
output
[
10
]
=
x15
;
output
[
11
]
=
x7
;
output
[
12
]
=
x5
;
output
[
13
]
=
-
x13
;
output
[
14
]
=
x9
;
output
[
15
]
=
-
x1
;
output
[
0
]
=
(
tran_low_t
)
x0
;
output
[
1
]
=
(
tran_low_t
)
-
x8
;
output
[
2
]
=
(
tran_low_t
)
x12
;
output
[
3
]
=
(
tran_low_t
)
-
x4
;
output
[
4
]
=
(
tran_low_t
)
x6
;
output
[
5
]
=
(
tran_low_t
)
x14
;
output
[
6
]
=
(
tran_low_t
)
x10
;
output
[
7
]
=
(
tran_low_t
)
x2
;
output
[
8
]
=
(
tran_low_t
)
x3
;
output
[
9
]
=
(
tran_low_t
)
x11
;
output
[
10
]
=
(
tran_low_t
)
x15
;
output
[
11
]
=
(
tran_low_t
)
x7
;
output
[
12
]
=
(
tran_low_t
)
x5
;
output
[
13
]
=
(
tran_low_t
)
-
x13
;
output
[
14
]
=
(
tran_low_t
)
x9
;
output
[
15
]
=
(
tran_low_t
)
-
x1
;
}
static
const
transform_2d
FHT_16
[]
=
{
...
...
@@ -1404,7 +1404,8 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
temp_in
[
j
]
=
output
[
j
+
i
*
32
];
fdct32
(
temp_in
,
temp_out
,
0
);
for
(
j
=
0
;
j
<
32
;
++
j
)
out
[
j
+
i
*
32
]
=
(
temp_out
[
j
]
+
1
+
(
temp_out
[
j
]
<
0
))
>>
2
;
out
[
j
+
i
*
32
]
=
(
tran_low_t
)((
temp_out
[
j
]
+
1
+
(
temp_out
[
j
]
<
0
))
>>
2
);
}
}
...
...
@@ -1435,7 +1436,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
temp_in
[
j
]
=
output
[
j
+
i
*
32
];
fdct32
(
temp_in
,
temp_out
,
1
);
for
(
j
=
0
;
j
<
32
;
++
j
)
out
[
j
+
i
*
32
]
=
temp_out
[
j
];
out
[
j
+
i
*
32
]
=
(
tran_low_t
)
temp_out
[
j
];
}
}
...
...
vp9/encoder/vp9_quantize.c
View file @
7e491de1
...
...
@@ -56,7 +56,7 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
const
int64_t
tmp
=
(
clamp
(
abs_coeff
+
round_ptr
[
rc
!=
0
],
INT32_MIN
,
INT32_MAX
)
*
quant
)
>>
16
;
qcoeff_ptr
[
rc
]
=
(
tmp
^
coeff_sign
)
-
coeff_sign
;
qcoeff_ptr
[
rc
]
=
(
tran_low_t
)(
(
tmp
^
coeff_sign
)
-
coeff_sign
)
;
dqcoeff_ptr
[
rc
]
=
qcoeff_ptr
[
rc
]
*
dequant_ptr
;
if
(
tmp
)
eob
=
0
;
...
...
@@ -107,7 +107,7 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
const
int64_t
tmp
=
(
clamp
(
abs_coeff
+
round_ptr
[
rc
!=
0
],
INT32_MIN
,
INT32_MAX
)
*
quant
)
>>
15
;
qcoeff_ptr
[
rc
]
=
(
tmp
^
coeff_sign
)
-
coeff_sign
;
qcoeff_ptr
[
rc
]
=
(
tran_low_t
)(
(
tmp
^
coeff_sign
)
-
coeff_sign
)
;
dqcoeff_ptr
[
rc
]
=
qcoeff_ptr
[
rc
]
*
dequant_ptr
/
2
;
if
(
tmp
)
eob
=
0
;
...
...
@@ -197,7 +197,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
(
clamp
(
abs_coeff
+
round_ptr
[
rc
!=
0
],
INT32_MIN
,
INT32_MAX
)
*
quant_ptr
[
rc
!=
0
])
>>
16
;
qcoeff_ptr
[
rc
]
=
(
tmp
^
coeff_sign
)
-
coeff_sign
;
qcoeff_ptr
[
rc
]
=
(
tran_low_t
)(
(
tmp
^
coeff_sign
)
-
coeff_sign
)
;
dqcoeff_ptr
[
rc
]
=
qcoeff_ptr
[
rc
]
*
dequant_ptr
[
rc
!=
0
];
if
(
tmp
)
...
...
@@ -284,7 +284,7 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
tmp
=
clamp
(
abs_coeff
+
ROUND_POWER_OF_TWO
(
round_ptr
[
rc
!=
0
],
1
),
INT32_MIN
,
INT32_MAX
);
tmp
=
(
tmp
*
quant_ptr
[
rc
!=
0
])
>>
15
;
qcoeff_ptr
[
rc
]
=
(
tmp
^
coeff_sign
)
-
coeff_sign
;
qcoeff_ptr
[
rc
]
=
(
tran_low_t
)(
(
tmp
^
coeff_sign
)
-
coeff_sign
)
;
dqcoeff_ptr
[
rc
]
=
qcoeff_ptr
[
rc
]
*
dequant_ptr
[
rc
!=
0
]
/
2
;
}
...
...
@@ -393,7 +393,7 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
INT32_MIN
,
INT32_MAX
);
tmp
=
((((
tmp
*
quant_ptr
[
rc
!=
0
])
>>
16
)
+
tmp
)
*
quant_shift_ptr
[
rc
!=
0
])
>>
16
;
// quantization
qcoeff_ptr
[
rc
]
=
(
tmp
^
coeff_sign
)
-
coeff_sign
;
qcoeff_ptr
[
rc
]
=
(
tran_low_t
)(
(
tmp
^
coeff_sign
)
-
coeff_sign
)
;
dqcoeff_ptr
[
rc
]
=
qcoeff_ptr
[
rc
]
*
dequant_ptr
[
rc
!=
0
];
if
(
tmp
)
...
...
@@ -510,7 +510,7 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
tmp
=
((((
tmp
*
quant_ptr
[
rc
!=
0
])
>>
16
)
+
tmp
)
*
quant_shift_ptr
[
rc
!=
0
])
>>
15
;
qcoeff_ptr
[
rc
]
=
(
tmp
^
coeff_sign
)
-
coeff_sign
;
qcoeff_ptr
[
rc
]
=
(
tran_low_t
)(
(
tmp
^
coeff_sign
)
-
coeff_sign
)
;
dqcoeff_ptr
[
rc
]
=
qcoeff_ptr
[
rc
]
*
dequant_ptr
[
rc
!=
0
]
/
2
;
if
(
tmp
)
...
...
vp9/encoder/vp9_variance.c
View file @
7e491de1
...
...
@@ -298,8 +298,8 @@ void highbd_variance(const uint8_t *a8, int a_stride,
uint64_t
sse_long
=
0
;
uint64_t
sum_long
=
0
;
highbd_variance64
(
a8
,
a_stride
,
b8
,
b_stride
,
w
,
h
,
&
sse_long
,
&
sum_long
);
*
sse
=
sse_long
;
*
sum
=
sum_long
;
*
sse
=
(
unsigned
int
)
sse_long
;
*
sum
=
(
int
)
sum_long
;
}
void
highbd_10_variance
(
const
uint8_t
*
a8
,
int
a_stride
,
...
...
@@ -309,8 +309,8 @@ void highbd_10_variance(const uint8_t *a8, int a_stride,
uint64_t
sse_long
=
0
;
uint64_t
sum_long
=
0
;
highbd_variance64
(
a8
,
a_stride
,
b8
,
b_stride
,
w
,
h
,
&
sse_long
,
&
sum_long
);
*
sum
=
ROUND_POWER_OF_TWO
(
sum_long
,
2
);
*
sse
=
ROUND_POWER_OF_TWO
(
sse_long
,
4
);
*
sum
=
(
int
)
ROUND_POWER_OF_TWO
(
sum_long
,
2
);
*
sse
=
(
unsigned
int
)
ROUND_POWER_OF_TWO
(
sse_long
,
4
);
}
void
highbd_12_variance
(
const
uint8_t
*
a8
,
int
a_stride
,
...
...
@@ -320,8 +320,8 @@ void highbd_12_variance(const uint8_t *a8, int a_stride,
uint64_t
sse_long
=
0
;
uint64_t
sum_long
=
0
;
highbd_variance64
(
a8
,
a_stride
,
b8
,
b_stride
,
w
,
h
,
&
sse_long
,
&
sum_long
);
*
sum
=
ROUND_POWER_OF_TWO
(
sum_long
,
4
);
*
sse
=
ROUND_POWER_OF_TWO
(
sse_long
,
8
);
*
sum
=
(
int
)
ROUND_POWER_OF_TWO
(
sum_long
,
4
);
*
sse
=
(
unsigned
int
)
ROUND_POWER_OF_TWO
(
sse_long
,
8
);
}
static
void
highbd_var_filter_block2d_bil_first_pass
(
...
...
vp9/encoder/x86/vp9_dct32x32_avx2.c
View file @
7e491de1
...
...
@@ -13,13 +13,14 @@
#include
"vpx_ports/mem.h"
#define pair256_set_epi16(a, b) \
_mm256_set_epi16(b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a)
_mm256_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
(int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
(int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
(int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a))
#define pair256_set_epi32(a, b) \
_mm256_set_epi32(b, a, b, a, b, a, b, a)
_mm256_set_epi32((int)(b), (int)(a), (int)(b), (int)(a), \
(int)(b), (int)(a), (int)(b), (int)(a))
#if FDCT32x32_HIGH_PRECISION
static
INLINE
__m256i
k_madd_epi32_avx2
(
__m256i
a
,
__m256i
b
)
{
...
...
@@ -50,7 +51,7 @@ void FDCT32x32_2D_AVX2(const int16_t *input,