Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
8251736b
Commit
8251736b
authored
Jan 17, 2018
by
Angie Chiang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Let adst4's precision be adjustable
Change-Id: I6e251328b2934130992dbd355cfdffc3c721d357
parent
06250276
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
59 additions
and
42 deletions
+59
-42
av1/common/av1_inv_txfm1d.c
av1/common/av1_inv_txfm1d.c
+15
-13
av1/common/av1_txfm.h
av1/common/av1_txfm.h
+13
-0
av1/common/x86/highbd_inv_txfm_sse4.c
av1/common/x86/highbd_inv_txfm_sse4.c
+5
-5
av1/encoder/av1_fwd_txfm1d.c
av1/encoder/av1_fwd_txfm1d.c
+16
-14
av1/encoder/x86/highbd_fwd_txfm_sse4.c
av1/encoder/x86/highbd_fwd_txfm_sse4.c
+5
-5
test/av1_fwd_txfm2d_test.cc
test/av1_fwd_txfm2d_test.cc
+5
-5
No files found.
av1/common/av1_inv_txfm1d.c
View file @
8251736b
...
...
@@ -742,7 +742,9 @@ void av1_iadst4_new(const int32_t *input, int32_t *output,
(
void
)
cos_bit
;
const
int32_t
size
=
4
;
int32_t
stage
=
0
;
int64_t
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
int
bit
=
cos_bit
[
0
];
const
int32_t
*
sinpi
=
sinpi_arr
(
bit
);
int32_t
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
// stage 0;
apply_range
(
stage
,
input
,
input
,
size
,
stage_range
[
stage
]);
...
...
@@ -756,29 +758,29 @@ void av1_iadst4_new(const int32_t *input, int32_t *output,
return
;
}
s0
=
sinpi
_1_9
*
x0
;
s1
=
sinpi
_2_9
*
x0
;
s2
=
sinpi
_3_9
*
x1
;
s3
=
sinpi
_4_9
*
x2
;
s4
=
sinpi
_1_9
*
x2
;
s5
=
sinpi
_2_9
*
x3
;
s6
=
sinpi
_4_9
*
x3
;
s0
=
sinpi
[
1
]
*
x0
;
s1
=
sinpi
[
2
]
*
x0
;
s2
=
sinpi
[
3
]
*
x1
;
s3
=
sinpi
[
4
]
*
x2
;
s4
=
sinpi
[
1
]
*
x2
;
s5
=
sinpi
[
2
]
*
x3
;
s6
=
sinpi
[
4
]
*
x3
;
s7
=
x0
-
x2
+
x3
;
s0
=
s0
+
s3
+
s5
;
s1
=
s1
-
s4
-
s6
;
s3
=
s2
;
s2
=
sinpi
_3_9
*
s7
;
s2
=
sinpi
[
3
]
*
s7
;
// 1-D transform scaling factor is sqrt(2).
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
stage
=
3
;
output
[
0
]
=
(
int32_t
)
dct_const_
round_shift
(
s0
+
s3
);
output
[
1
]
=
(
int32_t
)
dct_const_
round_shift
(
s1
+
s3
);
output
[
2
]
=
(
int32_t
)
dct_const_
round_shift
(
s2
);
output
[
3
]
=
(
int32_t
)
dct_const_
round_shift
(
s0
+
s1
-
s3
);
output
[
0
]
=
round_shift
(
s0
+
s3
,
bit
);
output
[
1
]
=
round_shift
(
s1
+
s3
,
bit
);
output
[
2
]
=
round_shift
(
s2
,
bit
);
output
[
3
]
=
round_shift
(
s0
+
s1
-
s3
,
bit
);
apply_range
(
stage
,
input
,
output
,
size
,
stage_range
[
stage
]);
}
...
...
av1/common/av1_txfm.h
View file @
8251736b
...
...
@@ -75,10 +75,23 @@ static const int32_t cospi_arr_data[7][64] = {
14359
,
12785
,
11204
,
9616
,
8022
,
6424
,
4821
,
3216
,
1608
}
};
// sinpi_arr_data[i][j] =
//   (int)round((sqrt(2) * sin(j * Pi / 9) * 2 / 3) * (1 << (cos_bit_min + i)))
// Row i therefore carries twice the fixed-point scale of row i - 1, and
// column 0 is always 0 because sin(0) == 0.
static const int32_t sinpi_arr_data[7][5] = {
  { 0, 330, 621, 836, 951 },
  { 0, 660, 1241, 1672, 1902 },
  { 0, 1321, 2482, 3344, 3803 },
  { 0, 2642, 4965, 6689, 7606 },
  { 0, 5283, 9929, 13377, 15212 },
  { 0, 10566, 19858, 26755, 30425 },
  { 0, 21133, 39717, 53510, 60849 }
};
// Returns a pointer to the first element of row (n - cos_bit_min) of
// cospi_arr_data. The index is not range-checked here, so the caller must
// pass an n that maps onto an existing row of the table.
static INLINE const int32_t *cospi_arr(int n) {
  const int row = n - cos_bit_min;
  return &cospi_arr_data[row][0];
}
// Returns a pointer to the first element of row (n - cos_bit_min) of
// sinpi_arr_data. The index is not range-checked here, so the caller must
// pass an n that maps onto an existing row of the table.
static INLINE const int32_t *sinpi_arr(int n) {
  const int row = n - cos_bit_min;
  return &sinpi_arr_data[row][0];
}
static
INLINE
int32_t
round_shift
(
int32_t
value
,
int
bit
)
{
assert
(
bit
>=
1
);
return
(
int32_t
)(((
int64_t
)
value
+
(
1ll
<<
(
bit
-
1
)))
>>
bit
);
...
...
av1/common/x86/highbd_inv_txfm_sse4.c
View file @
8251736b
...
...
@@ -72,12 +72,12 @@ static void idct4x4_sse4_1(__m128i *in, int bit) {
}
static
void
iadst4x4_sse4_1
(
__m128i
*
in
,
int
bit
)
{
bit
=
14
;
const
int32_t
*
sinpi
=
sinpi_arr
(
bit
)
;
const
__m128i
rnding
=
_mm_set1_epi32
(
1
<<
(
bit
-
1
));
const
__m128i
sinpi1
=
_mm_set1_epi32
((
int
)
sinpi
_1_9
);
const
__m128i
sinpi2
=
_mm_set1_epi32
((
int
)
sinpi
_2_9
);
const
__m128i
sinpi3
=
_mm_set1_epi32
((
int
)
sinpi
_3_9
);
const
__m128i
sinpi4
=
_mm_set1_epi32
((
int
)
sinpi
_4_9
);
const
__m128i
sinpi1
=
_mm_set1_epi32
((
int
)
sinpi
[
1
]
);
const
__m128i
sinpi2
=
_mm_set1_epi32
((
int
)
sinpi
[
2
]
);
const
__m128i
sinpi3
=
_mm_set1_epi32
((
int
)
sinpi
[
3
]
);
const
__m128i
sinpi4
=
_mm_set1_epi32
((
int
)
sinpi
[
4
]
);
__m128i
t
;
__m128i
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
__m128i
x0
,
x1
,
x2
,
x3
;
...
...
av1/encoder/av1_fwd_txfm1d.c
View file @
8251736b
...
...
@@ -694,8 +694,10 @@ void av1_fadst4_new(const int32_t *input, int32_t *output,
const
int8_t
*
cos_bit
,
const
int8_t
*
stage_range
)
{
(
void
)
cos_bit
;
(
void
)
stage_range
;
int64_t
x0
,
x1
,
x2
,
x3
;
int64_t
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
int
bit
=
cos_bit
[
0
];
const
int32_t
*
sinpi
=
sinpi_arr
(
bit
);
int32_t
x0
,
x1
,
x2
,
x3
;
int32_t
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
x0
=
input
[
0
];
x1
=
input
[
1
];
...
...
@@ -707,17 +709,17 @@ void av1_fadst4_new(const int32_t *input, int32_t *output,
return
;
}
s0
=
sinpi
_1_9
*
x0
;
s1
=
sinpi
_4_9
*
x0
;
s2
=
sinpi
_2_9
*
x1
;
s3
=
sinpi
_1_9
*
x1
;
s4
=
sinpi
_3_9
*
x2
;
s5
=
sinpi
_4_9
*
x3
;
s6
=
sinpi
_2_9
*
x3
;
s0
=
sinpi
[
1
]
*
x0
;
s1
=
sinpi
[
4
]
*
x0
;
s2
=
sinpi
[
2
]
*
x1
;
s3
=
sinpi
[
1
]
*
x1
;
s4
=
sinpi
[
3
]
*
x2
;
s5
=
sinpi
[
4
]
*
x3
;
s6
=
sinpi
[
2
]
*
x3
;
s7
=
x0
+
x1
-
x3
;
x0
=
s0
+
s2
+
s5
;
x1
=
sinpi
_3_9
*
s7
;
x1
=
sinpi
[
3
]
*
s7
;
x2
=
s1
-
s3
+
s6
;
x3
=
s4
;
...
...
@@ -727,10 +729,10 @@ void av1_fadst4_new(const int32_t *input, int32_t *output,
s3
=
x2
-
x0
+
x3
;
// 1-D transform scaling factor is sqrt(2).
output
[
0
]
=
(
int32_t
)
fdct_
round_shift
(
s0
);
output
[
1
]
=
(
int32_t
)
fdct_
round_shift
(
s1
);
output
[
2
]
=
(
int32_t
)
fdct_
round_shift
(
s2
);
output
[
3
]
=
(
int32_t
)
fdct_
round_shift
(
s3
);
output
[
0
]
=
round_shift
(
s0
,
bit
);
output
[
1
]
=
round_shift
(
s1
,
bit
);
output
[
2
]
=
round_shift
(
s2
,
bit
);
output
[
3
]
=
round_shift
(
s3
,
bit
);
}
void
av1_fadst8_new
(
const
int32_t
*
input
,
int32_t
*
output
,
...
...
av1/encoder/x86/highbd_fwd_txfm_sse4.c
View file @
8251736b
...
...
@@ -121,12 +121,12 @@ static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) {
}
static
void
fadst4x4_sse4_1
(
__m128i
*
in
,
int
bit
)
{
bit
=
14
;
const
int32_t
*
sinpi
=
sinpi_arr
(
bit
)
;
const
__m128i
rnding
=
_mm_set1_epi32
(
1
<<
(
bit
-
1
));
const
__m128i
sinpi1
=
_mm_set1_epi32
((
int
)
sinpi
_1_9
);
const
__m128i
sinpi2
=
_mm_set1_epi32
((
int
)
sinpi
_2_9
);
const
__m128i
sinpi3
=
_mm_set1_epi32
((
int
)
sinpi
_3_9
);
const
__m128i
sinpi4
=
_mm_set1_epi32
((
int
)
sinpi
_4_9
);
const
__m128i
sinpi1
=
_mm_set1_epi32
((
int
)
sinpi
[
1
]
);
const
__m128i
sinpi2
=
_mm_set1_epi32
((
int
)
sinpi
[
2
]
);
const
__m128i
sinpi3
=
_mm_set1_epi32
((
int
)
sinpi
[
3
]
);
const
__m128i
sinpi4
=
_mm_set1_epi32
((
int
)
sinpi
[
4
]
);
__m128i
t
;
__m128i
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
__m128i
x0
,
x1
,
x2
,
x3
;
...
...
test/av1_fwd_txfm2d_test.cc
View file @
8251736b
...
...
@@ -138,7 +138,7 @@ vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
vector
<
AV1FwdTxfm2dParam
>
param_list
;
for
(
int
t
=
0
;
t
<
TX_TYPES
;
++
t
)
{
const
TX_TYPE
tx_type
=
static_cast
<
TX_TYPE
>
(
t
);
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_4X4
,
2
,
0.5
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_4X4
,
3
,
0.5
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_8X8
,
5
,
0.5
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_16X16
,
11
,
1.2
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_32X32
,
70
,
6.1
));
...
...
@@ -148,15 +148,15 @@ vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
}
#endif // CONFIG_TX64X64
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_4X8
,
3.2
,
0.5
0
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_8X4
,
3.
2
,
0.64
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_4X8
,
3.2
,
0.5
2
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_8X4
,
3.
6
,
0.64
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_8X16
,
8
,
0.8
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_16X8
,
8
,
1.1
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_16X32
,
29
,
3.9
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_32X16
,
37
,
5.9
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_4X16
,
3
,
0.6
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_16X4
,
5
,
0.9
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_4X16
,
5
,
0.6
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_16X4
,
6
,
0.9
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_8X32
,
21
,
1.2
));
param_list
.
push_back
(
AV1FwdTxfm2dParam
(
tx_type
,
TX_32X8
,
13
,
1.7
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment