Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
2b43501b
Commit
2b43501b
authored
Sep 28, 2017
by
Debargha Mukherjee
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement 64x32 and 32x64 transforms
Change-Id: Ifa983d83a509cdfad78f6400df7d60c8f5b4f68c
parent
26d3e45f
Changes
15
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
2220 additions
and
62 deletions
+2220
-62
av1/common/av1_fwd_txfm2d.c
av1/common/av1_fwd_txfm2d.c
+50
-0
av1/common/av1_inv_txfm2d.c
av1/common/av1_inv_txfm2d.c
+14
-2
av1/common/av1_loopfilter.c
av1/common/av1_loopfilter.c
+8
-0
av1/common/av1_rtcd_defs.pl
av1/common/av1_rtcd_defs.pl
+18
-5
av1/common/av1_txfm.h
av1/common/av1_txfm.h
+6
-0
av1/common/common_data.h
av1/common/common_data.h
+194
-22
av1/common/entropy.c
av1/common/entropy.c
+6
-0
av1/common/entropy.h
av1/common/entropy.h
+18
-0
av1/common/enums.h
av1/common/enums.h
+11
-7
av1/common/idct.c
av1/common/idct.c
+184
-16
av1/common/scan.c
av1/common/scan.c
+1436
-0
av1/common/x86/av1_fwd_txfm2d_sse4.c
av1/common/x86/av1_fwd_txfm2d_sse4.c
+2
-0
av1/encoder/dct.c
av1/encoder/dct.c
+126
-8
av1/encoder/hybrid_fwd_txfm.c
av1/encoder/hybrid_fwd_txfm.c
+113
-2
av1/encoder/rd.c
av1/encoder/rd.c
+34
-0
No files found.
av1/common/av1_fwd_txfm2d.c
View file @
2b43501b
...
...
@@ -257,6 +257,7 @@ void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
fwd_txfm2d_c
(
input
,
output
,
stride
,
&
cfg
,
txfm_buf
,
bd
);
}
#if CONFIG_TX64X64
void
av1_fwd_txfm2d_64x64_c
(
const
int16_t
*
input
,
int32_t
*
output
,
int
stride
,
int
tx_type
,
int
bd
)
{
int32_t
txfm_buf
[
64
*
64
];
...
...
@@ -264,6 +265,21 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
fwd_txfm2d_c
(
input
,
output
,
stride
,
&
cfg
,
txfm_buf
,
bd
);
}
// Forward 2-D transform entry point for TX_32X64: looks up the flip/1-D
// configuration for tx_type and hands the work to the generic fwd_txfm2d_c
// with a local 32*64-entry scratch buffer.
void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
                            int tx_type, int bd) {
  int32_t scratch[32 * 64];
  TXFM_2D_FLIP_CFG flip_cfg = av1_get_fwd_txfm_32x64_cfg(tx_type);

  fwd_txfm2d_c(input, output, stride, &flip_cfg, scratch, bd);
}
// Forward 2-D transform entry point for TX_64X32: looks up the flip/1-D
// configuration for tx_type and hands the work to the generic fwd_txfm2d_c
// with a local 64*32-entry scratch buffer.
void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
                            int tx_type, int bd) {
  int32_t scratch[64 * 32];
  TXFM_2D_FLIP_CFG flip_cfg = av1_get_fwd_txfm_64x32_cfg(tx_type);

  fwd_txfm2d_c(input, output, stride, &flip_cfg, scratch, bd);
}
#endif // CONFIG_TX64X64
static
const
TXFM_1D_CFG
*
fwd_txfm_col_cfg_ls
[
TX_TYPES_1D
][
TX_SIZES
]
=
{
// DCT
{
...
...
@@ -342,6 +358,39 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size) {
return
cfg
;
}
#if CONFIG_TX64X64
// Returns the forward 2-D transform configuration for TX_32X64.
//
// Only DCT_DCT is handled: the column pass uses the 64-point DCT column
// configuration and the row pass is looked up in fwd_txfm_row_cfg_ls using
// the horizontal 1-D type/size derived from tx_type and TX_32X64. No flipping
// is requested. Any other tx_type trips assert(0).
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(int tx_type) {
  // Zero-initialize so that the unsupported-type path, which is reachable
  // when assert() is compiled out, returns a well-defined (empty)
  // configuration rather than indeterminate stack contents.
  TXFM_2D_FLIP_CFG cfg = { 0 };
  const int tx_type_row = htx_tab[tx_type];
  const int tx_size_row = txsize_horz_map[TX_32X64];
  switch (tx_type) {
    case DCT_DCT:
      cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
      cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      break;
    default: assert(0);
  }
  return cfg;
}
// Returns the forward 2-D transform configuration for TX_64X32.
//
// Only DCT_DCT is handled: the row pass uses the 64-point DCT row
// configuration and the column pass is looked up in fwd_txfm_col_cfg_ls using
// the vertical 1-D type/size derived from tx_type and TX_64X32. No flipping
// is requested. Any other tx_type trips assert(0).
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(int tx_type) {
  // Zero-initialize so that the unsupported-type path, which is reachable
  // when assert() is compiled out, returns a well-defined (empty)
  // configuration rather than indeterminate stack contents.
  TXFM_2D_FLIP_CFG cfg = { 0 };
  const int tx_type_col = vtx_tab[tx_type];
  const int tx_size_col = txsize_vert_map[TX_64X32];
  switch (tx_type) {
    case DCT_DCT:
      cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
      cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      break;
    default: assert(0);
  }
  return cfg;
}
TXFM_2D_FLIP_CFG
av1_get_fwd_txfm_64x64_cfg
(
int
tx_type
)
{
TXFM_2D_FLIP_CFG
cfg
;
switch
(
tx_type
)
{
...
...
@@ -358,3 +407,4 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
}
return
cfg
;
}
#endif // CONFIG_TX64X64
av1/common/av1_inv_txfm2d.c
View file @
2b43501b
...
...
@@ -349,12 +349,24 @@ void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
inv_txfm2d_add_facade
(
input
,
output
,
stride
,
txfm_buf
,
tx_type
,
TX_32X32
,
bd
);
}
#if CONFIG_TX64X64
// Inverse 2-D transform + add for TX_64X64. Fetches the 64x64 inverse
// configuration for tx_type and runs inv_txfm2d_add_c with a fixed shift of
// -4; the trailing assert checks that this matches fwd_shift_sum[TX_64X64].
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
  int scratch[64 * 64 + 64 + 64];
  TXFM_2D_FLIP_CFG flip_cfg = av1_get_inv_txfm_64x64_cfg(tx_type);

  inv_txfm2d_add_c(input, output, stride, &flip_cfg, scratch, -4, bd);
#if CONFIG_TX64X64
  assert(fwd_shift_sum[TX_64X64] == -4);
#endif
}
void
av1_inv_txfm2d_add_64x32_c
(
const
int32_t
*
input
,
uint16_t
*
output
,
int
stride
,
int
tx_type
,
int
bd
)
{
int
txfm_buf
[
64
*
32
+
64
+
64
];
inv_txfm2d_add_facade
(
input
,
output
,
stride
,
txfm_buf
,
tx_type
,
TX_32X64
,
bd
);
}
// Inverse 2-D transform + add for TX_32X64, routed through the shared
// inv_txfm2d_add_facade with a local scratch buffer.
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
  int scratch[64 * 32 + 64 + 64];

  inv_txfm2d_add_facade(input, output, stride, scratch, tx_type, TX_32X64, bd);
}
#endif // CONFIG_TX64X64
av1/common/av1_loopfilter.c
View file @
2b43501b
...
...
@@ -2813,6 +2813,10 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
16
-
1
,
// TX_16X8
16
-
1
,
// TX_16X32
32
-
1
,
// TX_32X16
#if CONFIG_TX64X64
32
-
1
,
// TX_32X64
64
-
1
,
// TX_64X32
#endif // CONFIG_TX64X64
4
-
1
,
// TX_4X16
16
-
1
,
// TX_16X4
8
-
1
,
// TX_8X32
...
...
@@ -2835,6 +2839,10 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
8
-
1
,
// TX_16X8
32
-
1
,
// TX_16X32
16
-
1
,
// TX_32X16
#if CONFIG_TX64X64
64
-
1
,
// TX_32X64
32
-
1
,
// TX_64X32
#endif // CONFIG_TX64X64
16
-
1
,
// TX_4X16
4
-
1
,
// TX_16X4
32
-
1
,
// TX_8X32
...
...
av1/common/av1_rtcd_defs.pl
View file @
2b43501b
...
...
@@ -160,6 +160,8 @@ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *out
if
(
aom_config
("
CONFIG_TX64X64
")
eq
"
yes
")
{
add_proto
qw/void av1_iht64x64_4096_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param
";
add_proto
qw/void av1_iht32x64_2048_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param
";
add_proto
qw/void av1_iht64x32_2048_add/
,
"
const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param
";
}
if
(
aom_config
("
CONFIG_NEW_QUANT
")
eq
"
yes
")
{
...
...
@@ -285,7 +287,11 @@ add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *ou
if
(
aom_config
("
CONFIG_DAALA_DCT32
")
ne
"
yes
")
{
specialize
qw/av1_inv_txfm2d_add_32x32 avx2/
;
}
add_proto
qw/void av1_inv_txfm2d_add_64x64/
,
"
const int32_t *input, uint16_t *output, int stride, int tx_type, int bd
";
if
(
aom_config
("
CONFIG_TX64X64
")
eq
"
yes
")
{
add_proto
qw/void av1_inv_txfm2d_add_64x64/
,
"
const int32_t *input, uint16_t *output, int stride, int tx_type, int bd
";
add_proto
qw/void av1_inv_txfm2d_add_64x32/
,
"
const int32_t *input, uint16_t *output, int stride, int tx_type, int bd
";
add_proto
qw/void av1_inv_txfm2d_add_32x64/
,
"
const int32_t *input, uint16_t *output, int stride, int tx_type, int bd
";
}
#
# Encoder functions below this point.
...
...
@@ -354,6 +360,8 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if
(
aom_config
("
CONFIG_TX64X64
")
eq
"
yes
")
{
add_proto
qw/void av1_fht64x64/
,
"
const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param
";
add_proto
qw/void av1_fht64x32/
,
"
const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param
";
add_proto
qw/void av1_fht32x64/
,
"
const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param
";
}
add_proto
qw/void av1_fht4x8/
,
"
const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param
";
...
...
@@ -396,7 +404,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
}
add_proto
qw/void av1_fwd_idtx/
,
"
const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type
";
add_proto
qw/void av1_fwd_idtx/
,
"
const int16_t *src_diff, tran_low_t *coeff, int stride, int bs
x, int bsy
, int tx_type
";
#fwd txfm
add_proto
qw/void av1_fwd_txfm2d_4x8/
,
"
const int16_t *input, int32_t *output, int stride, int tx_type, int bd
";
...
...
@@ -421,9 +429,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if
(
aom_config
("
CONFIG_DAALA_DCT32
")
ne
"
yes
")
{
specialize
qw/av1_fwd_txfm2d_32x32 sse4_1/
;
}
add_proto
qw/void av1_fwd_txfm2d_64x64/
,
"
const int16_t *input, int32_t *output, int stride, int tx_type, int bd
";
if
(
aom_config
("
CONFIG_DAALA_DCT64
")
ne
"
yes
")
{
specialize
qw/av1_fwd_txfm2d_64x64 sse4_1/
;
if
(
aom_config
("
CONFIG_TX64X64
")
eq
"
yes
")
{
add_proto
qw/void av1_fwd_txfm2d_32x64/
,
"
const int16_t *input, int32_t *output, int stride, int tx_type, int bd
";
add_proto
qw/void av1_fwd_txfm2d_64x32/
,
"
const int16_t *input, int32_t *output, int stride, int tx_type, int bd
";
add_proto
qw/void av1_fwd_txfm2d_64x64/
,
"
const int16_t *input, int32_t *output, int stride, int tx_type, int bd
";
if
(
aom_config
("
CONFIG_DAALA_DCT64
")
ne
"
yes
")
{
specialize
qw/av1_fwd_txfm2d_64x64 sse4_1/
;
}
}
#
# Motion search
...
...
av1/common/av1_txfm.h
View file @
2b43501b
...
...
@@ -207,6 +207,8 @@ static INLINE int av1_rotate_tx_size(int tx_size) {
case
TX_32X32
:
return
TX_32X32
;
#if CONFIG_TX64X64
case
TX_64X64
:
return
TX_64X64
;
case
TX_32X64
:
return
TX_64X32
;
case
TX_64X32
:
return
TX_32X64
;
#endif
case
TX_4X8
:
return
TX_8X4
;
case
TX_8X4
:
return
TX_4X8
;
...
...
@@ -352,7 +354,11 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
int
bd
);
TXFM_2D_FLIP_CFG
av1_get_fwd_txfm_cfg
(
int
tx_type
,
int
tx_size
);
#if CONFIG_TX64X64
TXFM_2D_FLIP_CFG
av1_get_fwd_txfm_64x64_cfg
(
int
tx_type
);
TXFM_2D_FLIP_CFG
av1_get_fwd_txfm_64x32_cfg
(
int
tx_type
);
TXFM_2D_FLIP_CFG
av1_get_fwd_txfm_32x64_cfg
(
int
tx_type
);
#endif // CONFIG_TX64X64
TXFM_2D_FLIP_CFG
av1_get_inv_txfm_cfg
(
int
tx_type
,
int
tx_size
);
#ifdef __cplusplus
}
...
...
av1/common/common_data.h
View file @
2b43501b
This diff is collapsed.
Click to expand it.
av1/common/entropy.c
View file @
2b43501b
...
...
@@ -149,6 +149,9 @@ const uint16_t band_count_table[TX_SIZES_ALL][8] = {
{
1
,
2
,
3
,
4
,
8
,
32
-
18
,
0
},
{
1
,
2
,
3
,
4
,
8
,
32
-
18
,
0
},
{
1
,
2
,
3
,
4
,
11
,
128
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
128
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
512
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
512
-
21
,
0
},
#if CONFIG_TX64X64
{
1
,
2
,
3
,
4
,
11
,
2048
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
2048
-
21
,
0
},
#endif // CONFIG_TX64X64
{
1
,
2
,
3
,
4
,
11
,
64
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
64
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
256
-
21
,
0
},
{
1
,
2
,
3
,
4
,
11
,
256
-
21
,
0
},
};
...
...
@@ -165,6 +168,9 @@ const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
{
0
,
1
,
3
,
6
,
10
,
18
,
32
,
0
},
{
0
,
1
,
3
,
6
,
10
,
18
,
32
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
128
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
128
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
512
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
512
,
0
},
#if CONFIG_TX64X64
{
0
,
1
,
3
,
6
,
10
,
21
,
2048
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
2048
,
0
},
#endif // CONFIG_TX64X64
{
0
,
1
,
3
,
6
,
10
,
21
,
64
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
64
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
256
,
0
},
{
0
,
1
,
3
,
6
,
10
,
21
,
256
,
0
},
};
...
...
av1/common/entropy.h
View file @
2b43501b
...
...
@@ -326,6 +326,16 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
left_ec
=
!!
(
*
(
const
uint64_t
*
)
l
|
*
(
const
uint64_t
*
)(
l
+
8
)
|
*
(
const
uint64_t
*
)(
l
+
16
)
|
*
(
const
uint64_t
*
)(
l
+
24
));
break
;
case
TX_32X64
:
above_ec
=
!!
(
*
(
const
uint64_t
*
)
a
|
*
(
const
uint64_t
*
)(
a
+
8
));
left_ec
=
!!
(
*
(
const
uint64_t
*
)
l
|
*
(
const
uint64_t
*
)(
l
+
8
)
|
*
(
const
uint64_t
*
)(
l
+
16
)
|
*
(
const
uint64_t
*
)(
l
+
24
));
break
;
case
TX_64X32
:
above_ec
=
!!
(
*
(
const
uint64_t
*
)
a
|
*
(
const
uint64_t
*
)(
a
+
8
)
|
*
(
const
uint64_t
*
)(
a
+
16
)
|
*
(
const
uint64_t
*
)(
a
+
24
));
left_ec
=
!!
(
*
(
const
uint64_t
*
)
l
|
*
(
const
uint64_t
*
)(
l
+
8
));
break
;
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case
TX_4X16
:
...
...
@@ -396,6 +406,14 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec
=
!!
(
*
(
const
uint64_t
*
)
a
|
*
(
const
uint64_t
*
)(
a
+
8
));
left_ec
=
!!
(
*
(
const
uint64_t
*
)
l
|
*
(
const
uint64_t
*
)(
l
+
8
));
break
;
case
TX_32X64
:
above_ec
=
!!*
(
const
uint64_t
*
)
a
;
left_ec
=
!!
(
*
(
const
uint64_t
*
)
l
|
*
(
const
uint64_t
*
)(
l
+
8
));
break
;
case
TX_64X32
:
above_ec
=
!!
(
*
(
const
uint64_t
*
)
a
|
*
(
const
uint64_t
*
)(
a
+
8
));
left_ec
=
!!*
(
const
uint64_t
*
)
l
;
break
;
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case
TX_4X16
:
...
...
av1/common/enums.h
View file @
2b43501b
...
...
@@ -190,14 +190,18 @@ typedef enum ATTRIBUTE_PACKED {
TX_16X16
,
// 16x16 transform
TX_32X32
,
// 32x32 transform
#if CONFIG_TX64X64
TX_64X64
,
// 64x64 transform
TX_64X64
,
// 64x64 transform
#endif // CONFIG_TX64X64
TX_4X8
,
// 4x8 transform
TX_8X4
,
// 8x4 transform
TX_8X16
,
// 8x16 transform
TX_16X8
,
// 16x8 transform
TX_16X32
,
// 16x32 transform
TX_32X16
,
// 32x16 transform
#if CONFIG_TX64X64
TX_32X64
,
// 32x64 transform
TX_64X32
,
// 64x32 transform
#endif // CONFIG_TX64X64
TX_4X8
,
// 4x8 transform
TX_8X4
,
// 8x4 transform
TX_8X16
,
// 8x16 transform
TX_16X8
,
// 16x8 transform
TX_16X32
,
// 16x32 transform
TX_32X16
,
// 32x16 transform
TX_4X16
,
// 4x16 transform
TX_16X4
,
// 16x4 transform
TX_8X32
,
// 8x32 transform
...
...
av1/common/idct.c
View file @
2b43501b
...
...
@@ -122,15 +122,16 @@ static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
// Inverse identity transform and add.
#if CONFIG_EXT_TX
static
void
inv_idtx_add_c
(
const
tran_low_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
bs
,
int
tx_type
)
{
int
bs
x
,
int
bsy
,
int
tx_type
)
{
int
r
,
c
;
const
int
shift
=
bs
<
32
?
3
:
(
bs
<
64
?
2
:
1
);
const
int
pels
=
bsx
*
bsy
;
const
int
shift
=
3
-
((
pels
>
256
)
+
(
pels
>
1024
));
if
(
tx_type
==
IDTX
)
{
for
(
r
=
0
;
r
<
bs
;
++
r
)
{
for
(
c
=
0
;
c
<
bs
;
++
c
)
for
(
r
=
0
;
r
<
bs
y
;
++
r
)
{
for
(
c
=
0
;
c
<
bs
x
;
++
c
)
dest
[
c
]
=
clip_pixel_add
(
dest
[
c
],
input
[
c
]
>>
shift
);
dest
+=
stride
;
input
+=
bs
;
input
+=
bs
x
;
}
}
}
...
...
@@ -185,17 +186,19 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
#if CONFIG_HIGHBITDEPTH
#if CONFIG_EXT_TX && CONFIG_TX64X64
static
void
highbd_inv_idtx_add_c
(
const
tran_low_t
*
input
,
uint8_t
*
dest8
,
int
stride
,
int
bs
,
int
tx_type
,
int
bd
)
{
int
stride
,
int
bsx
,
int
bsy
,
int
tx_type
,
int
bd
)
{
int
r
,
c
;
const
int
shift
=
bs
<
32
?
3
:
2
;
const
int
pels
=
bsx
*
bsy
;
const
int
shift
=
3
-
((
pels
>
256
)
+
(
pels
>
1024
));
uint16_t
*
dest
=
CONVERT_TO_SHORTPTR
(
dest8
);
if
(
tx_type
==
IDTX
)
{
for
(
r
=
0
;
r
<
bs
;
++
r
)
{
for
(
c
=
0
;
c
<
bs
;
++
c
)
for
(
r
=
0
;
r
<
bs
y
;
++
r
)
{
for
(
c
=
0
;
c
<
bs
x
;
++
c
)
dest
[
c
]
=
highbd_clip_pixel_add
(
dest
[
c
],
input
[
c
]
>>
shift
,
bd
);
dest
+=
stride
;
input
+=
bs
;
input
+=
bs
x
;
}
}
}
...
...
@@ -1521,6 +1524,131 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
}
// Inverse hybrid transform of a 2048-coefficient block laid out as 32 rows of
// 64 coefficients, with the result added into dest.
//
// Row pass: each input row is run through the selected row transform, scaled
// by Sqrt2 through dct_const_round_shift, and stored transposed.
// Column pass: each of the 64 transposed vectors is run through the selected
// column transform.
// Finally the output is rounded by 5 bits and accumulated into dest with
// pixel clipping.
void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  static const transform_2d IHT_64x32[] = {
    { aom_idct32_c, idct64_row_c },      // DCT_DCT
    { ihalfright32_c, idct64_row_c },    // ADST_DCT
    { aom_idct32_c, ihalfright64_c },    // DCT_ADST
    { ihalfright32_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, idct64_row_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx64_c },            // IDTX
    { aom_idct32_c, iidtx64_c },         // V_DCT
    { iidtx32_c, idct64_row_c },         // H_DCT
    { ihalfright32_c, iidtx64_c },       // V_ADST
    { iidtx32_c, ihalfright64_c },       // H_ADST
    { ihalfright32_c, iidtx64_c },       // V_FLIPADST
    { iidtx32_c, ihalfright64_c },       // H_FLIPADST
#endif
  };
  const transform_2d *const tx = &IHT_64x32[tx_type];
  const int height = 32;
  const int width = 64;
  tran_low_t out[64][32], transposed[64][32], row_out[64];
  tran_low_t *outp = &out[0][0];
  int outstride = height;
  int r, c;

  // Row pass: transform one input row at a time and store it transposed.
  for (r = 0; r < height; ++r) {
    tx->rows(input, row_out);
    for (c = 0; c < width; ++c) {
      transposed[c][r] = (tran_low_t)dct_const_round_shift(row_out[c] * Sqrt2);
    }
    input += width;
  }

  // Column pass over every transposed vector.
  for (c = 0; c < width; ++c) {
    tx->cols(transposed[c], out[c]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, height, width);
#endif

  // Accumulate the rounded residual into the destination block.
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c) {
      const int dst_idx = r * stride + c;
      const int src_idx = c * outstride + r;
      dest[dst_idx] =
          clip_pixel_add(dest[dst_idx], ROUND_POWER_OF_TWO(outp[src_idx], 5));
    }
  }
}
// Inverse hybrid transform of a 2048-coefficient block laid out as 64 rows of
// 32 coefficients, with the result added into dest.
//
// Row pass: each input row is run through the selected row transform, scaled
// by Sqrt2 through dct_const_round_shift, and stored transposed.
// Column pass: each of the 32 transposed vectors is run through the selected
// column transform.
// Finally the output is rounded by 5 bits and accumulated into dest with
// pixel clipping.
void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  static const transform_2d IHT_32x64[] = {
    { idct64_col_c, aom_idct32_c },      // DCT_DCT
    { ihalfright64_c, aom_idct32_c },    // ADST_DCT
    { idct64_col_c, ihalfright32_c },    // DCT_ADST
    { ihalfright64_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, aom_idct32_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx32_c },            // IDTX
    { idct64_col_c, iidtx32_c },         // V_DCT
    { iidtx64_c, aom_idct32_c },         // H_DCT
    { ihalfright64_c, iidtx32_c },       // V_ADST
    { iidtx64_c, ihalfright32_c },       // H_ADST
    { ihalfright64_c, iidtx32_c },       // V_FLIPADST
    { iidtx64_c, ihalfright32_c },       // H_FLIPADST
#endif
  };
  const transform_2d *const tx = &IHT_32x64[tx_type];
  const int width = 32;
  const int height = 64;
  tran_low_t out[32][64], transposed[32][64], row_out[32];
  tran_low_t *outp = &out[0][0];
  int outstride = height;
  int r, c;

  // Row pass: transform one input row at a time and store it transposed.
  for (r = 0; r < height; ++r) {
    tx->rows(input, row_out);
    for (c = 0; c < width; ++c) {
      transposed[c][r] = (tran_low_t)dct_const_round_shift(row_out[c] * Sqrt2);
    }
    input += width;
  }

  // Column pass over every transposed vector.
  for (c = 0; c < width; ++c) {
    tx->cols(transposed[c], out[c]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, height, width);
#endif

  // Accumulate the rounded residual into the destination block.
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c) {
      const int dst_idx = r * stride + c;
      const int src_idx = c * outstride + r;
      dest[dst_idx] =
          clip_pixel_add(dest[dst_idx], ROUND_POWER_OF_TWO(outp[src_idx], 5));
    }
  }
}
#endif // CONFIG_TX64X64
// idct
...
...
@@ -1743,7 +1871,7 @@ static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
// Use C version since DST only exists in C code
av1_iht4x4_16_add_c
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
4
,
tx_type
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
4
,
4
,
tx_type
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
}
...
...
@@ -1834,6 +1962,18 @@ static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
av1_iht32x16_512_add
(
input
,
dest
,
stride
,
txfm_param
);
}
#if CONFIG_TX64X64
// TX_32X64 inverse transform + add: forwards directly to
// av1_iht32x64_2048_add with the caller's arguments unchanged.
static void inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht32x64_2048_add(input, dest, stride, txfm_param);
}
// TX_64X32 inverse transform + add: forwards directly to
// av1_iht64x32_2048_add with the caller's arguments unchanged.
static void inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht64x32_2048_add(input, dest, stride, txfm_param);
}
#endif // CONFIG_TX64X64
static
void
inv_txfm_add_8x8
(
const
tran_low_t
*
input
,
uint8_t
*
dest
,
int
stride
,
const
TxfmParam
*
txfm_param
)
{
const
TX_TYPE
tx_type
=
txfm_param
->
tx_type
;
...
...
@@ -1875,7 +2015,7 @@ static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
// Use C version since DST only exists in C code
av1_iht8x8_64_add_c
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
8
,
tx_type
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
8
,
8
,
tx_type
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
}
...
...
@@ -1917,7 +2057,7 @@ static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
av1_iht16x16_256_add
(
input
,
dest
,
stride
,
txfm_param
);
#endif // CONFIG_DAALA_DCT16
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
16
,
tx_type
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
16
,
16
,
tx_type
);
break
;
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
case
MRC_DCT
:
assert
(
0
&&
"Invalid tx type for tx size"
);
...
...
@@ -1954,7 +2094,7 @@ static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
case
H_FLIPADST
:
av1_iht32x32_1024_add_c
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
32
,
tx_type
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
32
,
32
,
tx_type
);
break
;
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
case
MRC_DCT
:
imrc32x32_add_c
(
input
,
dest
,
stride
,
txfm_param
);
break
;
...
...
@@ -1990,7 +2130,7 @@ static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
case
H_FLIPADST
:
av1_iht64x64_4096_add_c
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
64
,
tx_type
);
break
;
case
IDTX
:
inv_idtx_add_c
(
input
,
dest
,
stride
,
64
,
64
,
tx_type
);
break
;
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
case
MRC_DCT
:
assert
(
0
&&
"Invalid tx type for tx size"
);
...
...
@@ -2130,6 +2270,22 @@ static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
txfm_param
->
tx_type
,
txfm_param
->
bd
);
}
#if CONFIG_TX64X64
// High-bitdepth TX_32X64 inverse transform + add. Reinterprets the
// coefficient buffer as int32_t, converts dest with CONVERT_TO_SHORTPTR, and
// calls the C 2-D inverse transform with the tx_type and bd from txfm_param.
static void highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const int32_t *const coeffs = (const int32_t *)input;
  uint16_t *const dest16 = CONVERT_TO_SHORTPTR(dest);

  av1_inv_txfm2d_add_32x64_c(coeffs, dest16, stride, txfm_param->tx_type,
                             txfm_param->bd);
}
// High-bitdepth TX_64X32 inverse transform + add. Reinterprets the
// coefficient buffer as int32_t, converts dest with CONVERT_TO_SHORTPTR, and
// calls the C 2-D inverse transform with the tx_type and bd from txfm_param.
static void highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const int32_t *const coeffs = (const int32_t *)input;
  uint16_t *const dest16 = CONVERT_TO_SHORTPTR(dest);

  av1_inv_txfm2d_add_64x32_c(coeffs, dest16, stride, txfm_param->tx_type,
                             txfm_param->bd);
}
#endif // CONFIG_TX64X64
static
void
highbd_inv_txfm_add_8x8
(
const
tran_low_t
*
input
,
uint8_t
*
dest
,
int
stride
,
const
TxfmParam
*
txfm_param
)
{
int
bd
=
txfm_param
->
bd
;
...
...
@@ -2280,7 +2436,7 @@ static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
DCT_DCT
,
bd
);
break
;
case
IDTX
:
highbd_inv_idtx_add_c
(
input
,
dest
,
stride
,
64
,
tx_type
,
bd
);
highbd_inv_idtx_add_c
(
input
,
dest
,
stride
,
64
,
64
,
tx_type
,
bd
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
@@ -2304,6 +2460,10 @@ void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
case
TX_16X8
:
inv_txfm_add_16x8
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
TX_16X32
:
inv_txfm_add_16x32
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
TX_32X16
:
inv_txfm_add_32x16
(
input
,
dest
,
stride
,
txfm_param
);
break
;
#if CONFIG_TX64X64
case
TX_64X32
:
inv_txfm_add_64x32
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
TX_32X64
:
inv_txfm_add_32x64
(
input
,
dest
,
stride
,
txfm_param
);
break
;
#endif // CONFIG_TX64X64
case
TX_4X4
:
// this is like av1_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
...
...
@@ -2474,6 +2634,14 @@ void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
case
TX_32X16
:
highbd_inv_txfm_add_32x16
(
input
,
dest
,
stride
,
txfm_param
);
break
;
#if CONFIG_TX64X64
case
TX_64X32
:
highbd_inv_txfm_add_64x32
(
input
,
dest
,
stride
,
txfm_param
);
break
;
case
TX_32X64
:
highbd_inv_txfm_add_32x64
(
input
,
dest
,
stride
,
txfm_param
);
break
;
#endif // CONFIG_TX64X64
case
TX_4X4
:
// this is like av1_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
...
...
av1/common/scan.c
View file @
2b43501b
This diff is collapsed.
Click to expand it.
av1/common/x86/av1_fwd_txfm2d_sse4.c
View file @
2b43501b
...
...
@@ -81,6 +81,7 @@ void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
fwd_txfm2d_sse4_1
(
input
,
output
,
stride
,
&
cfg
,
txfm_buf
);
}
#if CONFIG_TX64X64
void
av1_fwd_txfm2d_64x64_sse4_1
(
const
int16_t
*
input
,
int32_t
*
output
,
int
stride
,
int
tx_type
,
int
bd
)
{
DECLARE_ALIGNED
(
16
,
int32_t
,
txfm_buf
[
4096
]);
...
...
@@ -88,3 +89,4 @@ void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
(
void
)
bd
;
fwd_txfm2d_sse4_1
(
input
,
output
,
stride
,
&
cfg
,
txfm_buf
);
}
#endif // CONFIG_TX64X64
av1/encoder/dct.c
View file @
2b43501b
...
...
@@ -2446,7 +2446,7 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
{
daala_idtx64
,
daala_fdst64
},
// H_ADST
{
daala_fdst64
,
daala_idtx64
},
// V_FLIPADST
{
daala_idtx64
,
daala_fdst64
},
// H_FLIPADST