Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
13d2aee7
Commit
13d2aee7
authored
Mar 29, 2017
by
Yi Luo
Browse files
Add the missing IDTX type optimization to hybrid txfm
Change-Id: I99b15e5270bfefe2eb3e982aeba06ed564540d73
parent
72e2e982
Changes
9
Hide whitespace changes
Inline
Side-by-side
av1/common/x86/hybrid_inv_txfm_avx2.c
View file @
13d2aee7
...
...
@@ -470,6 +470,10 @@ void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest,
iadst16
(
in
);
flip_col
(
&
dest
,
&
stride
,
16
);
break
;
case
IDTX
:
iidtx16
(
in
);
iidtx16
(
in
);
break
;
case
V_DCT
:
iidtx16
(
in
);
idct16
(
in
);
...
...
av1/common/x86/idct_intrin_sse2.c
View file @
13d2aee7
...
...
@@ -494,6 +494,10 @@ void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
aom_iadst16_sse2
(
in0
,
in1
);
FLIPUD_PTR
(
dest
,
stride
,
16
);
break
;
case
IDTX
:
iidtx16_sse2
(
in0
,
in1
);
iidtx16_sse2
(
in0
,
in1
);
break
;
case
V_DCT
:
iidtx16_sse2
(
in0
,
in1
);
aom_idct16_sse2
(
in0
,
in1
);
...
...
av1/encoder/hybrid_fwd_txfm.c
View file @
13d2aee7
...
...
@@ -52,12 +52,7 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
return
;
}
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
4
,
tx_type
);
else
#endif
av1_fht4x4
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht4x4
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
static
void
fwd_txfm_4x8
(
const
int16_t
*
src_diff
,
tran_low_t
*
coeff
,
...
...
@@ -106,36 +101,21 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int
diff_stride
,
TX_TYPE
tx_type
,
FWD_TXFM_OPT
fwd_txfm_opt
)
{
(
void
)
fwd_txfm_opt
;
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
8
,
tx_type
);
else
#endif
av1_fht8x8
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht8x8
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
static
void
fwd_txfm_16x16
(
const
int16_t
*
src_diff
,
tran_low_t
*
coeff
,
int
diff_stride
,
TX_TYPE
tx_type
,
FWD_TXFM_OPT
fwd_txfm_opt
)
{
(
void
)
fwd_txfm_opt
;
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
16
,
tx_type
);
else
#endif
av1_fht16x16
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht16x16
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
static
void
fwd_txfm_32x32
(
const
int16_t
*
src_diff
,
tran_low_t
*
coeff
,
int
diff_stride
,
TX_TYPE
tx_type
,
FWD_TXFM_OPT
fwd_txfm_opt
)
{
(
void
)
fwd_txfm_opt
;
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
32
,
tx_type
);
else
#endif
av1_fht32x32
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht32x32
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
#if CONFIG_TX64X64
...
...
av1/encoder/x86/dct_intrin_sse2.c
View file @
13d2aee7
...
...
@@ -257,6 +257,12 @@ void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
IDTX
:
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fidtx4_sse2
(
in
);
fidtx4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
V_DCT
:
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fdct4_sse2
(
in
);
...
...
@@ -1357,6 +1363,13 @@ void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
IDTX
:
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fidtx8_sse2
(
in
);
fidtx8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
V_DCT
:
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fdct8_sse2
(
in
);
...
...
@@ -2579,6 +2592,13 @@ void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
IDTX
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fidtx16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fidtx16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
V_DCT
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fdct16_sse2
(
in0
,
in1
);
...
...
av1/encoder/x86/hybrid_fwd_txfm_avx2.c
View file @
13d2aee7
...
...
@@ -1025,6 +1025,13 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
right_shift_16x16
(
in
);
fadst16_avx2
(
in
);
break
;
case
IDTX
:
load_buffer_16x16
(
input
,
stride
,
0
,
0
,
in
);
fidtx16_avx2
(
in
);
mm256_transpose_16x16
(
in
);
right_shift_16x16
(
in
);
fidtx16_avx2
(
in
);
break
;
case
V_DCT
:
load_buffer_16x16
(
input
,
stride
,
0
,
0
,
in
);
fdct16_avx2
(
in
);
...
...
@@ -1621,6 +1628,12 @@ void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride,
right_shift_32x32
(
in0
,
in1
);
fhalfright32_avx2
(
in0
,
in1
);
break
;
case
IDTX
:
load_buffer_32x32
(
input
,
stride
,
0
,
0
,
in0
,
in1
);
fidtx32_avx2
(
in0
,
in1
);
right_shift_32x32
(
in0
,
in1
);
fidtx32_avx2
(
in0
,
in1
);
break
;
case
V_DCT
:
load_buffer_32x32
(
input
,
stride
,
0
,
0
,
in0
,
in1
);
fdct32_avx2
(
in0
,
in1
);
...
...
test/av1_fht16x16_test.cc
View file @
13d2aee7
...
...
@@ -184,6 +184,8 @@ const Ht16x16Param kArrayHt16x16Param_sse2[] = {
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
8
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
9
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
10
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
11
,
AOM_BITS_8
,
...
...
@@ -223,6 +225,8 @@ const Ht16x16Param kArrayHt16x16Param_avx2[] = {
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
8
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
9
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
10
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
11
,
AOM_BITS_8
,
...
...
test/av1_fht4x4_test.cc
View file @
13d2aee7
...
...
@@ -177,6 +177,7 @@ const Ht4x4Param kArrayHt4x4Param_sse2[] = {
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
6
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
7
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
8
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
9
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
10
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
11
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
12
,
AOM_BITS_8
,
16
),
...
...
test/av1_fht8x8_test.cc
View file @
13d2aee7
...
...
@@ -177,6 +177,7 @@ const Ht8x8Param kArrayHt8x8Param_sse2[] = {
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
6
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
7
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
8
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
9
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
10
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
11
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
12
,
AOM_BITS_8
,
64
),
...
...
test/fht32x32_test.cc
View file @
13d2aee7
...
...
@@ -201,6 +201,7 @@ const Ht32x32Param kArrayHt32x32Param_avx2[] = {
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
6
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
7
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
8
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
9
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
10
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
11
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
12
,
AOM_BITS_8
,
1024
),
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment