Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
13d2aee7
Commit
13d2aee7
authored
Mar 29, 2017
by
Yi Luo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add the missing IDTX type optimization to hybrid txfm
Change-Id: I99b15e5270bfefe2eb3e982aeba06ed564540d73
parent
72e2e982
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
52 additions
and
24 deletions
+52
-24
av1/common/x86/hybrid_inv_txfm_avx2.c
av1/common/x86/hybrid_inv_txfm_avx2.c
+4
-0
av1/common/x86/idct_intrin_sse2.c
av1/common/x86/idct_intrin_sse2.c
+4
-0
av1/encoder/hybrid_fwd_txfm.c
av1/encoder/hybrid_fwd_txfm.c
+4
-24
av1/encoder/x86/dct_intrin_sse2.c
av1/encoder/x86/dct_intrin_sse2.c
+20
-0
av1/encoder/x86/hybrid_fwd_txfm_avx2.c
av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+13
-0
test/av1_fht16x16_test.cc
test/av1_fht16x16_test.cc
+4
-0
test/av1_fht4x4_test.cc
test/av1_fht4x4_test.cc
+1
-0
test/av1_fht8x8_test.cc
test/av1_fht8x8_test.cc
+1
-0
test/fht32x32_test.cc
test/fht32x32_test.cc
+1
-0
No files found.
av1/common/x86/hybrid_inv_txfm_avx2.c
View file @
13d2aee7
...
...
@@ -470,6 +470,10 @@ void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest,
iadst16
(
in
);
flip_col
(
&
dest
,
&
stride
,
16
);
break
;
case
IDTX
:
iidtx16
(
in
);
iidtx16
(
in
);
break
;
case
V_DCT
:
iidtx16
(
in
);
idct16
(
in
);
...
...
av1/common/x86/idct_intrin_sse2.c
View file @
13d2aee7
...
...
@@ -494,6 +494,10 @@ void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
aom_iadst16_sse2
(
in0
,
in1
);
FLIPUD_PTR
(
dest
,
stride
,
16
);
break
;
case
IDTX
:
iidtx16_sse2
(
in0
,
in1
);
iidtx16_sse2
(
in0
,
in1
);
break
;
case
V_DCT
:
iidtx16_sse2
(
in0
,
in1
);
aom_idct16_sse2
(
in0
,
in1
);
...
...
av1/encoder/hybrid_fwd_txfm.c
View file @
13d2aee7
...
...
@@ -52,12 +52,7 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
return
;
}
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
4
,
tx_type
);
else
#endif
av1_fht4x4
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht4x4
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
static
void
fwd_txfm_4x8
(
const
int16_t
*
src_diff
,
tran_low_t
*
coeff
,
...
...
@@ -106,36 +101,21 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int
diff_stride
,
TX_TYPE
tx_type
,
FWD_TXFM_OPT
fwd_txfm_opt
)
{
(
void
)
fwd_txfm_opt
;
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
8
,
tx_type
);
else
#endif
av1_fht8x8
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht8x8
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
static
void
fwd_txfm_16x16
(
const
int16_t
*
src_diff
,
tran_low_t
*
coeff
,
int
diff_stride
,
TX_TYPE
tx_type
,
FWD_TXFM_OPT
fwd_txfm_opt
)
{
(
void
)
fwd_txfm_opt
;
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
16
,
tx_type
);
else
#endif
av1_fht16x16
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht16x16
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
static
void
fwd_txfm_32x32
(
const
int16_t
*
src_diff
,
tran_low_t
*
coeff
,
int
diff_stride
,
TX_TYPE
tx_type
,
FWD_TXFM_OPT
fwd_txfm_opt
)
{
(
void
)
fwd_txfm_opt
;
#if CONFIG_EXT_TX
if
(
tx_type
==
IDTX
)
av1_fwd_idtx_c
(
src_diff
,
coeff
,
diff_stride
,
32
,
tx_type
);
else
#endif
av1_fht32x32
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
av1_fht32x32
(
src_diff
,
coeff
,
diff_stride
,
tx_type
);
}
#if CONFIG_TX64X64
...
...
av1/encoder/x86/dct_intrin_sse2.c
View file @
13d2aee7
...
...
@@ -257,6 +257,12 @@ void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
IDTX
:
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fidtx4_sse2
(
in
);
fidtx4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
V_DCT
:
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fdct4_sse2
(
in
);
...
...
@@ -1357,6 +1363,13 @@ void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
IDTX
:
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fidtx8_sse2
(
in
);
fidtx8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
V_DCT
:
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fdct8_sse2
(
in
);
...
...
@@ -2579,6 +2592,13 @@ void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
IDTX
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fidtx16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fidtx16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
V_DCT
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fdct16_sse2
(
in0
,
in1
);
...
...
av1/encoder/x86/hybrid_fwd_txfm_avx2.c
View file @
13d2aee7
...
...
@@ -1025,6 +1025,13 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
right_shift_16x16
(
in
);
fadst16_avx2
(
in
);
break
;
case
IDTX
:
load_buffer_16x16
(
input
,
stride
,
0
,
0
,
in
);
fidtx16_avx2
(
in
);
mm256_transpose_16x16
(
in
);
right_shift_16x16
(
in
);
fidtx16_avx2
(
in
);
break
;
case
V_DCT
:
load_buffer_16x16
(
input
,
stride
,
0
,
0
,
in
);
fdct16_avx2
(
in
);
...
...
@@ -1621,6 +1628,12 @@ void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride,
right_shift_32x32
(
in0
,
in1
);
fhalfright32_avx2
(
in0
,
in1
);
break
;
case
IDTX
:
load_buffer_32x32
(
input
,
stride
,
0
,
0
,
in0
,
in1
);
fidtx32_avx2
(
in0
,
in1
);
right_shift_32x32
(
in0
,
in1
);
fidtx32_avx2
(
in0
,
in1
);
break
;
case
V_DCT
:
load_buffer_32x32
(
input
,
stride
,
0
,
0
,
in0
,
in1
);
fdct32_avx2
(
in0
,
in1
);
...
...
test/av1_fht16x16_test.cc
View file @
13d2aee7
...
...
@@ -184,6 +184,8 @@ const Ht16x16Param kArrayHt16x16Param_sse2[] = {
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
8
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
9
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
10
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_sse2
,
&
av1_iht16x16_256_add_sse2
,
11
,
AOM_BITS_8
,
...
...
@@ -223,6 +225,8 @@ const Ht16x16Param kArrayHt16x16Param_avx2[] = {
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
8
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
9
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
10
,
AOM_BITS_8
,
256
),
make_tuple
(
&
av1_fht16x16_avx2
,
&
av1_iht16x16_256_add_avx2
,
11
,
AOM_BITS_8
,
...
...
test/av1_fht4x4_test.cc
View file @
13d2aee7
...
...
@@ -177,6 +177,7 @@ const Ht4x4Param kArrayHt4x4Param_sse2[] = {
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
6
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
7
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
8
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
9
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
10
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
11
,
AOM_BITS_8
,
16
),
make_tuple
(
&
av1_fht4x4_sse2
,
&
av1_iht4x4_16_add_sse2
,
12
,
AOM_BITS_8
,
16
),
...
...
test/av1_fht8x8_test.cc
View file @
13d2aee7
...
...
@@ -177,6 +177,7 @@ const Ht8x8Param kArrayHt8x8Param_sse2[] = {
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
6
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
7
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
8
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
9
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
10
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
11
,
AOM_BITS_8
,
64
),
make_tuple
(
&
av1_fht8x8_sse2
,
&
av1_iht8x8_64_add_sse2
,
12
,
AOM_BITS_8
,
64
),
...
...
test/fht32x32_test.cc
View file @
13d2aee7
...
...
@@ -201,6 +201,7 @@ const Ht32x32Param kArrayHt32x32Param_avx2[] = {
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
6
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
7
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
8
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
9
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
10
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
11
,
AOM_BITS_8
,
1024
),
make_tuple
(
&
av1_fht32x32_avx2
,
&
dummy_inv_txfm
,
12
,
AOM_BITS_8
,
1024
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment