Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
46d2cc57
Commit
46d2cc57
authored
Nov 06, 2015
by
Debargha Mukherjee
Committed by
Gerrit Code Review
Nov 06, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Eliminate copying for FLIPADST in fwd transforms." into nextgenv2
parents
444acd77
01bb4a31
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
365 additions
and
368 deletions
+365
-368
vp10/encoder/dct.c
vp10/encoder/dct.c
+115
-0
vp10/encoder/encodemb.c
vp10/encoder/encodemb.c
+50
-339
vp10/encoder/x86/dct_sse2.c
vp10/encoder/x86/dct_sse2.c
+200
-29
No files found.
vp10/encoder/dct.c
View file @
46d2cc57
...
...
@@ -1161,6 +1161,106 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
output
[
15
]
=
(
tran_low_t
)
-
x1
;
}
#if CONFIG_EXT_TX
// Copies an l x l block of int16_t samples from src to dest, one row at a
// time. Row i of the source starts at src + i * src_stride and row i of the
// destination starts at dest + i * dest_stride (strides are in elements).
// Nothing is copied when l <= 0. Each row is moved with memcpy, so a source
// row must not overlap the destination row it is copied to.
static void copy_block(const int16_t *src, int src_stride, int l,
                       int16_t *dest, int dest_stride) {
  int row = 0;
  while (row < l) {
    const int16_t *const from = src + src_stride * row;
    int16_t *const to = dest + dest_stride * row;
    memcpy(to, from, l * sizeof(*to));
    ++row;
  }
}
// Mirrors an l x l block left-to-right in place: within every row, element j
// is exchanged with element l - 1 - j. stride is the distance, in elements,
// between the starts of consecutive rows. For odd l the centre column stays
// where it is.
static void fliplr(int16_t *dest, int stride, int l) {
  int r;
  for (r = 0; r < l; ++r) {
    int16_t *const row = dest + r * stride;
    int left, right;
    // Walk inwards from both ends of the row until the indices meet.
    for (left = 0, right = l - 1; left < right; ++left, --right) {
      const int16_t held = row[left];
      row[left] = row[right];
      row[right] = held;
    }
  }
}
// Mirrors an l x l block top-to-bottom in place: within every column, the
// element in row i is exchanged with the element in row l - 1 - i. stride is
// the distance, in elements, between the starts of consecutive rows. For odd
// l the centre row stays where it is.
static void flipud(int16_t *dest, int stride, int l) {
  int col;
  for (col = 0; col < l; ++col) {
    int16_t *const column = dest + col;
    int top, bottom;
    // Walk inwards from the first and last rows until the indices meet.
    for (top = 0, bottom = l - 1; top < bottom; ++top, --bottom) {
      int16_t *const upper = column + top * stride;
      int16_t *const lower = column + bottom * stride;
      const int16_t held = *upper;
      *upper = *lower;
      *lower = held;
    }
  }
}
// Flips an l x l block both left-to-right and top-to-bottom in place (a 180
// degree rotation): element (i, j) is exchanged with (l - 1 - i, l - 1 - j).
// stride is the distance, in elements, between the starts of consecutive
// rows. Nothing happens when l <= 1.
static void fliplrud(int16_t *dest, int stride, int l) {
  const int half = l / 2;
  int i, j;

  // Exchange every row in the upper half with its mirrored row in the lower
  // half, reversing the column order at the same time.
  for (i = 0; i < half; ++i) {
    for (j = 0; j < l; ++j) {
      const int16_t tmp = dest[i * stride + j];
      dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j];
      dest[(l - 1 - i) * stride + l - 1 - j] = tmp;
    }
  }

  // With an odd l the middle row is its own mirror row, so the loop above
  // never visits it; reverse it in place to complete the rotation.
  if (l & 1) {
    int16_t *const mid = dest + half * stride;
    for (j = 0; j < half; ++j) {
      const int16_t tmp = mid[j];
      mid[j] = mid[l - 1 - j];
      mid[l - 1 - j] = tmp;
    }
  }
}
// Writes a left-to-right mirrored copy of the l x l block at src into dest.
// The block is first copied unchanged with copy_block() and the copy is then
// mirrored in place with fliplr(); src itself is not modified.
static void copy_fliplr(const int16_t *src, int src_stride, int l,
                        int16_t *dest, int dest_stride) {
  copy_block(src, src_stride, l, dest, dest_stride);  // plain copy
  fliplr(dest, dest_stride, l);                       // mirror the copy
}
// Writes a top-to-bottom mirrored copy of the l x l block at src into dest.
// The block is first copied unchanged with copy_block() and the copy is then
// mirrored in place with flipud(); src itself is not modified.
static void copy_flipud(const int16_t *src, int src_stride, int l,
                        int16_t *dest, int dest_stride) {
  copy_block(src, src_stride, l, dest, dest_stride);  // plain copy
  flipud(dest, dest_stride, l);                       // mirror the copy
}
// Writes a copy of the l x l block at src into dest, flipped both
// left-to-right and top-to-bottom. The block is first copied unchanged with
// copy_block() and the copy is then flipped in place with fliplrud(); src
// itself is not modified.
static void copy_fliplrud(const int16_t *src, int src_stride, int l,
                          int16_t *dest, int dest_stride) {
  copy_block(src, src_stride, l, dest, dest_stride);  // plain copy
  fliplrud(dest, dest_stride, l);                     // flip the copy
}
// Redirects a forward-transform input to a flipped copy when tx_type calls
// for a FLIPADST in either direction.
//
//   src, src_stride  in/out: the l x l input block and its row stride. For
//                    flipping transform types they are rewritten to point at
//                    buff with a stride of l; otherwise they are untouched.
//   l                block dimension.
//   buff             caller-provided scratch that receives the flipped copy;
//                    it is written as l rows of l elements.
//   tx_type          transform type selecting which flip, if any, is applied.
//
// FLIPADST_{DCT,ADST,DST} flip top-to-bottom, {DCT,ADST,DST}_FLIPADST flip
// left-to-right, and FLIPADST_FLIPADST flips in both directions. An unknown
// tx_type trips assert(0) and otherwise leaves the input as it was.
static void maybe_flip_input(const int16_t **src, int *src_stride, int l,
                             int16_t *buff, int tx_type) {
  int flip_ud = 0;
  int flip_lr = 0;

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case DST_DST:
    case DCT_DST:
    case DST_DCT:
    case DST_ADST:
    case ADST_DST:
      // No flip involved: keep using the caller's buffer directly.
      return;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case FLIPADST_DST:
      flip_ud = 1;
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case DST_FLIPADST:
      flip_lr = 1;
      break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default:
      assert(0);
      return;
  }

  // At least one flag is set here; build the flipped copy in buff.
  if (flip_ud && flip_lr) {
    copy_fliplrud(*src, *src_stride, l, buff, l);
  } else if (flip_ud) {
    copy_flipud(*src, *src_stride, l, buff, l);
  } else {
    copy_fliplr(*src, *src_stride, l, buff, l);
  }

  // From now on the caller reads the densely packed flipped copy.
  *src = buff;
  *src_stride = l;
}
#endif // CONFIG_EXT_TX
static
const
transform_2d
FHT_4
[]
=
{
{
fdct4
,
fdct4
},
// DCT_DCT = 0,
{
fadst4
,
fdct4
},
// ADST_DCT = 1,
...
...
@@ -1234,6 +1334,11 @@ void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
tran_low_t
temp_in
[
4
],
temp_out
[
4
];
const
transform_2d
ht
=
FHT_4
[
tx_type
];
#if CONFIG_EXT_TX
int16_t
flipped_input
[
4
*
4
];
maybe_flip_input
(
&
input
,
&
stride
,
4
,
flipped_input
,
tx_type
);
#endif
// Columns
for
(
i
=
0
;
i
<
4
;
++
i
)
{
for
(
j
=
0
;
j
<
4
;
++
j
)
...
...
@@ -1378,6 +1483,11 @@ void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
tran_low_t
temp_in
[
8
],
temp_out
[
8
];
const
transform_2d
ht
=
FHT_8
[
tx_type
];
#if CONFIG_EXT_TX
int16_t
flipped_input
[
8
*
8
];
maybe_flip_input
(
&
input
,
&
stride
,
8
,
flipped_input
,
tx_type
);
#endif
// Columns
for
(
i
=
0
;
i
<
8
;
++
i
)
{
for
(
j
=
0
;
j
<
8
;
++
j
)
...
...
@@ -1464,6 +1574,11 @@ void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
tran_low_t
temp_in
[
16
],
temp_out
[
16
];
const
transform_2d
ht
=
FHT_16
[
tx_type
];
#if CONFIG_EXT_TX
int16_t
flipped_input
[
16
*
16
];
maybe_flip_input
(
&
input
,
&
stride
,
16
,
flipped_input
,
tx_type
);
#endif
// Columns
for
(
i
=
0
;
i
<
16
;
++
i
)
{
for
(
j
=
0
;
j
<
16
;
++
j
)
...
...
vp10/encoder/encodemb.c
View file @
46d2cc57
This diff is collapsed.
Click to expand it.
vp10/encoder/x86/dct_sse2.c
View file @
46d2cc57
...
...
@@ -18,16 +18,37 @@
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
// Returns x with its eight 16-bit words in reverse order.
// The 0x1b control reverses the four words inside one 64-bit half (applied
// to the low half first, then the high half); the 0x4e control then swaps
// the two 64-bit halves.
static INLINE __m128i mm_reverse_epi16(const __m128i x) {
  return _mm_shuffle_epi32(
      _mm_shufflehi_epi16(_mm_shufflelo_epi16(x, 0x1b), 0x1b), 0x4e);
}
static
INLINE
void
load_buffer_4x4
(
const
int16_t
*
input
,
__m128i
*
in
,
int
stride
)
{
int
stride
,
int
flipud
,
int
fliplr
)
{
const
__m128i
k__nonzero_bias_a
=
_mm_setr_epi16
(
0
,
1
,
1
,
1
,
1
,
1
,
1
,
1
);
const
__m128i
k__nonzero_bias_b
=
_mm_setr_epi16
(
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
);
__m128i
mask
;
in
[
0
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
0
*
stride
));
in
[
1
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
1
*
stride
));
in
[
2
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
2
*
stride
));
in
[
3
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
3
*
stride
));
if
(
!
flipud
)
{
in
[
0
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
0
*
stride
));
in
[
1
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
1
*
stride
));
in
[
2
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
2
*
stride
));
in
[
3
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
3
*
stride
));
}
else
{
in
[
0
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
3
*
stride
));
in
[
1
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
2
*
stride
));
in
[
2
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
1
*
stride
));
in
[
3
]
=
_mm_loadl_epi64
((
const
__m128i
*
)(
input
+
0
*
stride
));
}
if
(
fliplr
)
{
in
[
0
]
=
_mm_shufflelo_epi16
(
in
[
0
],
0x1b
);
in
[
1
]
=
_mm_shufflelo_epi16
(
in
[
1
],
0x1b
);
in
[
2
]
=
_mm_shufflelo_epi16
(
in
[
2
],
0x1b
);
in
[
3
]
=
_mm_shufflelo_epi16
(
in
[
3
],
0x1b
);
}
in
[
0
]
=
_mm_slli_epi16
(
in
[
0
],
4
);
in
[
1
]
=
_mm_slli_epi16
(
in
[
1
],
4
);
...
...
@@ -160,23 +181,55 @@ void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output,
vpx_fdct4x4_sse2
(
input
,
output
,
stride
);
break
;
case
ADST_DCT
:
load_buffer_4x4
(
input
,
in
,
stride
);
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fadst4_sse2
(
in
);
fdct4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
DCT_ADST
:
load_buffer_4x4
(
input
,
in
,
stride
);
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fdct4_sse2
(
in
);
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
ADST_ADST
:
load_buffer_4x4
(
input
,
in
,
stride
);
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
0
);
fadst4_sse2
(
in
);
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
#if CONFIG_EXT_TX
case
FLIPADST_DCT
:
load_buffer_4x4
(
input
,
in
,
stride
,
1
,
0
);
fadst4_sse2
(
in
);
fdct4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
DCT_FLIPADST
:
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
1
);
fdct4_sse2
(
in
);
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
FLIPADST_FLIPADST
:
load_buffer_4x4
(
input
,
in
,
stride
,
1
,
1
);
fadst4_sse2
(
in
);
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
ADST_FLIPADST
:
load_buffer_4x4
(
input
,
in
,
stride
,
0
,
1
);
fadst4_sse2
(
in
);
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
case
FLIPADST_ADST
:
load_buffer_4x4
(
input
,
in
,
stride
,
1
,
0
);
fadst4_sse2
(
in
);
fadst4_sse2
(
in
);
write_buffer_4x4
(
output
,
in
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
@@ -627,15 +680,37 @@ void vp10_fdct8x8_quant_sse2(const int16_t *input, int stride,
// load 8x8 array
static
INLINE
void
load_buffer_8x8
(
const
int16_t
*
input
,
__m128i
*
in
,
int
stride
)
{
in
[
0
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
0
*
stride
));
in
[
1
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
1
*
stride
));
in
[
2
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
2
*
stride
));
in
[
3
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
3
*
stride
));
in
[
4
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
4
*
stride
));
in
[
5
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
5
*
stride
));
in
[
6
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
6
*
stride
));
in
[
7
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
7
*
stride
));
int
stride
,
int
flipud
,
int
fliplr
)
{
if
(
!
flipud
)
{
in
[
0
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
0
*
stride
));
in
[
1
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
1
*
stride
));
in
[
2
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
2
*
stride
));
in
[
3
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
3
*
stride
));
in
[
4
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
4
*
stride
));
in
[
5
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
5
*
stride
));
in
[
6
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
6
*
stride
));
in
[
7
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
7
*
stride
));
}
else
{
in
[
0
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
7
*
stride
));
in
[
1
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
6
*
stride
));
in
[
2
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
5
*
stride
));
in
[
3
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
4
*
stride
));
in
[
4
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
3
*
stride
));
in
[
5
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
2
*
stride
));
in
[
6
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
1
*
stride
));
in
[
7
]
=
_mm_load_si128
((
const
__m128i
*
)(
input
+
0
*
stride
));
}
if
(
fliplr
)
{
in
[
0
]
=
mm_reverse_epi16
(
in
[
0
]);
in
[
1
]
=
mm_reverse_epi16
(
in
[
1
]);
in
[
2
]
=
mm_reverse_epi16
(
in
[
2
]);
in
[
3
]
=
mm_reverse_epi16
(
in
[
3
]);
in
[
4
]
=
mm_reverse_epi16
(
in
[
4
]);
in
[
5
]
=
mm_reverse_epi16
(
in
[
5
]);
in
[
6
]
=
mm_reverse_epi16
(
in
[
6
]);
in
[
7
]
=
mm_reverse_epi16
(
in
[
7
]);
}
in
[
0
]
=
_mm_slli_epi16
(
in
[
0
],
2
);
in
[
1
]
=
_mm_slli_epi16
(
in
[
1
],
2
);
...
...
@@ -1144,26 +1219,63 @@ void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output,
vpx_fdct8x8_sse2
(
input
,
output
,
stride
);
break
;
case
ADST_DCT
:
load_buffer_8x8
(
input
,
in
,
stride
);
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fadst8_sse2
(
in
);
fdct8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
DCT_ADST
:
load_buffer_8x8
(
input
,
in
,
stride
);
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fdct8_sse2
(
in
);
fadst8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
ADST_ADST
:
load_buffer_8x8
(
input
,
in
,
stride
);
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
0
);
fadst8_sse2
(
in
);
fadst8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
#if CONFIG_EXT_TX
case
FLIPADST_DCT
:
load_buffer_8x8
(
input
,
in
,
stride
,
1
,
0
);
fadst8_sse2
(
in
);
fdct8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
DCT_FLIPADST
:
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
1
);
fdct8_sse2
(
in
);
fadst8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
FLIPADST_FLIPADST
:
load_buffer_8x8
(
input
,
in
,
stride
,
1
,
1
);
fadst8_sse2
(
in
);
fadst8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
ADST_FLIPADST
:
load_buffer_8x8
(
input
,
in
,
stride
,
0
,
1
);
fadst8_sse2
(
in
);
fadst8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
case
FLIPADST_ADST
:
load_buffer_8x8
(
input
,
in
,
stride
,
1
,
0
);
fadst8_sse2
(
in
);
fadst8_sse2
(
in
);
right_shift_8x8
(
in
,
1
);
write_buffer_8x8
(
output
,
in
,
8
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
@@ -1171,15 +1283,37 @@ void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output,
}
static
INLINE
void
load_buffer_16x16
(
const
int16_t
*
input
,
__m128i
*
in0
,
__m128i
*
in1
,
int
stride
)
{
__m128i
*
in1
,
int
stride
,
int
flipud
,
int
fliplr
)
{
// Load 4 8x8 blocks
const
int16_t
*
topL
=
input
;
const
int16_t
*
topR
=
input
+
8
;
const
int16_t
*
botL
=
input
+
8
*
stride
;
const
int16_t
*
botR
=
input
+
8
*
stride
+
8
;
const
int16_t
*
tmp
;
if
(
flipud
)
{
// Swap left columns
tmp
=
topL
;
topL
=
botL
;
botL
=
tmp
;
// Swap right columns
tmp
=
topR
;
topR
=
botR
;
botR
=
tmp
;
}
if
(
fliplr
)
{
// Swap top rows
tmp
=
topL
;
topL
=
topR
;
topR
=
tmp
;
// Swap bottom rows
tmp
=
botL
;
botL
=
botR
;
botR
=
tmp
;
}
// load first 8 columns
load_buffer_8x8
(
input
,
in0
,
stride
);
load_buffer_8x8
(
input
+
8
*
stride
,
in0
+
8
,
stride
);
load_buffer_8x8
(
topL
,
in0
,
stride
,
flipud
,
fliplr
);
load_buffer_8x8
(
botL
,
in0
+
8
,
stride
,
flipud
,
fliplr
);
input
+=
8
;
// load second 8 columns
load_buffer_8x8
(
input
,
in1
,
stride
);
load_buffer_8x8
(
input
+
8
*
stride
,
in1
+
8
,
stride
);
load_buffer_8x8
(
topR
,
in1
,
stride
,
flipud
,
fliplr
);
load_buffer_8x8
(
botR
,
in1
+
8
,
stride
,
flipud
,
fliplr
);
}
static
INLINE
void
write_buffer_16x16
(
tran_low_t
*
output
,
__m128i
*
in0
,
...
...
@@ -2031,26 +2165,63 @@ void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output,
vpx_fdct16x16_sse2
(
input
,
output
,
stride
);
break
;
case
ADST_DCT
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
);
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fadst16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fdct16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
DCT_ADST
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
);
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fdct16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
ADST_ADST
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
);
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
0
);
fadst16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
#if CONFIG_EXT_TX
case
FLIPADST_DCT
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
1
,
0
);
fadst16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fdct16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
DCT_FLIPADST
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
1
);
fdct16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
FLIPADST_FLIPADST
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
1
,
1
);
fadst16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
ADST_FLIPADST
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
0
,
1
);
fadst16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
case
FLIPADST_ADST
:
load_buffer_16x16
(
input
,
in0
,
in1
,
stride
,
1
,
0
);
fadst16_sse2
(
in0
,
in1
);
right_shift_16x16
(
in0
,
in1
);
fadst16_sse2
(
in0
,
in1
);
write_buffer_16x16
(
output
,
in0
,
in1
,
16
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment