Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
70e514ce
Commit
70e514ce
authored
Nov 06, 2015
by
Debargha Mukherjee
Committed by
Gerrit Code Review
Nov 06, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Flip the result of the inverse transform for FLIPADST." into nextgenv2
parents
46d2cc57
4f510809
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
425 additions
and
364 deletions
+425
-364
vp10/common/idct.c
vp10/common/idct.c
+273
-346
vp10/common/x86/idct_intrin_sse2.c
vp10/common/x86/idct_intrin_sse2.c
+152
-18
No files found.
vp10/common/idct.c
View file @
70e514ce
This diff is collapsed.
Click to expand it.
vp10/common/x86/idct_intrin_sse2.c
View file @
70e514ce
...
...
@@ -11,6 +11,54 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
#include "vp10/common/enums.h"
#if CONFIG_EXT_TX
// Reverse the 8 16 bit words in __m128i
static
INLINE
__m128i
mm_reverse_epi16
(
const
__m128i
x
)
{
const
__m128i
a
=
_mm_shufflelo_epi16
(
x
,
0x1b
);
const
__m128i
b
=
_mm_shufflehi_epi16
(
a
,
0x1b
);
return
_mm_shuffle_epi32
(
b
,
0x4e
);
}
static
INLINE
void
fliplr_4x4
(
__m128i
in
[
2
])
{
in
[
0
]
=
_mm_shufflelo_epi16
(
in
[
0
],
0x1b
);
in
[
0
]
=
_mm_shufflehi_epi16
(
in
[
0
],
0x1b
);
in
[
1
]
=
_mm_shufflelo_epi16
(
in
[
1
],
0x1b
);
in
[
1
]
=
_mm_shufflehi_epi16
(
in
[
1
],
0x1b
);
}
static
INLINE
void
fliplr_8x8
(
__m128i
in
[
8
])
{
in
[
0
]
=
mm_reverse_epi16
(
in
[
0
]);
in
[
1
]
=
mm_reverse_epi16
(
in
[
1
]);
in
[
2
]
=
mm_reverse_epi16
(
in
[
2
]);
in
[
3
]
=
mm_reverse_epi16
(
in
[
3
]);
in
[
4
]
=
mm_reverse_epi16
(
in
[
4
]);
in
[
5
]
=
mm_reverse_epi16
(
in
[
5
]);
in
[
6
]
=
mm_reverse_epi16
(
in
[
6
]);
in
[
7
]
=
mm_reverse_epi16
(
in
[
7
]);
}
static
INLINE
void
fliplr_16x8
(
__m128i
in
[
16
])
{
fliplr_8x8
(
&
in
[
0
]);
fliplr_8x8
(
&
in
[
8
]);
}
#define FLIPLR_16x16(in0, in1) do { \
__m128i *tmp; \
fliplr_16x8(in0); \
fliplr_16x8(in1); \
tmp = (in0); \
(in0) = (in1); \
(in1) = tmp; \
} while (0)
#define FLIPUD_PTR(dest, stride, size) do { \
(dest) = (dest) + ((size) - 1) * (stride); \
(stride) = - (stride); \
} while (0)
#endif
void
vp10_iht4x4_16_add_sse2
(
const
tran_low_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
tx_type
)
{
...
...
@@ -22,22 +70,50 @@ void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in
[
1
]
=
load_input_data
(
input
+
8
);
switch
(
tx_type
)
{
case
0
:
// DCT_DCT
case
DCT_DCT
:
idct4_sse2
(
in
);
idct4_sse2
(
in
);
break
;
case
ADST_DCT
:
idct4_sse2
(
in
);
iadst4_sse2
(
in
);
break
;
case
DCT_ADST
:
iadst4_sse2
(
in
);
idct4_sse2
(
in
);
break
;
case
1
:
// ADST_DCT
case
ADST_ADST
:
iadst4_sse2
(
in
);
iadst4_sse2
(
in
);
break
;
#if CONFIG_EXT_TX
case
FLIPADST_DCT
:
idct4_sse2
(
in
);
iadst4_sse2
(
in
);
FLIPUD_PTR
(
dest
,
stride
,
4
);
break
;
case
2
:
// DCT_ADST
case
DCT_FLIPADST
:
iadst4_sse2
(
in
);
idct4_sse2
(
in
);
fliplr_4x4
(
in
);
break
;
case
FLIPADST_FLIPADST
:
iadst4_sse2
(
in
);
iadst4_sse2
(
in
);
FLIPUD_PTR
(
dest
,
stride
,
4
);
fliplr_4x4
(
in
);
break
;
case
3
:
// ADST_ADST
case
ADST_FLIPADST
:
iadst4_sse2
(
in
);
iadst4_sse2
(
in
);
fliplr_4x4
(
in
);
break
;
case
FLIPADST_ADST
:
iadst4_sse2
(
in
);
iadst4_sse2
(
in
);
FLIPUD_PTR
(
dest
,
stride
,
4
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
@@ -52,12 +128,12 @@ void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
// Reconstruction and Store
{
__m128i
d0
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
));
__m128i
d0
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
stride
*
0
));
__m128i
d1
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
stride
*
1
));
__m128i
d2
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
stride
*
2
));
d0
=
_mm_unpacklo_epi32
(
d0
,
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
stride
)));
d2
=
_mm_unpacklo_epi32
(
d2
,
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
stride
*
3
)));
__m128i
d3
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
stride
*
3
));
d0
=
_mm_unpacklo_epi32
(
d0
,
d1
);
d2
=
_mm_unpacklo_epi32
(
d2
,
d3
);
d0
=
_mm_unpacklo_epi8
(
d0
,
zero
);
d2
=
_mm_unpacklo_epi8
(
d2
,
zero
);
d0
=
_mm_add_epi16
(
d0
,
in
[
0
]);
...
...
@@ -94,22 +170,50 @@ void vp10_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in
[
7
]
=
load_input_data
(
input
+
8
*
7
);
switch
(
tx_type
)
{
case
0
:
// DCT_DCT
case
DCT_DCT
:
idct8_sse2
(
in
);
idct8_sse2
(
in
);
break
;
case
ADST_DCT
:
idct8_sse2
(
in
);
iadst8_sse2
(
in
);
break
;
case
DCT_ADST
:
iadst8_sse2
(
in
);
idct8_sse2
(
in
);
break
;
case
1
:
// ADST_DCT
case
ADST_ADST
:
iadst8_sse2
(
in
);
iadst8_sse2
(
in
);
break
;
#if CONFIG_EXT_TX
case
FLIPADST_DCT
:
idct8_sse2
(
in
);
iadst8_sse2
(
in
);
FLIPUD_PTR
(
dest
,
stride
,
8
);
break
;
case
2
:
// DCT_ADST
case
DCT_FLIPADST
:
iadst8_sse2
(
in
);
idct8_sse2
(
in
);
fliplr_8x8
(
in
);
break
;
case
3
:
// ADST_ADST
case
FLIPADST_FLIPADST
:
iadst8_sse2
(
in
);
iadst8_sse2
(
in
);
FLIPUD_PTR
(
dest
,
stride
,
8
);
fliplr_8x8
(
in
);
break
;
case
ADST_FLIPADST
:
iadst8_sse2
(
in
);
iadst8_sse2
(
in
);
fliplr_8x8
(
in
);
break
;
case
FLIPADST_ADST
:
iadst8_sse2
(
in
);
iadst8_sse2
(
in
);
FLIPUD_PTR
(
dest
,
stride
,
8
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
@@ -146,29 +250,59 @@ void vp10_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
void
vp10_iht16x16_256_add_sse2
(
const
tran_low_t
*
input
,
uint8_t
*
dest
,
int
stride
,
int
tx_type
)
{
__m128i
in0
[
16
],
in1
[
16
];
__m128i
in
[
32
];
__m128i
*
in0
=
&
in
[
0
];
__m128i
*
in1
=
&
in
[
16
];
load_buffer_8x16
(
input
,
in0
);
input
+=
8
;
load_buffer_8x16
(
input
,
in1
);
switch
(
tx_type
)
{
case
0
:
// DCT_DCT
case
DCT_DCT
:
idct16_sse2
(
in0
,
in1
);
idct16_sse2
(
in0
,
in1
);
break
;
case
ADST_DCT
:
idct16_sse2
(
in0
,
in1
);
iadst16_sse2
(
in0
,
in1
);
break
;
case
DCT_ADST
:
iadst16_sse2
(
in0
,
in1
);
idct16_sse2
(
in0
,
in1
);
break
;
case
ADST_ADST
:
iadst16_sse2
(
in0
,
in1
);
iadst16_sse2
(
in0
,
in1
);
break
;
case
1
:
// ADST_DCT
#if CONFIG_EXT_TX
case
FLIPADST_DCT
:
idct16_sse2
(
in0
,
in1
);
iadst16_sse2
(
in0
,
in1
);
FLIPUD_PTR
(
dest
,
stride
,
16
);
break
;
case
2
:
// DCT_ADST
case
DCT_FLIPADST
:
iadst16_sse2
(
in0
,
in1
);
idct16_sse2
(
in0
,
in1
);
FLIPLR_16x16
(
in0
,
in1
);
break
;
case
FLIPADST_FLIPADST
:
iadst16_sse2
(
in0
,
in1
);
iadst16_sse2
(
in0
,
in1
);
FLIPUD_PTR
(
dest
,
stride
,
16
);
FLIPLR_16x16
(
in0
,
in1
);
break
;
case
ADST_FLIPADST
:
iadst16_sse2
(
in0
,
in1
);
iadst16_sse2
(
in0
,
in1
);
FLIPLR_16x16
(
in0
,
in1
);
break
;
case
3
:
// ADST_ADST
case
FLIPADST_ADST
:
iadst16_sse2
(
in0
,
in1
);
iadst16_sse2
(
in0
,
in1
);
FLIPUD_PTR
(
dest
,
stride
,
16
);
break
;
#endif // CONFIG_EXT_TX
default:
assert
(
0
);
break
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment