Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
b9c3dd48
Commit
b9c3dd48
authored
Jul 26, 2013
by
Jingning Han
Committed by
Gerrit Code Review
Jul 26, 2013
Browse files
Merge "Special handle on DC only inverse 8x8 2D-DCT"
parents
620861de
325e0aa6
Changes
5
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_idct.c
View file @
b9c3dd48
...
...
@@ -225,6 +225,19 @@ void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
}
}
/* DC-only special case of the inverse 8x8 2D-DCT with reconstruction.
 *
 * Only input[0] is read. It is multiplied by cospi_16_64 twice, with
 * dct_const_round_shift applied after each multiply, and then reduced with
 * ROUND_POWER_OF_TWO(..., 5). The intermediate value is stored as int16_t
 * between the two scaling steps. The resulting constant is added to each of
 * the 64 pixels of the 8x8 block at dest, and every sum goes through
 * clip_pixel before being written back.
 *
 * input       - coefficient buffer; only element 0 is used.
 * dest        - top-left pixel of the 8x8 block, updated in place.
 * dest_stride - distance, in uint8_t elements, between consecutive rows.
 */
void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
  int row, col;
  int dc_offset;
  int16_t dc = dct_const_round_shift(input[0] * cospi_16_64);

  dc = dct_const_round_shift(dc * cospi_16_64);
  dc_offset = ROUND_POWER_OF_TWO(dc, 5);

  for (row = 0; row < 8; ++row) {
    uint8_t *const line = dest + row * dest_stride;

    /* Same constant for every pixel: the block has no AC contribution. */
    for (col = 0; col < 8; ++col) {
      line[col] = clip_pixel(line[col] + dc_offset);
    }
  }
}
static
void
iadst4_1d
(
int16_t
*
input
,
int16_t
*
output
)
{
int
s0
,
s1
,
s2
,
s3
,
s4
,
s5
,
s6
,
s7
;
...
...
@@ -433,12 +446,6 @@ void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
}
}
/* Computes the DC-only inverse 8x8 DCT value without touching any pixels.
 *
 * input[0] is multiplied by cospi_16_64 twice, with dct_const_round_shift
 * applied after each multiply; the result is reduced with
 * ROUND_POWER_OF_TWO(..., 5) and written to output[0]. No other element of
 * either buffer is accessed.
 */
void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) {
  int16_t dc;

  dc = dct_const_round_shift(input[0] * cospi_16_64);
  dc = dct_const_round_shift(dc * cospi_16_64);
  *output = ROUND_POWER_OF_TWO(dc, 5);
}
static
void
idct16_1d
(
int16_t
*
input
,
int16_t
*
output
)
{
int16_t
step1
[
16
],
step2
[
16
];
int
temp1
,
temp2
;
...
...
vp9/common/vp9_rtcd_defs.sh
View file @
b9c3dd48
...
...
@@ -297,15 +297,15 @@ specialize vp9_short_idct4x4_1_add sse2
prototype void vp9_short_idct4x4_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_add sse2
prototype void vp9_short_idct8x8_1_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_1_add sse2
prototype void vp9_short_idct8x8_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_add sse2 neon
prototype void vp9_short_idct10_8x8_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct10_8x8_add sse2
prototype void vp9_short_idct1_8x8
"int16_t *input, int16_t *output"
specialize vp9_short_idct1_8x8
prototype void vp9_short_idct16x16_add
"int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_add sse2
...
...
vp9/common/x86/vp9_idct_intrin_sse2.c
View file @
b9c3dd48
...
...
@@ -523,9 +523,9 @@ void vp9_short_iht4x4_add_sse2(int16_t *input, uint8_t *dest, int stride,
{ \
__m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
d0 = _mm_unpacklo_epi8(d0, zero); \
in_x
= _mm_add_epi16(in_x, d0); \
in_x
= _mm_packus_epi16(
in_x, in_x
); \
_mm_storel_epi64((__m128i *)(dest),
in_x
); \
d0
= _mm_add_epi16(in_x, d0); \
d0
= _mm_packus_epi16(
d0, d0
); \
_mm_storel_epi64((__m128i *)(dest),
d0
); \
dest += stride; \
}
...
...
@@ -597,6 +597,27 @@ void vp9_short_idct8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE
(
dest
,
in7
);
}
/* SSE2 version of the DC-only inverse 8x8 DCT with reconstruction.
 *
 * input[0] is multiplied by cospi_16_64 twice, with dct_const_round_shift
 * applied after each multiply, then reduced with ROUND_POWER_OF_TWO(..., 5).
 * That value is broadcast to all 16-bit lanes of an __m128i and handed to
 * RECON_AND_STORE eight times, once per row of the block at dest.
 *
 * input  - coefficient buffer; only element 0 is used.
 * dest   - top-left pixel of the 8x8 block, updated in place.
 * stride - row pitch of dest.
 */
void vp9_short_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
  /* NOTE(review): `zero` and `stride` are not referenced directly below;
   * RECON_AND_STORE presumably picks them up from this scope, so their names
   * must stay as they are -- confirm against the macro definition. */
  const __m128i zero = _mm_setzero_si128();
  const int scaled_once = dct_const_round_shift(input[0] * cospi_16_64);
  const int scaled_twice = dct_const_round_shift(scaled_once * cospi_16_64);
  __m128i dc_value = _mm_set1_epi16(ROUND_POWER_OF_TWO(scaled_twice, 5));
  int row;

  /* One macro invocation per row, all using the same broadcast DC value. */
  for (row = 0; row < 8; ++row) {
    RECON_AND_STORE(dest, dc_value);
  }
}
// perform 8x8 transpose
static
INLINE
void
array_transpose_8x8
(
__m128i
*
in
,
__m128i
*
res
)
{
const
__m128i
tr0_0
=
_mm_unpacklo_epi16
(
in
[
0
],
in
[
1
]);
...
...
vp9/decoder/vp9_idct_blk.c
View file @
b9c3dd48
...
...
@@ -93,15 +93,8 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
if
(
eob
)
{
if
(
eob
==
1
)
{
// DC only DCT coefficient
int16_t
in
=
input
[
0
];
int16_t
out
;
// Note: the idct1 will need to be modified accordingly whenever
// vp9_short_idct8x8_c() is modified.
vp9_short_idct1_8x8_c
(
&
in
,
&
out
);
vp9_short_idct8x8_1_add
(
input
,
dest
,
stride
);
input
[
0
]
=
0
;
vp9_add_constant_residual_8x8
(
out
,
dest
,
stride
);
}
else
{
vp9_short_idct8x8_add
(
input
,
dest
,
stride
);
vpx_memset
(
input
,
0
,
128
);
...
...
vp9/encoder/vp9_encodemb.c
View file @
b9c3dd48
...
...
@@ -47,6 +47,14 @@ static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob,
xd
->
inv_txm4x4_add
(
dqcoeff
,
dest
,
stride
);
}
/* Picks the 8x8 inverse DCT + add routine based on the end-of-block count.
 *
 * When eob is greater than 1 the general vp9_short_idct8x8_add is called;
 * otherwise (eob <= 1) vp9_short_idct8x8_1_add is called instead. Both
 * receive dqcoeff, dest and stride unchanged. xd is not used by this helper.
 */
static void inverse_transform_b_8x8_add(MACROBLOCKD *xd, int eob,
                                        int16_t *dqcoeff, uint8_t *dest,
                                        int stride) {
  (void)xd;

  if (eob > 1) {
    vp9_short_idct8x8_add(dqcoeff, dest, stride);
    return;
  }

  vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
}
static
void
subtract_plane
(
MACROBLOCK
*
x
,
BLOCK_SIZE_TYPE
bsize
,
int
plane
)
{
struct
macroblock_plane
*
const
p
=
&
x
->
plane
[
plane
];
...
...
@@ -533,7 +541,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_short_idct16x16_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
break
;
case
TX_8X8
:
vp9_short_idct8x8_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
inverse_transform_b_8x8_add
(
xd
,
pd
->
eobs
[
block
],
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
break
;
case
TX_4X4
:
// this is like vp9_short_idct4x4 but has a special case around eob<=1
...
...
@@ -711,7 +720,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
pd
->
dequant
,
p
->
zbin_extra
,
eob
,
scan
,
iscan
);
if
(
!
x
->
skip_encode
&&
*
eob
)
{
if
(
tx_type
==
DCT_DCT
)
vp9_short_idct8x8_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
inverse_transform_b_8x8_add
(
xd
,
*
eob
,
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
else
vp9_short_iht8x8_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
,
tx_type
);
}
...
...
@@ -746,8 +755,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
inverse_transform_b_4x4_add
(
xd
,
*
eob
,
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
inverse_transform_b_4x4_add
(
xd
,
*
eob
,
dqcoeff
,
dst
,
pd
->
dst
.
stride
);
else
vp9_short_iht4x4_add
(
dqcoeff
,
dst
,
pd
->
dst
.
stride
,
tx_type
);
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment