Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
466f3951
Commit
466f3951
authored
Apr 16, 2013
by
Scott LaVarnway
Committed by
Gerrit Code Review
Apr 16, 2013
Browse files
Merge "Removing extra params from x_add_residual() functions" into experimental
parents
e9c2bb08
6f95d53e
Changes
3
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_rtcd_defs.sh
View file @
466f3951
...
...
@@ -108,25 +108,25 @@ prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x
specialize vp9_intra_uv4x4_predict
;
if
[
"
$CONFIG_VP9_DECODER
"
=
"yes"
]
;
then
prototype void vp9_add_residual_4x4
"const int16_t *diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_residual_4x4
"const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_4x4 sse2
prototype void vp9_add_residual_8x8
"const int16_t *diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_residual_8x8
"const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_8x8 sse2
prototype void vp9_add_residual_16x16
"const int16_t *diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_residual_16x16
"const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_16x16 sse2
prototype void vp9_add_residual_32x32
"const int16_t *diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_residual_32x32
"const int16_t *diff, uint8_t *dest, int stride"
specialize vp9_add_residual_32x32 sse2
prototype void vp9_add_constant_residual_8x8
"const int16_t diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_constant_residual_8x8
"const int16_t diff, uint8_t *dest, int stride"
specialize vp9_add_constant_residual_8x8 sse2
prototype void vp9_add_constant_residual_16x16
"const int16_t diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_constant_residual_16x16
"const int16_t diff, uint8_t *dest, int stride"
specialize vp9_add_constant_residual_16x16 sse2
prototype void vp9_add_constant_residual_32x32
"const int16_t diff,
const uint8_t *pred, int pitch,
uint8_t *dest, int stride"
prototype void vp9_add_constant_residual_32x32
"const int16_t diff, uint8_t *dest, int stride"
specialize vp9_add_constant_residual_32x32 sse2
fi
...
...
vp9/decoder/vp9_dequantize.c
View file @
466f3951
...
...
@@ -16,67 +16,60 @@
#include "vp9/common/vp9_common.h"
static
void
add_residual
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
,
int
width
,
int
height
)
{
static
void
add_residual
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
,
int
width
,
int
height
)
{
int
r
,
c
;
for
(
r
=
0
;
r
<
height
;
r
++
)
{
for
(
c
=
0
;
c
<
width
;
c
++
)
dest
[
c
]
=
clip_pixel
(
diff
[
c
]
+
pred
[
c
]);
dest
[
c
]
=
clip_pixel
(
diff
[
c
]
+
dest
[
c
]);
dest
+=
stride
;
diff
+=
width
;
pred
+=
pitch
;
}
}
void
vp9_add_residual_4x4_c
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
4
,
4
);
void
vp9_add_residual_4x4_c
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
dest
,
stride
,
4
,
4
);
}
void
vp9_add_residual_8x8_c
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
8
,
8
);
void
vp9_add_residual_8x8_c
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
dest
,
stride
,
8
,
8
);
}
void
vp9_add_residual_16x16_c
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
16
,
16
);
void
vp9_add_residual_16x16_c
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
dest
,
stride
,
16
,
16
);
}
void
vp9_add_residual_32x32_c
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
32
,
32
);
void
vp9_add_residual_32x32_c
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_residual
(
diff
,
dest
,
stride
,
32
,
32
);
}
static
void
add_constant_residual
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
,
static
void
add_constant_residual
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
,
int
width
,
int
height
)
{
int
r
,
c
;
for
(
r
=
0
;
r
<
height
;
r
++
)
{
for
(
c
=
0
;
c
<
width
;
c
++
)
dest
[
c
]
=
clip_pixel
(
diff
+
pred
[
c
]);
dest
[
c
]
=
clip_pixel
(
diff
+
dest
[
c
]);
dest
+=
stride
;
pred
+=
pitch
;
}
}
void
vp9_add_constant_residual_8x8_c
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_constant_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
8
,
8
);
void
vp9_add_constant_residual_8x8_c
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_constant_residual
(
diff
,
dest
,
stride
,
8
,
8
);
}
void
vp9_add_constant_residual_16x16_c
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_constant_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
16
,
16
);
void
vp9_add_constant_residual_16x16_c
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_constant_residual
(
diff
,
dest
,
stride
,
16
,
16
);
}
void
vp9_add_constant_residual_32x32_c
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
add_constant_residual
(
diff
,
pred
,
pitch
,
dest
,
stride
,
32
,
32
);
void
vp9_add_constant_residual_32x32_c
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
)
{
add_constant_residual
(
diff
,
dest
,
stride
,
32
,
32
);
}
void
vp9_dequant_iht_add_c
(
TX_TYPE
tx_type
,
int16_t
*
input
,
...
...
@@ -90,7 +83,7 @@ void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input,
vp9_short_iht4x4
(
input
,
output
,
4
,
tx_type
);
vpx_memset
(
input
,
0
,
32
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
);
}
void
vp9_dequant_iht_add_8x8_c
(
TX_TYPE
tx_type
,
int16_t
*
input
,
...
...
@@ -107,7 +100,7 @@ void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input,
vp9_short_iht8x8
(
input
,
output
,
8
,
tx_type
);
vpx_memset
(
input
,
0
,
128
);
vp9_add_residual_8x8
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_8x8
(
output
,
dest
,
stride
);
}
}
...
...
@@ -123,15 +116,15 @@ void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *dest,
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4
(
input
,
output
,
4
<<
1
);
vpx_memset
(
input
,
0
,
32
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
);
}
else
{
vp9_dc_only_idct_add
(
input
[
0
]
*
dq
[
0
],
dest
,
dest
,
stride
,
stride
);
((
int
*
)
input
)[
0
]
=
0
;
}
}
void
vp9_dequant_dc_idct_add_c
(
int16_t
*
input
,
const
int16_t
*
dq
,
uint8_t
*
pred
,
uint8_t
*
dest
,
int
pitch
,
int
stride
,
int
dc
)
{
void
vp9_dequant_dc_idct_add_c
(
int16_t
*
input
,
const
int16_t
*
dq
,
uint8_t
*
dest
,
int
stride
,
int
dc
)
{
int
i
;
DECLARE_ALIGNED_ARRAY
(
16
,
int16_t
,
output
,
16
);
...
...
@@ -143,7 +136,7 @@ void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4
(
input
,
output
,
4
<<
1
);
vpx_memset
(
input
,
0
,
32
);
vp9_add_residual_4x4
(
output
,
pred
,
pitch
,
dest
,
stride
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
);
}
void
vp9_dequant_idct_add_lossless_c
(
int16_t
*
input
,
const
int16_t
*
dq
,
...
...
@@ -157,7 +150,7 @@ void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
vp9_short_iwalsh4x4_c
(
input
,
output
,
4
<<
1
);
vpx_memset
(
input
,
0
,
32
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
);
}
else
{
vp9_dc_only_inv_walsh_add
(
input
[
0
]
*
dq
[
0
],
dest
,
dest
,
stride
,
stride
);
((
int
*
)
input
)[
0
]
=
0
;
...
...
@@ -176,7 +169,7 @@ void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
vp9_short_iwalsh4x4_c
(
input
,
output
,
4
<<
1
);
vpx_memset
(
input
,
0
,
32
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_4x4
(
output
,
dest
,
stride
);
}
void
vp9_dequant_idct_add_8x8_c
(
int16_t
*
input
,
const
int16_t
*
dq
,
...
...
@@ -202,7 +195,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
vp9_short_idct1_8x8_c
(
&
in
,
&
out
);
input
[
0
]
=
0
;
vp9_add_constant_residual_8x8
(
out
,
dest
,
stride
,
dest
,
stride
);
vp9_add_constant_residual_8x8
(
out
,
dest
,
stride
);
#if !CONFIG_SCATTERSCAN
}
else
if
(
eob
<=
10
)
{
input
[
1
]
*=
dq
[
1
];
...
...
@@ -222,7 +215,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
input
[
16
]
=
input
[
17
]
=
0
;
input
[
24
]
=
0
;
vp9_add_residual_8x8
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_8x8
(
output
,
dest
,
stride
);
#endif
}
else
{
int
i
;
...
...
@@ -234,7 +227,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
// the idct halves ( >> 1) the pitch
vp9_short_idct8x8
(
input
,
output
,
8
<<
1
);
vpx_memset
(
input
,
0
,
128
);
vp9_add_residual_8x8
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_8x8
(
output
,
dest
,
stride
);
}
}
}
...
...
@@ -262,7 +255,7 @@ void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input,
vpx_memset
(
input
,
0
,
512
);
vp9_add_residual_16x16
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_16x16
(
output
,
dest
,
stride
);
}
}
...
...
@@ -282,7 +275,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
vp9_short_idct1_16x16_c
(
&
in
,
&
out
);
input
[
0
]
=
0
;
vp9_add_constant_residual_16x16
(
out
,
dest
,
stride
,
dest
,
stride
);
vp9_add_constant_residual_16x16
(
out
,
dest
,
stride
);
#if !CONFIG_SCATTERSCAN
}
else
if
(
eob
<=
10
)
{
input
[
0
]
*=
dq
[
0
];
...
...
@@ -305,7 +298,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
input
[
32
]
=
input
[
33
]
=
0
;
input
[
48
]
=
0
;
vp9_add_residual_16x16
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_16x16
(
output
,
dest
,
stride
);
#endif
}
else
{
int
i
;
...
...
@@ -319,7 +312,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
// the idct halves ( >> 1) the pitch
vp9_short_idct16x16
(
input
,
output
,
16
<<
1
);
vpx_memset
(
input
,
0
,
512
);
vp9_add_residual_16x16
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_16x16
(
output
,
dest
,
stride
);
}
}
}
...
...
@@ -332,7 +325,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
input
[
0
]
=
input
[
0
]
*
dq
[
0
]
/
2
;
if
(
eob
==
1
)
{
vp9_short_idct1_32x32
(
input
,
output
);
vp9_add_constant_residual_32x32
(
output
[
0
],
dest
,
stride
,
dest
,
stride
);
vp9_add_constant_residual_32x32
(
output
[
0
],
dest
,
stride
);
input
[
0
]
=
0
;
#if !CONFIG_SCATTERSCAN
}
else
if
(
eob
<=
10
)
{
...
...
@@ -354,7 +347,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
input
[
64
]
=
input
[
65
]
=
0
;
input
[
96
]
=
0
;
vp9_add_residual_32x32
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_32x32
(
output
,
dest
,
stride
);
#endif
}
else
{
int
i
;
...
...
@@ -362,7 +355,7 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
input
[
i
]
=
input
[
i
]
*
dq
[
1
]
/
2
;
vp9_short_idct32x32
(
input
,
output
,
64
);
vpx_memset
(
input
,
0
,
2048
);
vp9_add_residual_32x32
(
output
,
dest
,
stride
,
dest
,
stride
);
vp9_add_residual_32x32
(
output
,
dest
,
stride
);
}
}
}
vp9/decoder/x86/vp9_dequantize_x86.c
View file @
466f3951
...
...
@@ -17,8 +17,7 @@
#if HAVE_SSE2
void
vp9_add_residual_4x4_sse2
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_residual_4x4_sse2
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
const
int
width
=
4
;
const
__m128i
zero
=
_mm_setzero_si128
();
...
...
@@ -29,10 +28,10 @@ void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
const
__m128i
d3
=
_mm_loadl_epi64
((
const
__m128i
*
)(
diff
+
3
*
width
));
// Prediction data.
__m128i
p0
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
pred
+
0
*
pitch
));
__m128i
p1
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
pred
+
1
*
pitch
));
__m128i
p2
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
pred
+
2
*
pitch
));
__m128i
p3
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
pred
+
3
*
pitch
));
__m128i
p0
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
0
*
stride
));
__m128i
p1
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
1
*
stride
));
__m128i
p2
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
2
*
stride
));
__m128i
p3
=
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
dest
+
3
*
stride
));
p0
=
_mm_unpacklo_epi8
(
p0
,
zero
);
p1
=
_mm_unpacklo_epi8
(
p1
,
zero
);
...
...
@@ -61,8 +60,7 @@ void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
*
(
int
*
)
dest
=
_mm_cvtsi128_si32
(
p2
);
}
void
vp9_add_residual_8x8_sse2
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_residual_8x8_sse2
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
const
int
width
=
8
;
const
__m128i
zero
=
_mm_setzero_si128
();
...
...
@@ -77,14 +75,14 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
const
__m128i
d7
=
_mm_load_si128
((
const
__m128i
*
)(
diff
+
7
*
width
));
// Prediction data.
__m128i
p0
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
0
*
pitch
));
__m128i
p1
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
1
*
pitch
));
__m128i
p2
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
2
*
pitch
));
__m128i
p3
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
3
*
pitch
));
__m128i
p4
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
4
*
pitch
));
__m128i
p5
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
5
*
pitch
));
__m128i
p6
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
6
*
pitch
));
__m128i
p7
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
7
*
pitch
));
__m128i
p0
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
0
*
stride
));
__m128i
p1
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
1
*
stride
));
__m128i
p2
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
2
*
stride
));
__m128i
p3
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
3
*
stride
));
__m128i
p4
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
4
*
stride
));
__m128i
p5
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
5
*
stride
));
__m128i
p6
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
6
*
stride
));
__m128i
p7
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
7
*
stride
));
p0
=
_mm_unpacklo_epi8
(
p0
,
zero
);
p1
=
_mm_unpacklo_epi8
(
p1
,
zero
);
...
...
@@ -126,8 +124,8 @@ void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
_mm_storel_epi64
((
__m128i
*
)(
dest
+
7
*
stride
),
p6
);
}
void
vp9_add_residual_16x16_sse2
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_residual_16x16_sse2
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
const
int
width
=
16
;
int
i
=
4
;
const
__m128i
zero
=
_mm_setzero_si128
();
...
...
@@ -147,10 +145,10 @@ void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
d7
=
_mm_load_si128
((
const
__m128i
*
)(
diff
+
3
*
width
+
8
));
// Prediction data.
p1
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
0
*
pitch
));
p3
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
1
*
pitch
));
p5
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
2
*
pitch
));
p7
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
3
*
pitch
));
p1
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
0
*
stride
));
p3
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
1
*
stride
));
p5
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
2
*
stride
));
p7
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
3
*
stride
));
p0
=
_mm_unpacklo_epi8
(
p1
,
zero
);
p1
=
_mm_unpackhi_epi8
(
p1
,
zero
);
...
...
@@ -181,13 +179,12 @@ void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
_mm_store_si128
((
__m128i
*
)(
dest
+
3
*
stride
),
p3
);
diff
+=
4
*
width
;
pred
+=
4
*
pitch
;
dest
+=
4
*
stride
;
}
while
(
--
i
);
}
void
vp9_add_residual_32x32_sse2
(
const
int16_t
*
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_residual_32x32_sse2
(
const
int16_t
*
diff
,
uint8_t
*
dest
,
int
stride
)
{
const
int
width
=
32
;
int
i
=
16
;
const
__m128i
zero
=
_mm_setzero_si128
();
...
...
@@ -207,10 +204,10 @@ void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
d7
=
_mm_load_si128
((
const
__m128i
*
)(
diff
+
1
*
width
+
24
));
// Prediction data.
p1
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
0
*
pitch
));
p3
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
0
*
pitch
+
16
));
p5
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
1
*
pitch
));
p7
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
1
*
pitch
+
16
));
p1
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
0
*
stride
));
p3
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
0
*
stride
+
16
));
p5
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
1
*
stride
));
p7
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
1
*
stride
+
16
));
p0
=
_mm_unpacklo_epi8
(
p1
,
zero
);
p1
=
_mm_unpackhi_epi8
(
p1
,
zero
);
...
...
@@ -241,25 +238,24 @@ void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
_mm_store_si128
((
__m128i
*
)(
dest
+
1
*
stride
+
16
),
p3
);
diff
+=
2
*
width
;
pred
+=
2
*
pitch
;
dest
+=
2
*
stride
;
}
while
(
--
i
);
}
void
vp9_add_constant_residual_8x8_sse2
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_constant_residual_8x8_sse2
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
)
{
uint8_t
abs_diff
;
__m128i
d
;
// Prediction data.
__m128i
p0
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
0
*
pitch
));
__m128i
p1
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
1
*
pitch
));
__m128i
p2
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
2
*
pitch
));
__m128i
p3
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
3
*
pitch
));
__m128i
p4
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
4
*
pitch
));
__m128i
p5
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
5
*
pitch
));
__m128i
p6
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
6
*
pitch
));
__m128i
p7
=
_mm_loadl_epi64
((
const
__m128i
*
)(
pred
+
7
*
pitch
));
__m128i
p0
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
0
*
stride
));
__m128i
p1
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
1
*
stride
));
__m128i
p2
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
2
*
stride
));
__m128i
p3
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
3
*
stride
));
__m128i
p4
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
4
*
stride
));
__m128i
p5
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
5
*
stride
));
__m128i
p6
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
6
*
stride
));
__m128i
p7
=
_mm_loadl_epi64
((
const
__m128i
*
)(
dest
+
7
*
stride
));
p0
=
_mm_unpacklo_epi64
(
p0
,
p1
);
p2
=
_mm_unpacklo_epi64
(
p2
,
p3
);
...
...
@@ -303,29 +299,28 @@ void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred,
_mm_storel_epi64
((
__m128i
*
)(
dest
+
7
*
stride
),
p6
);
}
void
vp9_add_constant_residual_16x16_sse2
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_constant_residual_16x16_sse2
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
)
{
uint8_t
abs_diff
;
__m128i
d
;
// Prediction data.
__m128i
p0
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
0
*
pitch
));
__m128i
p1
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
1
*
pitch
));
__m128i
p2
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
2
*
pitch
));
__m128i
p3
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
3
*
pitch
));
__m128i
p4
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
4
*
pitch
));
__m128i
p5
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
5
*
pitch
));
__m128i
p6
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
6
*
pitch
));
__m128i
p7
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
7
*
pitch
));
__m128i
p8
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
8
*
pitch
));
__m128i
p9
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
9
*
pitch
));
__m128i
p10
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
10
*
pitch
));
__m128i
p11
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
11
*
pitch
));
__m128i
p12
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
12
*
pitch
));
__m128i
p13
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
13
*
pitch
));
__m128i
p14
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
14
*
pitch
));
__m128i
p15
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
15
*
pitch
));
__m128i
p0
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
0
*
stride
));
__m128i
p1
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
1
*
stride
));
__m128i
p2
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
2
*
stride
));
__m128i
p3
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
3
*
stride
));
__m128i
p4
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
4
*
stride
));
__m128i
p5
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
5
*
stride
));
__m128i
p6
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
6
*
stride
));
__m128i
p7
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
7
*
stride
));
__m128i
p8
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
8
*
stride
));
__m128i
p9
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
9
*
stride
));
__m128i
p10
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
10
*
stride
));
__m128i
p11
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
11
*
stride
));
__m128i
p12
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
12
*
stride
));
__m128i
p13
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
13
*
stride
));
__m128i
p14
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
14
*
stride
));
__m128i
p15
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
15
*
stride
));
// Clip diff value to [0, 255] range. Then, do addition or subtraction
// according to its sign.
...
...
@@ -390,9 +385,8 @@ void vp9_add_constant_residual_16x16_sse2(const int16_t diff,
_mm_store_si128
((
__m128i
*
)(
dest
+
15
*
stride
),
p15
);
}
void
vp9_add_constant_residual_32x32_sse2
(
const
int16_t
diff
,
const
uint8_t
*
pred
,
int
pitch
,
uint8_t
*
dest
,
int
stride
)
{
void
vp9_add_constant_residual_32x32_sse2
(
const
int16_t
diff
,
uint8_t
*
dest
,
int
stride
)
{
uint8_t
abs_diff
;
__m128i
d
;
int
i
=
8
;
...
...
@@ -407,14 +401,14 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
do
{
// Prediction data.
__m128i
p0
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
0
*
pitch
));
__m128i
p1
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
0
*
pitch
+
16
));
__m128i
p2
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
1
*
pitch
));
__m128i
p3
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
1
*
pitch
+
16
));
__m128i
p4
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
2
*
pitch
));
__m128i
p5
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
2
*
pitch
+
16
));
__m128i
p6
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
3
*
pitch
));
__m128i
p7
=
_mm_load_si128
((
const
__m128i
*
)(
pred
+
3
*
pitch
+
16
));
__m128i
p0
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
0
*
stride
));
__m128i
p1
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
0
*
stride
+
16
));
__m128i
p2
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
1
*
stride
));
__m128i
p3
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
1
*
stride
+
16
));
__m128i
p4
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
2
*
stride
));
__m128i
p5
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
2
*
stride
+
16
));
__m128i
p6
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
3
*
stride
));
__m128i
p7
=
_mm_load_si128
((
const
__m128i
*
)(
dest
+
3
*
stride
+
16
));
// Clip diff value to [0, 255] range. Then, do addition or subtraction
// according to its sign.
...
...
@@ -448,7 +442,6 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
_mm_store_si128
((
__m128i
*
)(
dest
+
3
*
stride
),
p6
);
_mm_store_si128
((
__m128i
*
)(
dest
+
3
*
stride
+
16
),
p7
);
pred
+=
4
*
pitch
;
dest
+=
4
*
stride
;
}
while
(
--
i
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment