Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
8e78a0d3
Commit
8e78a0d3
authored
Aug 28, 2014
by
Dmitry Kovalev
Committed by
Gerrit Code Review
Aug 28, 2014
Browse files
Merge "Implementing 4x4 variance calculation with SSE2."
parents
73edeb03
dcac083c
Changes
1
Hide whitespace changes
Inline
Side-by-side
vp9/encoder/x86/vp9_variance_sse2.c
View file @
8e78a0d3
...
...
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <emmintrin.h>  // SSE2
#include <string.h>

#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
...
...
@@ -17,10 +19,37 @@ typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
const
unsigned
char
*
ref
,
int
ref_stride
,
unsigned
int
*
sse
,
int
*
sum
);
// Prototype only; the definition of vp9_get4x4var_mmx is not in this file
// view. Its parameter list (src/src_stride, ref/ref_stride, sse and sum
// output pointers) matches the variance_fn_t callback type declared above,
// which is how it is handed to variance_sse2() further down.
unsigned
int
vp9_get4x4var_mmx
(
const
unsigned
char
*
src
,
int
src_stride
,
const
unsigned
char
*
ref
,
int
ref_stride
,
unsigned
int
*
sse
,
int
*
sum
);
// Loads 4 bytes from `addr` into the low 32 bits of an XMM register (upper
// bits zeroed). memcpy is used instead of dereferencing a casted
// `const uint32_t *` because pixel rows are byte pointers with arbitrary
// alignment: the cast-and-dereference form violates strict aliasing and may
// perform a misaligned access, both of which are undefined behaviour.
static __m128i read4_unaligned(const void *addr) {
  uint32_t bits;
  memcpy(&bits, addr, sizeof(bits));
  return _mm_cvtsi32_si128((int)bits);
}

// Reads 4 bytes from row `i` and 4 bytes from row `i + 1` of `p` and
// byte-interleaves them into the low 64 bits of the result.
// Arguments are parenthesized so that expressions can be passed safely.
#define READ64(p, stride, i)                               \
  _mm_unpacklo_epi8(read4_unaligned((p) + (i) * (stride)), \
                    read4_unaligned((p) + ((i) + 1) * (stride)))

// Computes, for a 4x4 block of 8-bit pixels, the sum of differences and the
// sum of squared differences between `src` and `ref`.
//
//   src, src_stride  first block and the byte distance between its rows
//   ref, ref_stride  second block and the byte distance between its rows
//   sse              out: sum over the 16 pixels of (src - ref)^2
//   sum              out: sum over the 16 pixels of (src - ref)
//
// Always returns 0; results are delivered through `sse` and `sum`.
unsigned int vp9_get4x4var_sse2(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride,
                                unsigned int *sse, int *sum) {
  const __m128i zero = _mm_setzero_si128();

  // Widen the 16 pixels of each block to 16-bit lanes: rows 0-1 in *0,
  // rows 2-3 in *1. src and ref are interleaved the same way, so lanes of
  // the two blocks still correspond pixel for pixel.
  const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero);
  const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero);
  const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero);
  const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero);

  const __m128i diff0 = _mm_sub_epi16(src0, ref0);
  const __m128i diff1 = _mm_sub_epi16(src1, ref1);

  // sum: fold the 8 + 8 signed 16-bit differences down into lane 0.
  __m128i vsum = _mm_add_epi16(diff0, diff1);
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
  vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
  // _mm_extract_epi16 zero-extends, so cast through int16_t to recover the
  // sign. |sum| <= 16 * 255 = 4080, which fits in 16 bits.
  *sum = (int16_t)_mm_extract_epi16(vsum, 0);

  // sse: madd squares each difference and adds adjacent pairs into 32-bit
  // lanes; the remaining four lanes are then folded into lane 0.
  vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0),
                       _mm_madd_epi16(diff1, diff1));
  vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
  vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
  *sse = _mm_cvtsi128_si32(vsum);

  return 0;
}
unsigned
int
vp9_get8x8var_sse2
(
const
unsigned
char
*
src
,
int
src_stride
,
const
unsigned
char
*
ref
,
int
ref_stride
,
...
...
@@ -55,8 +84,7 @@ unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride,
const
unsigned
char
*
ref
,
int
ref_stride
,
unsigned
int
*
sse
)
{
int
sum
;
variance_sse2
(
src
,
src_stride
,
ref
,
ref_stride
,
4
,
4
,
sse
,
&
sum
,
vp9_get4x4var_mmx
,
4
);
vp9_get4x4var_sse2
(
src
,
src_stride
,
ref
,
ref_stride
,
sse
,
&
sum
);
return
*
sse
-
(((
unsigned
int
)
sum
*
sum
)
>>
4
);
}
...
...
@@ -65,7 +93,7 @@ unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride,
unsigned
int
*
sse
)
{
int
sum
;
variance_sse2
(
src
,
src_stride
,
ref
,
ref_stride
,
8
,
4
,
sse
,
&
sum
,
vp9_get4x4var_
mmx
,
4
);
sse
,
&
sum
,
vp9_get4x4var_
sse2
,
4
);
return
*
sse
-
(((
unsigned
int
)
sum
*
sum
)
>>
5
);
}
...
...
@@ -74,7 +102,7 @@ unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride,
unsigned
int
*
sse
)
{
int
sum
;
variance_sse2
(
src
,
src_stride
,
ref
,
ref_stride
,
4
,
8
,
sse
,
&
sum
,
vp9_get4x4var_
mmx
,
4
);
sse
,
&
sum
,
vp9_get4x4var_
sse2
,
4
);
return
*
sse
-
(((
unsigned
int
)
sum
*
sum
)
>>
5
);
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment