Commit f76ccce5 authored by Marco Paniconi, committed by Marco

Revert "Revert "Force_split on 16x16 blocks in variance partition.""

This reverts commit 004b9d83

Change-Id: I2f2d0bdb9368c2c07f1d29a69cd461267a3a8743
parent c0b23ac2
......@@ -1114,6 +1114,9 @@ specialize qw/vp9_avg_8x8 sse2 neon/;
# NOTE(review): presumably a 4x4 block average; the implementation is not visible here.
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2/;
# Min and max absolute pixel difference between two 8x8 blocks (s with stride p,
# d with stride dp); results are written through *min and *max. sse2 variant listed.
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_minmax_8x8 sse2/;
# NOTE(review): presumably an 8x8 Hadamard transform of src_diff into coeff — confirm against the implementation.
add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64";
......@@ -1137,6 +1140,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_highbd_avg_8x8/;
add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_highbd_avg_4x4/;
# High-bitdepth min/max of absolute differences over an 8x8 block. The function
# returns nothing (results go through *min and *max), so the prototype must be
# `void` to agree with the C definition of vp9_highbd_minmax_8x8_c.
add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_highbd_minmax_8x8/;
}
# ENCODEMB INVOKE
......
......@@ -155,6 +155,20 @@ int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
return var;
}
// Scans two 8x8 blocks of 8-bit pixels and reports the smallest and largest
// absolute per-pixel difference between them.
//   s, p   - first block and its row stride
//   d, dp  - second block and its row stride
//   min    - receives the minimum absolute difference (starts at 255)
//   max    - receives the maximum absolute difference (starts at 0)
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
                      int *min, int *max) {
  int row, col;

  *min = 255;
  *max = 0;

  for (row = 0; row < 8; ++row) {
    for (col = 0; col < 8; ++col) {
      const int delta = abs(s[col] - d[col]);
      if (delta < *min) {
        *min = delta;
      }
      if (delta > *max) {
        *max = delta;
      }
    }
    // Step both blocks down one row.
    s += p;
    d += dp;
  }
}
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
int i, j;
......@@ -175,6 +189,22 @@ unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) {
return (sum + 8) >> 4;
}
// High-bitdepth variant of the 8x8 min/max search: reports the smallest and
// largest absolute per-sample difference between two 8x8 blocks.
//   s8, p   - first block (converted with CONVERT_TO_SHORTPTR) and its stride,
//             counted in 16-bit samples
//   d8, dp  - second block (converted the same way) and its stride
//   min     - receives the minimum absolute difference
//   max     - receives the maximum absolute difference
void vp9_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
                             int dp, int *min, int *max) {
  int i, j;
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
  // Samples are 16-bit here, so an absolute difference can exceed 255.
  // Seed the minimum with the largest uint16_t value so the true minimum
  // always replaces it.
  *min = 65535;
  *max = 0;
  for (i = 0; i < 8; ++i, s += p, d += dp) {
    for (j = 0; j < 8; ++j) {
      const int diff = abs(s[j] - d[j]);
      *min = diff < *min ? diff : *min;
      *max = diff > *max ? diff : *max;
    }
  }
}
#endif // CONFIG_VP9_HIGHBITDEPTH
This diff is collapsed.
......@@ -463,6 +463,8 @@ typedef struct VP9_COMP {
// 0 - threshold_64x64; 1 - threshold_32x32;
// 2 - threshold_16x16; 3 - vbp_threshold_8x8;
int64_t vbp_thresholds[4];
int64_t vbp_threshold_minmax;
int64_t vbp_threshold_sad;
BLOCK_SIZE vbp_bsize_min;
// Multi-threading
......
......@@ -11,6 +11,83 @@
#include <emmintrin.h>
#include "vpx_ports/mem.h"
// SSE2 search for the smallest and largest absolute difference between
// corresponding pixels of two 8x8 blocks of 8-bit samples.
//   s, p   - first block and its row stride (in bytes)
//   d, dp  - second block and its row stride (in bytes)
//   min    - receives the minimum absolute difference
//   max    - receives the maximum absolute difference
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
                         int *min, int *max) {
  const __m128i zero = _mm_setzero_si128();
  __m128i src, ref, delta, mag, hi, lo;
  int row;

  // Row 0 seeds both running extremes. Each row is loaded as 8 bytes and
  // widened to eight 16-bit lanes before subtracting.
  src = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)s), zero);
  ref = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)d), zero);
  delta = _mm_subs_epi16(src, ref);
  // Absolute value taken as max(x, 0 - x).
  mag = _mm_max_epi16(delta, _mm_subs_epi16(zero, delta));
  hi = mag;
  lo = mag;

  // Rows 1..7 are folded into the per-lane running max/min.
  for (row = 1; row < 8; ++row) {
    s += p;
    d += dp;
    src = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)s), zero);
    ref = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)d), zero);
    delta = _mm_subs_epi16(src, ref);
    mag = _mm_max_epi16(delta, _mm_subs_epi16(zero, delta));
    hi = _mm_max_epi16(hi, mag);
    lo = _mm_min_epi16(lo, mag);
  }

  // Horizontal reduction of the eight lanes into lane 0: fold the upper
  // 64 bits onto the lower, then shift by 32 and by 16 bits.
  hi = _mm_max_epi16(hi, _mm_srli_si128(hi, 8));
  hi = _mm_max_epi16(hi, _mm_srli_epi64(hi, 32));
  hi = _mm_max_epi16(hi, _mm_srli_epi64(hi, 16));
  *max = _mm_extract_epi16(hi, 0);

  lo = _mm_min_epi16(lo, _mm_srli_si128(lo, 8));
  lo = _mm_min_epi16(lo, _mm_srli_epi64(lo, 32));
  lo = _mm_min_epi16(lo, _mm_srli_epi64(lo, 16));
  *min = _mm_extract_epi16(lo, 0);
}
unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment