Commit 37ebf187 authored by Urvang Joshi

[Normative] DC_PRED: Avoid div using mult+shift.

DC_PRED requires dividing by 'count' = block width + height.
- For square blocks this is efficient, as count is a power of 2.
- But for rectangular blocks with a 1:2 or 1:4 aspect ratio, count is 3 or 5 times a power of 2, so the division is inefficient.

So, we approximate this division with an integer multiply and a shift.

test_intra_pred_speed test results:

Size    Before (ms) After (ms)
4x8     221         206
8x4     195         189
8x16    121          88
16x8     84          73
16x32    63          60
32x16    68          66

Compression quality is neutral:
- Division vs 32-bit mult + shift is neutral:
https://arewecompressedyet.com/?job=ALL_rectpred_bef%402018-01-10T19%3A02%3A20.745Z&job=ALL_rectpred_aft_mult_shift%402018-01-10T19%3A04%3A21.380Z
- And 32-bit mult + shift vs 8-bit mult + shift is also neutral:
https://arewecompressedyet.com/?job=mult_shift_32bit%402018-01-16T20%3A30%3A21.418Z&job=mult_shift_08bit%402018-01-16T20%3A31%3A25.673Z

BUG=aomedia:1191

Change-Id: I2343a055bc3d85cb7be7652d7a6db1b768361be9
parent 1e959897
......@@ -172,20 +172,24 @@ specialize qw/aom_smooth_predictor_32x32 ssse3/;
specialize qw/aom_d135_predictor_4x4 neon/;
specialize qw/aom_d153_predictor_4x4 ssse3/;
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_d153_predictor_8x8 ssse3/;
specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_d153_predictor_32x32 ssse3/;
specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
# TODO(luoyi): Need to rewrite these.
# specialize qw/aom_dc_predictor_4x8 sse2/;
# specialize qw/aom_dc_predictor_8x4 sse2/;
# specialize qw/aom_dc_predictor_8x16 sse2/;
# specialize qw/aom_dc_predictor_16x8 sse2/;
# specialize qw/aom_dc_predictor_16x32 sse2/;
# specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_d207e_predictor_4x4 sse2/;
specialize qw/aom_d207e_predictor_4x8 sse2/;
specialize qw/aom_d207e_predictor_8x4 sse2/;
......@@ -229,16 +233,20 @@ specialize qw/aom_d45e_predictor_32x32 ssse3/;
specialize qw/aom_highbd_v_predictor_16x32 sse2/;
specialize qw/aom_highbd_v_predictor_32x16 sse2/;
specialize qw/aom_highbd_v_predictor_32x32 sse2/;
specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
specialize qw/aom_highbd_dc_predictor_8x4 sse2/;;
specialize qw/aom_highbd_dc_predictor_8x8 sse2/;;
specialize qw/aom_highbd_dc_predictor_8x16 sse2/;;
specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
# TODO(luoyi): Need to rewrite these
# specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
# specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
# specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
# specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
# specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
# specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_h_predictor_4x4 sse2/;
specialize qw/aom_highbd_h_predictor_4x8 sse2/;
specialize qw/aom_highbd_h_predictor_8x4 sse2/;
......
......@@ -371,6 +371,102 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
}
// Fills a bw x bh block at 'dst' with a single DC value derived from the
// 'bw' pixels in 'above' and the 'bh' pixels in 'left'.
//
// The DC value is (sum * multiplier) >> shift, i.e. a fixed-point
// approximation of sum / (bw + bh); the caller supplies the multiplier and
// shift that match the block size. The result is truncated, not rounded.
//
// dst        - top-left pixel of the destination block.
// stride     - distance, in pixels, between successive destination rows.
// bw, bh     - block width and height.
// above      - at least 'bw' readable pixels.
// left       - at least 'bh' readable pixels.
// multiplier - fixed-point reciprocal (see DC_MULTIPLIER_1X2 / _1X4).
// shift      - right shift applied after the multiplication.
static INLINE void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
                                     int bh, const uint8_t *above,
                                     const uint8_t *left, uint32_t multiplier,
                                     int shift) {
  uint8_t *row_ptr = dst;
  uint64_t scaled;
  int total = 0;
  int col = 0;
  int row = 0;
  int rows_left;
  int dc;

  // Accumulate the top edge, then the left edge.
  while (col < bw) {
    total += above[col];
    ++col;
  }
  while (row < bh) {
    total += left[row];
    ++row;
  }

  // Widen before multiplying so the product cannot overflow.
  scaled = (uint64_t)total * multiplier;
  dc = (int)(scaled >> shift);

  // Every row of the block receives the same value.
  for (rows_left = bh; rows_left > 0; --rows_left) {
    memset(row_ptr, dc, bw);
    row_ptr += stride;
  }
}
// Fixed-point reciprocals used by the rectangular DC predictors.
// - DC_MULTIPLIER_1X2: 0xAB / 2^9 (171 / 512) approximates 1/3, for blocks
//   with a 1:2 aspect ratio, where bw + bh == 3 * 2^k.
// - DC_MULTIPLIER_1X4: 0xCD / 2^10 (205 / 1024) approximates 1/5, for blocks
//   with a 1:4 aspect ratio, where bw + bh == 5 * 2^k.
// Each caller passes a shift of (9 + k) or (10 + k) respectively, so that the
// power-of-two factor of (bw + bh) is divided out by the same shift.
#define DC_MULTIPLIER_1X2 0xABU
#define DC_MULTIPLIER_1X4 0xCDU
// DC predictor for 4x8 blocks: approximates sum / 12 (4 above + 8 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 11.
void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 4, 8, above, left, DC_MULTIPLIER_1X2, 11);
}
// DC predictor for 8x4 blocks: approximates sum / 12 (8 above + 4 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 11.
void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 8, 4, above, left, DC_MULTIPLIER_1X2, 11);
}
// DC predictor for 4x16 blocks: approximates sum / 20 (4 above + 16 left
// pixels) as (sum * DC_MULTIPLIER_1X4) >> 12.
void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 4, 16, above, left, DC_MULTIPLIER_1X4, 12);
}
// DC predictor for 16x4 blocks: approximates sum / 20 (16 above + 4 left
// pixels) as (sum * DC_MULTIPLIER_1X4) >> 12.
void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 4, above, left, DC_MULTIPLIER_1X4, 12);
}
// DC predictor for 8x16 blocks: approximates sum / 24 (8 above + 16 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 12.
void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 8, 16, above, left, DC_MULTIPLIER_1X2, 12);
}
// DC predictor for 16x8 blocks: approximates sum / 24 (16 above + 8 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 12.
void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 8, above, left, DC_MULTIPLIER_1X2, 12);
}
// DC predictor for 8x32 blocks: approximates sum / 40 (8 above + 32 left
// pixels) as (sum * DC_MULTIPLIER_1X4) >> 13.
void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 8, 32, above, left, DC_MULTIPLIER_1X4, 13);
}
// DC predictor for 32x8 blocks: approximates sum / 40 (32 above + 8 left
// pixels) as (sum * DC_MULTIPLIER_1X4) >> 13.
void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 32, 8, above, left, DC_MULTIPLIER_1X4, 13);
}
// DC predictor for 16x32 blocks: approximates sum / 48 (16 above + 32 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 13.
void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 32, above, left, DC_MULTIPLIER_1X2, 13);
}
// DC predictor for 32x16 blocks: approximates sum / 48 (32 above + 16 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 13.
void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 32, 16, above, left, DC_MULTIPLIER_1X2, 13);
}
#if CONFIG_TX64X64
// DC predictor for 16x64 blocks: approximates sum / 80 (16 above + 64 left
// pixels) as (sum * DC_MULTIPLIER_1X4) >> 14.
void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 64, above, left, DC_MULTIPLIER_1X4, 14);
}
// DC predictor for 64x16 blocks: approximates sum / 80 (64 above + 16 left
// pixels) as (sum * DC_MULTIPLIER_1X4) >> 14.
void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 64, 16, above, left, DC_MULTIPLIER_1X4, 14);
}
// DC predictor for 32x64 blocks: approximates sum / 96 (32 above + 64 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 14.
void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 32, 64, above, left, DC_MULTIPLIER_1X2, 14);
}
// DC predictor for 64x32 blocks: approximates sum / 96 (64 above + 32 left
// pixels) as (sum * DC_MULTIPLIER_1X2) >> 14.
void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 64, 32, above, left, DC_MULTIPLIER_1X2, 14);
}
#endif // CONFIG_TX64X64
void aom_d45e_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int A = above[0];
......@@ -918,6 +1014,129 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
}
}
// High-bitdepth variant of the rectangular DC predictor: fills a bw x bh
// block at 'dst' with one DC value derived from the 'bw' pixels in 'above'
// and the 'bh' pixels in 'left'.
//
// The DC value is (sum * multiplier) >> shift, a truncating fixed-point
// approximation of sum / (bw + bh), clamped to the largest value that is
// representable at bit depth 'bd'.
//
// dst        - top-left pixel of the destination block.
// stride     - distance, in pixels, between successive destination rows.
// bw, bh     - block width and height.
// above      - at least 'bw' readable pixels.
// left       - at least 'bh' readable pixels.
// bd         - bit depth of the pixels; assumed to be in [1, 16] (the callers
//              visible here simply forward their own 'bd').
// multiplier - fixed-point reciprocal (see DC_MULTIPLIER_1X2 / _1X4).
// shift      - right shift applied after the multiplication.
static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd,
                                            uint32_t multiplier, int shift) {
  const int max_pixel = (1 << bd) - 1;
  int i, r, expected_dc, sum = 0;

  for (i = 0; i < bw; i++) {
    sum += above[i];
  }
  for (i = 0; i < bh; i++) {
    sum += left[i];
  }

  // Widen before multiplying so the product cannot overflow.
  expected_dc = (int)(((uint64_t)sum * multiplier) >> shift);

  // The 8-bit reciprocals slightly over-estimate 1/3 and 1/5. For bd >= 10 a
  // saturated neighbourhood can therefore produce a value above the legal
  // pixel range (e.g. 4x8, bd = 10, all 1023: (12276 * 171) >> 11 == 1024),
  // so clamp to the bit-depth maximum.
  if (expected_dc > max_pixel) expected_dc = max_pixel;

  for (r = 0; r < bh; r++) {
    aom_memset16(dst, expected_dc, bw);
    dst += stride;
  }
}
// High-bitdepth DC predictor for 4x8 blocks: approximates sum / 12 as
// (sum * DC_MULTIPLIER_1X2) >> 11.
void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd,
DC_MULTIPLIER_1X2, 11);
}
// High-bitdepth DC predictor for 8x4 blocks: approximates sum / 12 as
// (sum * DC_MULTIPLIER_1X2) >> 11.
void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd,
DC_MULTIPLIER_1X2, 11);
}
// High-bitdepth DC predictor for 4x16 blocks: approximates sum / 20 as
// (sum * DC_MULTIPLIER_1X4) >> 12.
void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd,
DC_MULTIPLIER_1X4, 12);
}
// High-bitdepth DC predictor for 16x4 blocks: approximates sum / 20 as
// (sum * DC_MULTIPLIER_1X4) >> 12.
void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd,
DC_MULTIPLIER_1X4, 12);
}
// High-bitdepth DC predictor for 8x16 blocks: approximates sum / 24 as
// (sum * DC_MULTIPLIER_1X2) >> 12.
void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd,
DC_MULTIPLIER_1X2, 12);
}
// High-bitdepth DC predictor for 16x8 blocks: approximates sum / 24 as
// (sum * DC_MULTIPLIER_1X2) >> 12.
void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd,
DC_MULTIPLIER_1X2, 12);
}
// High-bitdepth DC predictor for 8x32 blocks: approximates sum / 40 as
// (sum * DC_MULTIPLIER_1X4) >> 13.
void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd,
DC_MULTIPLIER_1X4, 13);
}
// High-bitdepth DC predictor for 32x8 blocks: approximates sum / 40 as
// (sum * DC_MULTIPLIER_1X4) >> 13.
void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd,
DC_MULTIPLIER_1X4, 13);
}
// High-bitdepth DC predictor for 16x32 blocks: approximates sum / 48 as
// (sum * DC_MULTIPLIER_1X2) >> 13.
void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd,
DC_MULTIPLIER_1X2, 13);
}
// High-bitdepth DC predictor for 32x16 blocks: approximates sum / 48 as
// (sum * DC_MULTIPLIER_1X2) >> 13.
void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd,
DC_MULTIPLIER_1X2, 13);
}
#if CONFIG_TX64X64
// High-bitdepth DC predictor for 16x64 blocks: approximates sum / 80 as
// (sum * DC_MULTIPLIER_1X4) >> 14.
void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd,
DC_MULTIPLIER_1X4, 14);
}
// High-bitdepth DC predictor for 64x16 blocks: approximates sum / 80 as
// (sum * DC_MULTIPLIER_1X4) >> 14.
void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd,
DC_MULTIPLIER_1X4, 14);
}
// High-bitdepth DC predictor for 32x64 blocks: approximates sum / 96 as
// (sum * DC_MULTIPLIER_1X2) >> 14.
void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd,
DC_MULTIPLIER_1X2, 14);
}
// High-bitdepth DC predictor for 64x32 blocks: approximates sum / 96 as
// (sum * DC_MULTIPLIER_1X2) >> 14.
void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd,
DC_MULTIPLIER_1X2, 14);
}
#endif // CONFIG_TX64X64
// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time.
......@@ -982,6 +1201,19 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 2, 2) \
intra_pred_above_4x4(type)
// Expands intra_pred_sized and intra_pred_highbd_sized for 'type' at every
// square block size from 2x2 up to 64x64; no rectangular sizes are included.
// NOTE(review): presumably meant for predictor types whose rectangular
// variants are defined by hand instead of via the macros - confirm at the
// use sites.
#define intra_pred_square(type) \
intra_pred_sized(type, 2, 2) \
intra_pred_sized(type, 4, 4) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
intra_pred_sized(type, 32, 32) \
intra_pred_sized(type, 64, 64) \
intra_pred_highbd_sized(type, 2, 2) \
intra_pred_highbd_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 8, 8) \
intra_pred_highbd_sized(type, 16, 16) \
intra_pred_highbd_sized(type, 32, 32) \
intra_pred_highbd_sized(type, 64, 64)
#else // CONFIG_TX64X64
#define intra_pred_rectangular(type) \
intra_pred_sized(type, 4, 8) \
......@@ -1018,6 +1250,17 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 2, 2) \
intra_pred_above_4x4(type)
// Expands intra_pred_sized and intra_pred_highbd_sized for 'type' at every
// square block size from 2x2 up to 32x32; no rectangular sizes are included.
// NOTE(review): presumably meant for predictor types whose rectangular
// variants are defined by hand instead of via the macros - confirm at the
// use sites.
#define intra_pred_square(type) \
intra_pred_sized(type, 2, 2) \
intra_pred_sized(type, 4, 4) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
intra_pred_sized(type, 32, 32) \
intra_pred_highbd_sized(type, 2, 2) \
intra_pred_highbd_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 8, 8) \
intra_pred_highbd_sized(type, 16, 16) \
intra_pred_highbd_sized(type, 32, 32)
#endif // CONFIG_TX64X64
intra_pred_allsizes(d207e)
......@@ -1035,6 +1278,6 @@ intra_pred_allsizes(paeth)
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
intra_pred_allsizes(dc)
intra_pred_square(dc)
/* clang-format on */
#undef intra_pred_allsizes
......@@ -335,10 +335,10 @@ INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest,
#if HAVE_SSE2
const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2),
lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2),
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc_top, sse2), lowbd_intrapred(dc_left, sse2),
lowbd_intrapred(dc_128, sse2), lowbd_intrapred(v, sse2),
lowbd_intrapred(h, sse2),
};
INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
......@@ -362,11 +362,11 @@ const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2),
lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
lowbd_entry(v, 32, 32, avx2), lowbd_entry(h, 32, 32, avx2),
lowbd_entry(dc, 32, 16, avx2), lowbd_entry(dc_top, 32, 16, avx2),
lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
lowbd_entry(v, 32, 16, avx2), lowbd_entry(paeth, 16, 8, avx2),
lowbd_entry(paeth, 16, 16, avx2), lowbd_entry(paeth, 16, 32, avx2),
lowbd_entry(paeth, 32, 16, avx2), lowbd_entry(paeth, 32, 32, avx2),
lowbd_entry(dc_top, 32, 16, avx2), lowbd_entry(dc_left, 32, 16, avx2),
lowbd_entry(dc_128, 32, 16, avx2), lowbd_entry(v, 32, 16, avx2),
lowbd_entry(paeth, 16, 8, avx2), lowbd_entry(paeth, 16, 16, avx2),
lowbd_entry(paeth, 16, 32, avx2), lowbd_entry(paeth, 32, 16, avx2),
lowbd_entry(paeth, 32, 32, avx2),
};
INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
......
......@@ -160,7 +160,7 @@ void TestIntraPred8(const char *block_name, AvxPredFunc const *pred_funcs) {
"97111eb1bc26bade6272015df829f1ae", "d19a8a73cc46b807f2c5e817576cc1e1",
};
static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
"23f9fc11344426c9bee2e06d57dfd628", "2d71a26d1bae1fb34734de7b42fc5eb7",
"8ab62c3d363a05bb72ffdc4ccd0fe2c6", "2d71a26d1bae1fb34734de7b42fc5eb7",
"5af9c1b2fd9d5721fad67b67b3f7c816", "00d71b17be662753813d515f197d145e",
"bef10ec984427e28f4390f43809d10af", "77773cdfb7ed6bc882ab202a64b0a470",
"cba356970f6b9a1b6024e1dbe4a66f9b", "c58c21efc804242848e6f29a93a7984d",
......@@ -202,7 +202,7 @@ void TestIntraPred16(const char *block_name, AvxPredFunc const *pred_funcs) {
"bb6c74c9076c9f266ab11fb57060d8e6", "0c5162bc28489756ddb847b5678e6f07",
};
static const char *const kSignatures16x8[kNumAv1IntraFuncs] = {
"b4cbdbdf10ce13300b4063a3daf99e04", "3731e1e6202064a9d0604d7c293ecee4",
"cbfc8f0593f199629d2453128f4748fe", "3731e1e6202064a9d0604d7c293ecee4",
"6c856188c4256a06452f0d5d70cac436", "1f2192b4c8c497589484ea7bf9c944e8",
"84011bd4b7f565119d06787840e333a0", "0e48949f7a6aa36f0d76b5d01f91124a",
"58114c06f6b9d8285e5020c7afd834ab", "e37afe84a8b3c5e0f048d4652ecbe09e",
......@@ -244,7 +244,7 @@ void TestIntraPred32(const char *block_name, AvxPredFunc const *pred_funcs) {
"866c224746dc260cda861a7b1b383fb3", "cea23799fc3526e1b6a6ff02b42b82af",
};
static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
"d1aeb8d5fdcfd3307922af01a798a4dc", "b0bcb514ebfbee065faea9d34c12ae75",
"431bafa0817b17f8aec0bef13e72bbf7", "b0bcb514ebfbee065faea9d34c12ae75",
"d6a18c63b4e909871c0137ca652fad23", "fd047f2fc1b8ffb95d0eeef3e8796a45",
"645ab60779ea348fd93c81561c31bab9", "4409633c9db8dff41ade4292a3a56e7f",
"b9b2935b2287a9a461ac5c11251ac706", "43b05f808c0ac4fe8accd84d293b0488",
......@@ -306,12 +306,13 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_sse2,
aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_4x4_sse2, aom_d63e_predictor_4x4_sse2, NULL,
NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_sse2,
INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", NULL,
aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2,
aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2,
aom_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_4x8_sse2, aom_d63e_predictor_4x8_sse2, NULL,
NULL, NULL, NULL)
#endif // HAVE_SSE2
#if HAVE_SSSE3
......@@ -388,13 +389,13 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_sse2,
aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_8x8_sse2, aom_d63e_predictor_8x8_sse2, NULL,
NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred8, "intra8x4", aom_dc_predictor_8x4_sse2,
INTRA_PRED_TEST(SSE2_2, TestIntraPred8, "intra8x4", NULL,
aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2,
aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2,
aom_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_8x4_sse2, aom_d63e_predictor_8x4_sse2, NULL,
NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_sse2,
INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", NULL,
aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2,
aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2,
aom_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL,
......@@ -481,14 +482,14 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred16, "intra16x16",
aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_16x16_sse2, aom_d63e_predictor_16x16_sse2,
NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred16, "intra16x8",
aom_dc_predictor_16x8_sse2, aom_dc_left_predictor_16x8_sse2,
aom_dc_top_predictor_16x8_sse2, aom_dc_128_predictor_16x8_sse2,
aom_v_predictor_16x8_sse2, aom_h_predictor_16x8_sse2, NULL,
NULL, NULL, NULL, aom_d207e_predictor_16x8_sse2,
aom_d63e_predictor_16x8_sse2, NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32",
aom_dc_predictor_16x32_sse2, aom_dc_left_predictor_16x32_sse2,
INTRA_PRED_TEST(SSE2_2, TestIntraPred16, "intra16x8", NULL,
aom_dc_left_predictor_16x8_sse2, aom_dc_top_predictor_16x8_sse2,
aom_dc_128_predictor_16x8_sse2, aom_v_predictor_16x8_sse2,
aom_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_16x8_sse2, aom_d63e_predictor_16x8_sse2,
NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32", NULL,
aom_dc_left_predictor_16x32_sse2,
aom_dc_top_predictor_16x32_sse2,
aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2,
aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL,
......@@ -579,8 +580,8 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred32, "intra32x32",
aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_32x32_sse2, aom_d63e_predictor_32x32_sse2,
NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16",
aom_dc_predictor_32x16_sse2, aom_dc_left_predictor_32x16_sse2,
INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16", NULL,
aom_dc_left_predictor_32x16_sse2,
aom_dc_top_predictor_32x16_sse2,
aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2,
aom_h_predictor_32x16_sse2, NULL, NULL, NULL, NULL,
......@@ -607,8 +608,8 @@ INTRA_PRED_TEST(AVX2_1, TestIntraPred32, "intra32x32",
aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2,
aom_h_predictor_32x32_avx2, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_32x32_avx2, NULL, NULL, NULL)
INTRA_PRED_TEST(AVX2_2, TestIntraPred32, "intra32x16",
aom_dc_predictor_32x16_avx2, aom_dc_left_predictor_32x16_avx2,
INTRA_PRED_TEST(AVX2_2, TestIntraPred32, "intra32x16", NULL,
aom_dc_left_predictor_32x16_avx2,
aom_dc_top_predictor_32x16_avx2,
aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
......@@ -717,7 +718,7 @@ void TestHighbdIntraPred8(const char *block_name,
"0edc415b5dd7299f7a34fb9f71d31d78", "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
};
static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
"d58cd4c4bf3b7bbaa5db5e1a5622ec78", "6e572c35aa782d00cafcb99e9ea047ea",
"d4fd8840e17077cfe3031e747d687699", "6e572c35aa782d00cafcb99e9ea047ea",
"e8c22a3702b416dc9ab974505afbed09", "aaa4e4762a795aad7ad74de0c662c4e4",
"a19f9101967383c3dcbd516dc317a291", "9ab8cb91f1a595b9ebe3fe8de58031aa",
"c6c7d65264397d4d31e378e1f1cfd921", "5804158e463ff794b6b8a623f5d2c10d",
......@@ -727,7 +728,7 @@ void TestHighbdIntraPred8(const char *block_name,
"b4871af8316089e3e23522175df7e93f", "d33301e1c2cb173be46792a22d19881a",
};
static const char *const kSignatures8x16[kNumAv1IntraFuncs] = {
"4562de1d0336610880fdd5685498a9ec", "16310fa7076394f16fc85c4b149d89c9",
"086c82fb2e7e2aa7c88115432b3036fe", "16310fa7076394f16fc85c4b149d89c9",
"0e94af88e1dc573b6f0f499cddd1f530", "dfd245ee20d091c67809160340365aa9",
"d3562504327f70c096c5be23fd8a3747", "601b853558502acbb5135eadd2da117a",
"e83f9a8bc16b507d2ed0b6b31a25d6f5", "fc8427d942246e8cba81247bb294afb5",
......@@ -770,7 +771,7 @@ void TestHighbdIntraPred16(const char *block_name,
"688c6660a6dc6fa61fa1aa38e708c209", "0cdf641b4f81d69509c92ae0b93ef5ff",
};
static const char *const kSignatures16x32[kNumAv1IntraFuncs] = {
"aee4b3b0e3cc02d48e2c40d77f807927", "8baef2b2e789f79c8df9d90ad10f34a4",
"663490212042a4e6936d780c425bf10a", "8baef2b2e789f79c8df9d90ad10f34a4",
"038c38ee3c4f090bb8d736eab136aafc", "1a3de2aaeaffd68a9fd6c7f6557b83f3",
"385c6e0ea29421dd81011a2934641e26", "6cf96c285d1a2d4787f955dad715b08c",
"21f82421fda1c3afca8baca0dc048a52", "eac3734852c99a051f6d15a921d9e7b9",
......@@ -803,7 +804,7 @@ void TestHighbdIntraPred32(const char *block_name,
"b073a70d3672f1282236994f5d12e94b", "c51607aebad5dcb3c1e3b58ef9e5b84e",
};
static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
"290b23c9f5a1de7905bfa71a942da29b", "701e7b82593c66da5052fc4b6afd79ce",
"0c072d478d63466c676daf207a0e4ae5", "701e7b82593c66da5052fc4b6afd79ce",
"4da828c5455cd246735a663fbb204989", "e3fbeaf234efece8dbd752b77226200c",
"4d1d8c969f05155a7e7e84cf7aad021b", "c22e4877c2c946d5bdc0d542e29e70cf",
"ffd86b234d65c2e1386a5b5b5c188a69", "50aaaa7d90e300b635ab18cdd73e189b",
......@@ -859,13 +860,12 @@ HIGHBD_INTRA_PRED_TEST(
NULL, NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(
SSE2_2, TestHighbdIntraPred4, "Hbd Intra4x8",
aom_highbd_dc_predictor_4x8_sse2, aom_highbd_dc_left_predictor_4x8_sse2,
aom_highbd_dc_top_predictor_4x8_sse2, aom_highbd_dc_128_predictor_4x8_sse2,
aom_highbd_v_predictor_4x8_sse2, aom_highbd_h_predictor_4x8_sse2,
aom_highbd_d45e_predictor_4x8_sse2, NULL, NULL, NULL,
aom_highbd_d207e_predictor_4x8_sse2, aom_highbd_d63e_predictor_4x8_sse2,
NULL, NULL, NULL, NULL)
SSE2_2, TestHighbdIntraPred4, "Hbd Intra4x8", NULL,
aom_highbd_dc_left_predictor_4x8_sse2, aom_highbd_dc_top_predictor_4x8_sse2,
aom_highbd_dc_128_predictor_4x8_sse2, aom_highbd_v_predictor_4x8_sse2,
aom_highbd_h_predictor_4x8_sse2, aom_highbd_d45e_predictor_4x8_sse2, NULL,
NULL, NULL, aom_highbd_d207e_predictor_4x8_sse2,
aom_highbd_d63e_predictor_4x8_sse2, NULL, NULL, NULL, NULL)
#endif
HIGHBD_INTRA_PRED_TEST(
......@@ -903,21 +903,22 @@ HIGHBD_INTRA_PRED_TEST(
aom_highbd_d207e_predictor_8x8_sse2, aom_highbd_d63e_predictor_8x8_sse2,
NULL, NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(
SSE2_2, TestHighbdIntraPred8, "Hbd Intra8x4",
aom_highbd_dc_predictor_8x4_sse2, aom_highbd_dc_left_predictor_8x4_sse2,
aom_highbd_dc_top_predictor_8x4_sse2, aom_highbd_dc_128_predictor_8x4_sse2,
aom_highbd_v_predictor_8x4_sse2, aom_highbd_h_predictor_8x4_sse2,
aom_highbd_d45e_predictor_8x4_sse2, NULL, NULL, NULL,
aom_highbd_d207e_predictor_8x4_sse2, aom_highbd_d63e_predictor_8x4_sse2,
NULL, NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(
SSE2_3, TestHighbdIntraPred8, "Hbd Intra8x16",
aom_highbd_dc_predictor_8x16_sse2, aom_highbd_dc_left_predictor_8x16_sse2,
aom_highbd_dc_top_predictor_8x16_sse2,
aom_highbd_dc_128_predictor_8x16_sse2, aom_highbd_v_predictor_8x16_sse2,
aom_highbd_h_predictor_8x16_sse2, aom_highbd_d45e_predictor_8x16_sse2, NULL,
NULL, NULL, aom_highbd_d207e_predictor_8x16_sse2,
aom_highbd_d63e_predictor_8x16_sse2, NULL, NULL, NULL, NULL)
SSE2_2, TestHighbdIntraPred8, "Hbd Intra8x4", NULL,
aom_highbd_dc_left_predictor_8x4_sse2, aom_highbd_dc_top_predictor_8x4_sse2,
aom_highbd_dc_128_predictor_8x4_sse2, aom_highbd_v_predictor_8x4_sse2,
aom_highbd_h_predictor_8x4_sse2, aom_highbd_d45e_predictor_8x4_sse2, NULL,
NULL, NULL, aom_highbd_d207e_predictor_8x4_sse2,
aom_highbd_d63e_predictor_8x4_sse2, NULL, NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred8, "Hbd Intra8x16", NULL,
aom_highbd_dc_left_predictor_8x16_sse2,
aom_highbd_dc_top_predictor_8x16_sse2,
aom_highbd_dc_128_predictor_8x16_sse2,
aom_highbd_v_predictor_8x16_sse2,
aom_highbd_h_predictor_8x16_sse2,
aom_highbd_d45e_predictor_8x16_sse2, NULL, NULL, NULL,
aom_highbd_d207e_predictor_8x16_sse2,
aom_highbd_d63e_predictor_8x16_sse2, NULL, NULL, NULL,
NULL)
#endif
#if HAVE_SSSE3
......@@ -976,8 +977,7 @@ HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred16, "Hbd Intra16x16",
aom_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
NULL, aom_highbd_d207e_predictor_16x16_sse2, NULL, NULL,
NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred16, "Hbd Intra16x8",
aom_highbd_dc_predictor_16x8_sse2,
HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred16, "Hbd Intra16x8", NULL,
aom_highbd_dc_left_predictor_16x8_sse2,
aom_highbd_dc_top_predictor_16x8_sse2,
aom_highbd_dc_128_predictor_16x8_sse2,
......@@ -985,8 +985,7 @@ HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred16, "Hbd Intra16x8",
aom_highbd_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL,
aom_highbd_d207e_predictor_16x8_sse2, NULL, NULL, NULL,
NULL, NULL)
HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred16, "Hbd Intra16x32",
aom_highbd_dc_predictor_16x32_sse2,
HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred16, "Hbd Intra16x32", NULL,
aom_highbd_dc_left_predictor_16x32_sse2,
aom_highbd_dc_top_predictor_16x32_sse2,
aom_highbd_dc_128_predictor_16x32_sse2,
......@@ -1072,8 +1071,7 @@ HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred32, "Hbd Intra32x32",
aom_highbd_v_predictor_32x32_sse2,
aom_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred32, "Hbd Intra32x16",
aom_highbd_dc_predictor_32x16_sse2,
HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred32, "Hbd Intra32x16", NULL,
aom_highbd_dc_left_predictor_32x16_sse2,
aom_highbd_dc_top_predictor_32x16_sse2,
aom_highbd_dc_128_predictor_32x16_sse2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment