Commit 6839682c authored by Urvang Joshi's avatar Urvang Joshi

Rect DC_PRED: mult and shifts equivalent to div.

The division is replaced by:
(1) an initial, block-size-dependent right shift, followed by
(2) a multiply + 16-bit shift for bit-depth = 8, OR
    a multiply + 17-bit shift for bit-depth = 10 and 12.

All the constants (shifts and multipliers) for different block sizes are
based on "Algorithm 1" in
http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632

Note:
This also reverts the test hashes, etc., to their values from before
commit 37ebf187.

BUG=aomedia:1191

Change-Id: I2aaebc3e95958a3f145c0408aa5cbcc85e30c8dc
parent e1f0b227
......@@ -172,20 +172,19 @@ specialize qw/aom_d153_predictor_8x8 ssse3/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
specialize qw/aom_d153_predictor_32x32 ssse3/;
# TODO(yungingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
# TODO(luoyi): Need to rewrite these.
# specialize qw/aom_dc_predictor_4x8 sse2/;
# specialize qw/aom_dc_predictor_8x4 sse2/;
# specialize qw/aom_dc_predictor_8x16 sse2/;
# specialize qw/aom_dc_predictor_16x8 sse2/;
# specialize qw/aom_dc_predictor_16x32 sse2/;
# specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_d207e_predictor_4x4 sse2/;
specialize qw/aom_d207e_predictor_4x8 sse2/;
specialize qw/aom_d207e_predictor_8x4 sse2/;
......@@ -230,18 +229,18 @@ specialize qw/aom_d45e_predictor_32x32 ssse3/;
specialize qw/aom_highbd_v_predictor_32x16 sse2/;
specialize qw/aom_highbd_v_predictor_32x32 sse2/;
# TODO(yungingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
# High bit-depth DC predictor specializations (sse2), one statement per size.
specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
# TODO(luoyi): Need to rewrite these
# specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
# specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
# specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
# specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
# specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
# specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_h_predictor_4x4 sse2/;
specialize qw/aom_highbd_h_predictor_4x8 sse2/;
......
......@@ -361,101 +361,133 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
}
// Replaces an integer division by a shift, a multiply and a second shift:
// returns ((num >> shift1) * multiplier) >> shift2, evaluated in 'int'.
// The caller picks shift1/multiplier/shift2 so that this matches the
// division it wants for the range of 'num' it passes in.
static INLINE int divide_using_multiply_shift(int num, int shift1,
                                              int multiplier, int shift2) {
  const int scaled = (num >> shift1) * multiplier;
  return scaled >> shift2;
}
// The constants (multiplier and shifts) for a given block size are obtained
// as follows:
// - Let sum_w_h = block width + block height.
// - Shift 'sum_w_h' right until we reach an odd number. Let the number of
// shifts for that block size be called 'shift1' (see the parameter in
// dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2
// possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect
// block].
// - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
// using the "Algorithm 1" in:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
// by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd
// shift will be 16, regardless of the block size.
// Note: For low bitdepth, assembly code may be optimized by using smaller
// constants for smaller block sizes, where the range of the 'sum' is
// restricted to fewer bits.
// 0x5556 == ceil(2^16 / 3): multiplier for 1:2 blocks (odd factor d = 3).
#define DC_MULTIPLIER_1X2 0x5556
// 0x3334 == ceil(2^16 / 5): multiplier for 1:4 blocks (odd factor d = 5).
#define DC_MULTIPLIER_1X4 0x3334
// Second shift, applied after the multiply (the "m + n = 16" choice above).
#define DC_SHIFT2 16
// Fills a bw x bh block of 8-bit pixels with a single DC value derived from
// the 'bw' pixels of 'above' and the 'bh' pixels of 'left'.
// NOTE: this hunk lists the removed and the added lines of the change
// together, so statements appear in pairs (removed first, added second).
// - Added form: the pixel sum is biased by (bw + bh) >> 1 and passed to
//   divide_using_multiply_shift() with 'shift1', 'multiplier' and DC_SHIFT2;
//   the assert checks that the result fits in 8 bits.
// - Removed form: the sum was multiplied in 64 bits, shifted right by 'shift'
//   and clamped with clip_pixel().
// The DC value is then written to each of the 'bh' rows with memset(),
// advancing 'dst' by 'stride' after every row.
static INLINE void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left, uint32_t multiplier,
int shift) {
int i, r, expected_dc, sum = 0;
const uint8_t *left, int shift1,
int multiplier) {
int sum = 0;
for (i = 0; i < bw; i++) {
for (int i = 0; i < bw; i++) {
sum += above[i];
}
for (i = 0; i < bh; i++) {
for (int i = 0; i < bh; i++) {
sum += left[i];
}
expected_dc = (int)(((uint64_t)sum * multiplier) >> shift);
expected_dc = clip_pixel(expected_dc);
const int expected_dc = divide_using_multiply_shift(
sum + ((bw + bh) >> 1), shift1, multiplier, DC_SHIFT2);
assert(expected_dc < (1 << 8));
for (r = 0; r < bh; r++) {
for (int r = 0; r < bh; r++) {
memset(dst, expected_dc, bw);
dst += stride;
}
}
#define DC_MULTIPLIER_1X2 0xABU
#define DC_MULTIPLIER_1X4 0xCDU
#undef DC_SHIFT2
// Rectangular DC_PRED entry points for 8-bit pixels. Every wrapper forwards
// to dc_predictor_rect() with its fixed block width and height.
// Each body shows two calls because this hunk lists the removed line followed
// by the added one: the removed call passes (multiplier, shift), the added
// call passes (shift1, multiplier).
// In the added calls, shift1 is the number of trailing zero bits of
// (width + height); the multiplier is DC_MULTIPLIER_1X2 when the sides are in
// a 1:2 ratio and DC_MULTIPLIER_1X4 when they are in a 1:4 ratio:
//   4x8, 8x4, 4x16, 16x4        -> shift1 = 2
//   8x16, 16x8, 8x32, 32x8      -> shift1 = 3
//   16x32, 32x16, 16x64, 64x16  -> shift1 = 4
//   32x64, 64x32                -> shift1 = 5
void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 4, 8, above, left, DC_MULTIPLIER_1X2, 11);
dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 8, 4, above, left, DC_MULTIPLIER_1X2, 11);
dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 4, 16, above, left, DC_MULTIPLIER_1X4, 12);
dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4);
}
void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 4, above, left, DC_MULTIPLIER_1X4, 12);
dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4);
}
void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 8, 16, above, left, DC_MULTIPLIER_1X2, 12);
dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 8, above, left, DC_MULTIPLIER_1X2, 12);
dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 8, 32, above, left, DC_MULTIPLIER_1X4, 13);
dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4);
}
void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 32, 8, above, left, DC_MULTIPLIER_1X4, 13);
dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4);
}
void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 32, above, left, DC_MULTIPLIER_1X2, 13);
dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 32, 16, above, left, DC_MULTIPLIER_1X2, 13);
dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 16, 64, above, left, DC_MULTIPLIER_1X4, 14);
dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4);
}
void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 64, 16, above, left, DC_MULTIPLIER_1X4, 14);
dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4);
}
void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 32, 64, above, left, DC_MULTIPLIER_1X2, 14);
dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2);
}
void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
dc_predictor_rect(dst, stride, 64, 32, above, left, DC_MULTIPLIER_1X2, 14);
dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2);
}
#undef DC_MULTIPLIER_1X2
#undef DC_MULTIPLIER_1X4
void aom_d45e_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int A = above[0];
......@@ -1003,127 +1035,148 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
}
}
// Obtained similarly to DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but
// assuming a 2nd shift of 17 bits instead of 16.
// Note: Strictly speaking, 2nd shift needs to be 17 only when:
// - bit depth == 12, and
// - bw + bh is divisible by 5 (as opposed to divisible by 3).
// All other cases can use half the multipliers with a shift of 16 instead.
// This special optimization can be used when writing assembly code.
// 0xAAAB == ceil(2^17 / 3): multiplier for 1:2 blocks.
#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
// 0x6667 == ceil(2^17 / 5): multiplier for 1:4 blocks.
// Note: This constant is odd, but a smaller even constant (0x199a) with the
// appropriate shift should work for neon in 8/10-bit.
#define HIGHBD_DC_MULTIPLIER_1X4 0x6667
// Second shift, applied after the multiply in highbd_dc_predictor_rect().
#define HIGHBD_DC_SHIFT2 17
// High bit-depth counterpart of dc_predictor_rect(): fills a bw x bh block of
// 16-bit pixels with a single DC value derived from the 'bw' samples of
// 'above' and the 'bh' samples of 'left'.
// NOTE: this hunk lists the removed and the added lines of the change
// together, so statements appear in pairs (removed first, added second).
// - Added form: the sum is biased by (bw + bh) >> 1 and passed to
//   divide_using_multiply_shift() with 'shift1', 'multiplier' and
//   HIGHBD_DC_SHIFT2; the assert checks the result is below (1 << bd).
//   'bd' is used only by that assert; the (void)bd cast is presumably there
//   to avoid an unused-parameter warning when asserts are compiled out.
// - Removed form: the sum was multiplied in 64 bits, shifted right by 'shift'
//   and clamped with clip_pixel_highbd().
// The DC value is written to each of the 'bh' rows with aom_memset16(),
// advancing 'dst' by 'stride' after every row.
static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd,
uint32_t multiplier, int shift) {
int i, r, expected_dc, sum = 0;
int shift1, uint32_t multiplier) {
int sum = 0;
(void)bd;
for (i = 0; i < bw; i++) {
for (int i = 0; i < bw; i++) {
sum += above[i];
}
for (i = 0; i < bh; i++) {
for (int i = 0; i < bh; i++) {
sum += left[i];
}
expected_dc = (int)(((uint64_t)sum * multiplier) >> shift);
expected_dc = clip_pixel_highbd(expected_dc, bd);
const int expected_dc = divide_using_multiply_shift(
sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2);
assert(expected_dc < (1 << bd));
for (r = 0; r < bh; r++) {
for (int r = 0; r < bh; r++) {
aom_memset16(dst, expected_dc, bw);
dst += stride;
}
}
// HIGHBD_DC_SHIFT2 is only needed by the function above.
#undef HIGHBD_DC_SHIFT2
// Rectangular DC_PRED entry points for high bit-depth (16-bit) pixels. Every
// wrapper forwards to highbd_dc_predictor_rect() with its fixed block width
// and height and passes 'bd' through unchanged.
// Each body shows two calls because this hunk lists the removed lines followed
// by the added ones: the removed call passes (DC_MULTIPLIER_*, shift), the
// added call passes (shift1, HIGHBD_DC_MULTIPLIER_*).
// In the added calls, shift1 is the number of trailing zero bits of
// (width + height); the multiplier is HIGHBD_DC_MULTIPLIER_1X2 when the sides
// are in a 1:2 ratio and HIGHBD_DC_MULTIPLIER_1X4 when they are in a 1:4
// ratio:
//   4x8, 8x4, 4x16, 16x4        -> shift1 = 2
//   8x16, 16x8, 8x32, 32x8      -> shift1 = 3
//   16x32, 32x16, 16x64, 64x16  -> shift1 = 4
//   32x64, 64x32                -> shift1 = 5
void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd,
DC_MULTIPLIER_1X2, 11);
highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd,
DC_MULTIPLIER_1X2, 11);
highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd,
DC_MULTIPLIER_1X4, 12);
highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X4);
}
void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd,
DC_MULTIPLIER_1X4, 12);
highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
HIGHBD_DC_MULTIPLIER_1X4);
}
void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd,
DC_MULTIPLIER_1X2, 12);
highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd,
DC_MULTIPLIER_1X2, 12);
highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd,
DC_MULTIPLIER_1X4, 13);
highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X4);
}
void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd,
DC_MULTIPLIER_1X4, 13);
highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
HIGHBD_DC_MULTIPLIER_1X4);
}
void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd,
DC_MULTIPLIER_1X2, 13);
highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd,
DC_MULTIPLIER_1X2, 13);
highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd,
DC_MULTIPLIER_1X4, 14);
highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X4);
}
void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd,
DC_MULTIPLIER_1X4, 14);
highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
HIGHBD_DC_MULTIPLIER_1X4);
}
void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd,
DC_MULTIPLIER_1X2, 14);
highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
HIGHBD_DC_MULTIPLIER_1X2);
}
void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd,
DC_MULTIPLIER_1X2, 14);
highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
HIGHBD_DC_MULTIPLIER_1X2);
}
#undef HIGHBD_DC_MULTIPLIER_1X2
#undef HIGHBD_DC_MULTIPLIER_1X4
// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time.
......
......@@ -57,7 +57,7 @@ decode_to_md5() {
decode_to_md5_av1() {
# expected MD5 sum for the last frame.
local expected_md5="085ee3045d9e5e6538853dd762b73512"
local expected_md5="fc7565de847d04dc3485b4858c0ed298"
local file="${AV1_IVF_FILE}"
# TODO(urvang): Check in the encoded file (like libvpx does) to avoid
......
......@@ -334,10 +334,10 @@ INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest,
#if HAVE_SSE2
const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc_top, sse2), lowbd_intrapred(dc_left, sse2),
lowbd_intrapred(dc_128, sse2), lowbd_intrapred(v, sse2),
lowbd_intrapred(h, sse2),
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2),
lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2),
};
INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
......@@ -363,11 +363,11 @@ const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2),
lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
lowbd_entry(v, 32, 32, avx2), lowbd_entry(h, 32, 32, avx2),
lowbd_entry(dc_top, 32, 16, avx2), lowbd_entry(dc_left, 32, 16, avx2),
lowbd_entry(dc_128, 32, 16, avx2), lowbd_entry(v, 32, 16, avx2),
lowbd_entry(paeth, 16, 8, avx2), lowbd_entry(paeth, 16, 16, avx2),
lowbd_entry(paeth, 16, 32, avx2), lowbd_entry(paeth, 32, 16, avx2),
lowbd_entry(paeth, 32, 32, avx2),
lowbd_entry(dc, 32, 16, avx2), lowbd_entry(dc_top, 32, 16, avx2),
lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
lowbd_entry(v, 32, 16, avx2), lowbd_entry(paeth, 16, 8, avx2),
lowbd_entry(paeth, 16, 16, avx2), lowbd_entry(paeth, 16, 32, avx2),
lowbd_entry(paeth, 32, 16, avx2), lowbd_entry(paeth, 32, 32, avx2),
};
INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
......
......@@ -160,7 +160,7 @@ void TestIntraPred8(const char *block_name, AvxPredFunc const *pred_funcs) {
"97111eb1bc26bade6272015df829f1ae", "d19a8a73cc46b807f2c5e817576cc1e1",
};
static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
"8ab62c3d363a05bb72ffdc4ccd0fe2c6", "2d71a26d1bae1fb34734de7b42fc5eb7",
"23f9fc11344426c9bee2e06d57dfd628", "2d71a26d1bae1fb34734de7b42fc5eb7",
"5af9c1b2fd9d5721fad67b67b3f7c816", "00d71b17be662753813d515f197d145e",
"bef10ec984427e28f4390f43809d10af", "77773cdfb7ed6bc882ab202a64b0a470",
"cba356970f6b9a1b6024e1dbe4a66f9b", "c58c21efc804242848e6f29a93a7984d",
......@@ -202,7 +202,7 @@ void TestIntraPred16(const char *block_name, AvxPredFunc const *pred_funcs) {
"bb6c74c9076c9f266ab11fb57060d8e6", "0c5162bc28489756ddb847b5678e6f07",
};
static const char *const kSignatures16x8[kNumAv1IntraFuncs] = {
"cbfc8f0593f199629d2453128f4748fe", "3731e1e6202064a9d0604d7c293ecee4",
"b4cbdbdf10ce13300b4063a3daf99e04", "3731e1e6202064a9d0604d7c293ecee4",
"6c856188c4256a06452f0d5d70cac436", "1f2192b4c8c497589484ea7bf9c944e8",
"84011bd4b7f565119d06787840e333a0", "0e48949f7a6aa36f0d76b5d01f91124a",
"58114c06f6b9d8285e5020c7afd834ab", "e37afe84a8b3c5e0f048d4652ecbe09e",
......@@ -244,7 +244,7 @@ void TestIntraPred32(const char *block_name, AvxPredFunc const *pred_funcs) {
"866c224746dc260cda861a7b1b383fb3", "cea23799fc3526e1b6a6ff02b42b82af",
};
static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
"431bafa0817b17f8aec0bef13e72bbf7", "b0bcb514ebfbee065faea9d34c12ae75",
"d1aeb8d5fdcfd3307922af01a798a4dc", "b0bcb514ebfbee065faea9d34c12ae75",
"d6a18c63b4e909871c0137ca652fad23", "fd047f2fc1b8ffb95d0eeef3e8796a45",
"645ab60779ea348fd93c81561c31bab9", "4409633c9db8dff41ade4292a3a56e7f",
"b9b2935b2287a9a461ac5c11251ac706", "43b05f808c0ac4fe8accd84d293b0488",
......@@ -306,13 +306,12 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_sse2,
aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_4x4_sse2, aom_d63e_predictor_4x4_sse2, NULL,
NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", NULL,
INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_sse2,
aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2,
aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2,
aom_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_4x8_sse2, aom_d63e_predictor_4x8_sse2, NULL,
NULL, NULL, NULL)
#endif // HAVE_SSE2
#if HAVE_SSSE3
......@@ -389,13 +388,13 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_sse2,
aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_8x8_sse2, aom_d63e_predictor_8x8_sse2, NULL,
NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred8, "intra8x4", NULL,
INTRA_PRED_TEST(SSE2_2, TestIntraPred8, "intra8x4", aom_dc_predictor_8x4_sse2,
aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2,
aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2,
aom_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_8x4_sse2, aom_d63e_predictor_8x4_sse2, NULL,
NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", NULL,
INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_sse2,
aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2,
aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2,
aom_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL,
......@@ -482,14 +481,14 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred16, "intra16x16",
aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_16x16_sse2, aom_d63e_predictor_16x16_sse2,
NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred16, "intra16x8", NULL,
aom_dc_left_predictor_16x8_sse2, aom_dc_top_predictor_16x8_sse2,
aom_dc_128_predictor_16x8_sse2, aom_v_predictor_16x8_sse2,
aom_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_16x8_sse2, aom_d63e_predictor_16x8_sse2,
NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32", NULL,
aom_dc_left_predictor_16x32_sse2,
INTRA_PRED_TEST(SSE2_2, TestIntraPred16, "intra16x8",
aom_dc_predictor_16x8_sse2, aom_dc_left_predictor_16x8_sse2,
aom_dc_top_predictor_16x8_sse2, aom_dc_128_predictor_16x8_sse2,
aom_v_predictor_16x8_sse2, aom_h_predictor_16x8_sse2, NULL,
NULL, NULL, NULL, aom_d207e_predictor_16x8_sse2,
aom_d63e_predictor_16x8_sse2, NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32",
aom_dc_predictor_16x32_sse2, aom_dc_left_predictor_16x32_sse2,
aom_dc_top_predictor_16x32_sse2,
aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2,
aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL,
......@@ -580,8 +579,8 @@ INTRA_PRED_TEST(SSE2_1, TestIntraPred32, "intra32x32",
aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL,
aom_d207e_predictor_32x32_sse2, aom_d63e_predictor_32x32_sse2,
NULL, NULL, NULL, NULL)
INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16", NULL,
aom_dc_left_predictor_32x16_sse2,
INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16",
aom_dc_predictor_32x16_sse2, aom_dc_left_predictor_32x16_sse2,
aom_dc_top_predictor_32x16_sse2,
aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2,
aom_h_predictor_32x16_sse2, NULL, NULL, NULL, NULL,
......@@ -608,8 +607,8 @@ INTRA_PRED_TEST(AVX2_1, TestIntraPred32, "intra32x32",
aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2,
aom_h_predictor_32x32_avx2, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_32x32_avx2, NULL, NULL, NULL)
INTRA_PRED_TEST(AVX2_2, TestIntraPred32, "intra32x16", NULL,
aom_dc_left_predictor_32x16_avx2,
INTRA_PRED_TEST(AVX2_2, TestIntraPred32, "intra32x16",
aom_dc_predictor_32x16_avx2, aom_dc_left_predictor_32x16_avx2,
aom_dc_top_predictor_32x16_avx2,
aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
......@@ -718,7 +717,7 @@ void TestHighbdIntraPred8(const char *block_name,
"0edc415b5dd7299f7a34fb9f71d31d78", "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
};
static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
"d4fd8840e17077cfe3031e747d687699", "6e572c35aa782d00cafcb99e9ea047ea",
"d58cd4c4bf3b7bbaa5db5e1a5622ec78", "6e572c35aa782d00cafcb99e9ea047ea",
"e8c22a3702b416dc9ab974505afbed09", "aaa4e4762a795aad7ad74de0c662c4e4",
"a19f9101967383c3dcbd516dc317a291", "9ab8cb91f1a595b9ebe3fe8de58031aa",
"c6c7d65264397d4d31e378e1f1cfd921", "5804158e463ff794b6b8a623f5d2c10d",
......@@ -728,7 +727,7 @@ void TestHighbdIntraPred8(const char *block_name,
"b4871af8316089e3e23522175df7e93f", "d33301e1c2cb173be46792a22d19881a",
};
static const char *const kSignatures8x16[kNumAv1IntraFuncs] = {
"086c82fb2e7e2aa7c88115432b3036fe", "16310fa7076394f16fc85c4b149d89c9",
"4562de1d0336610880fdd5685498a9ec", "16310fa7076394f16fc85c4b149d89c9",
"0e94af88e1dc573b6f0f499cddd1f530", "dfd245ee20d091c67809160340365aa9",
"d3562504327f70c096c5be23fd8a3747", "601b853558502acbb5135eadd2da117a",
"e83f9a8bc16b507d2ed0b6b31a25d6f5", "fc8427d942246e8cba81247bb294afb5",
......@@ -771,7 +770,7 @@ void TestHighbdIntraPred16(const char *block_name,
"688c6660a6dc6fa61fa1aa38e708c209", "0cdf641b4f81d69509c92ae0b93ef5ff",
};
static const char *const kSignatures16x32[kNumAv1IntraFuncs] = {
"663490212042a4e6936d780c425bf10a", "8baef2b2e789f79c8df9d90ad10f34a4",
"aee4b3b0e3cc02d48e2c40d77f807927", "8baef2b2e789f79c8df9d90ad10f34a4",
"038c38ee3c4f090bb8d736eab136aafc", "1a3de2aaeaffd68a9fd6c7f6557b83f3",
"385c6e0ea29421dd81011a2934641e26", "6cf96c285d1a2d4787f955dad715b08c",
"21f82421fda1c3afca8baca0dc048a52", "eac3734852c99a051f6d15a921d9e7b9",
......@@ -804,7 +803,7 @@ void TestHighbdIntraPred32(const char *block_name,
"b073a70d3672f1282236994f5d12e94b", "c51607aebad5dcb3c1e3b58ef9e5b84e",
};
static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
"0c072d478d63466c676daf207a0e4ae5", "701e7b82593c66da5052fc4b6afd79ce",
"290b23c9f5a1de7905bfa71a942da29b", "701e7b82593c66da5052fc4b6afd79ce",
"4da828c5455cd246735a663fbb204989", "e3fbeaf234efece8dbd752b77226200c",
"4d1d8c969f05155a7e7e84cf7aad021b", "c22e4877c2c946d5bdc0d542e29e70cf",
"ffd86b234d65c2e1386a5b5b5c188a69", "50aaaa7d90e300b635ab18cdd73e189b",
......@@ -860,12 +859,13 @@ HIGHBD_INTRA_PRED_TEST(
NULL, NULL, NULL, NULL)
HIGHBD_INTRA_PRED_TEST(
SSE2_2, TestHighbdIntraPred4, "Hbd Intra4x8", NULL,
aom_highbd_dc_left_predictor_4x8_sse2, aom_highbd_dc_top_predictor_4x8_sse2,
aom_highbd_dc_128_predictor_4x8_sse2, aom_highbd_v_predictor_4x8_sse2,
aom_highbd_h_predictor_4x8_sse2, aom_highbd_d45e_predictor_4x8_sse2, NULL,
NULL, NULL, aom_highbd_d207e_predictor_4x8_sse2,
aom_highbd_d63e_predictor_4x8_sse2, NULL, NULL, NULL, NULL)
SSE2_2, TestHighbdIntraPred4, "Hbd Intra4x8",
aom_highbd_dc_predictor_4x8_sse2, aom_highbd_dc_left_predictor_4x8_sse2,
aom_highbd_dc_top_predictor_4x8_sse2, aom_highbd_dc_128_predictor_4x8_sse2,
aom_highbd_v_predictor_4x8_sse2, aom_highbd_h_predictor_4x8_sse2,
aom_highbd_d45e_predictor_4x8_sse2, NULL, NULL, NULL,
aom_highbd_d207e_predictor_4x8_sse2, aom_highbd_d63e_predictor_4x8_sse2,
NULL, NULL, NULL, NULL)