Commit 6839682c authored by Urvang Joshi

Rect DC_PRED: use a multiply and shifts equivalent to the division.

The division is replaced by:
(1) an initial variable shift, followed by
(2) a multiply + 16-bit shift for bit-depth = 8, OR
    a multiply + 17-bit shift for bit-depth = 10 and 12.

All the constants (shifts and multipliers) for different block sizes are
based on "Algorithm 1" in
http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632

Note:
This also reverts the test hashes, etc., to the versions from before
commit 37ebf187.

BUG=aomedia:1191

Change-Id: I2aaebc3e95958a3f145c0408aa5cbcc85e30c8dc
parent e1f0b227
......@@ -172,20 +172,19 @@ specialize qw/aom_d153_predictor_8x8 ssse3/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
specialize qw/aom_d153_predictor_32x32 ssse3/;
# TODO(yungingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
# TODO(luoyi): Need to rewrite these.
# specialize qw/aom_dc_predictor_4x8 sse2/;
# specialize qw/aom_dc_predictor_8x4 sse2/;
# specialize qw/aom_dc_predictor_8x16 sse2/;
# specialize qw/aom_dc_predictor_16x8 sse2/;
# specialize qw/aom_dc_predictor_16x32 sse2/;
# specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_d207e_predictor_4x4 sse2/;
specialize qw/aom_d207e_predictor_4x8 sse2/;
specialize qw/aom_d207e_predictor_8x4 sse2/;
......@@ -230,18 +229,18 @@ specialize qw/aom_d45e_predictor_32x32 ssse3/;
specialize qw/aom_highbd_v_predictor_32x16 sse2/;
specialize qw/aom_highbd_v_predictor_32x32 sse2/;
# TODO(yungingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
specialize qw/aom_highbd_dc_predictor_8x4 sse2/;;
specialize qw/aom_highbd_dc_predictor_8x8 sse2/;;
specialize qw/aom_highbd_dc_predictor_8x16 sse2/;;
specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
# TODO(luoyi): Need to rewrite these
# specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
# specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
# specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
# specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
# specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
# specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_h_predictor_4x4 sse2/;
specialize qw/aom_highbd_h_predictor_4x8 sse2/;
......
This diff is collapsed.
......@@ -57,7 +57,7 @@ decode_to_md5() {
decode_to_md5_av1() {
# expected MD5 sum for the last frame.
local expected_md5="085ee3045d9e5e6538853dd762b73512"
local expected_md5="fc7565de847d04dc3485b4858c0ed298"
local file="${AV1_IVF_FILE}"
# TODO(urvang): Check in the encoded file (like libvpx does) to avoid
......
......@@ -334,10 +334,10 @@ INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest,
#if HAVE_SSE2
const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc_top, sse2), lowbd_intrapred(dc_left, sse2),
lowbd_intrapred(dc_128, sse2), lowbd_intrapred(v, sse2),
lowbd_intrapred(h, sse2),
lowbd_intrapred(d63e, sse2), lowbd_intrapred(d207e, sse2),
lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2),
lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2),
};
INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
......@@ -363,11 +363,11 @@ const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2),
lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
lowbd_entry(v, 32, 32, avx2), lowbd_entry(h, 32, 32, avx2),
lowbd_entry(dc_top, 32, 16, avx2), lowbd_entry(dc_left, 32, 16, avx2),
lowbd_entry(dc_128, 32, 16, avx2), lowbd_entry(v, 32, 16, avx2),
lowbd_entry(paeth, 16, 8, avx2), lowbd_entry(paeth, 16, 16, avx2),
lowbd_entry(paeth, 16, 32, avx2), lowbd_entry(paeth, 32, 16, avx2),
lowbd_entry(paeth, 32, 32, avx2),
lowbd_entry(dc, 32, 16, avx2), lowbd_entry(dc_top, 32, 16, avx2),
lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
lowbd_entry(v, 32, 16, avx2), lowbd_entry(paeth, 16, 8, avx2),
lowbd_entry(paeth, 16, 16, avx2), lowbd_entry(paeth, 16, 32, avx2),
lowbd_entry(paeth, 32, 16, avx2), lowbd_entry(paeth, 32, 32, avx2),
};
INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment