Commit 63bd6dc9 authored by Yi Luo

Fix rectangle transform computation overflow

- Add 16-bit saturation in fdct_round_shift().
- Add extreme value tests and round trip error tests.
- Fix inv 4x8 txfm calculation accuracy.
- Fix 4x8, 8x4, 8x16, 16x8, 16x32, 32x16 extreme value tests.
- BDRate: lowres: -0.034
          midres: -0.036
          hdres:  -0.013
BUG=webm:1340

Change-Id: I48365c1e50a03a7b1aa69b8856b732b483299fb5
parent 125e7293
......@@ -14,12 +14,15 @@
#include "aom_dsp/txfm_common.h"
// Clamps |value| to the closed range [INT16_MIN, INT16_MAX] and returns it
// in the wider tran_high_t type.
static INLINE tran_high_t saturate_int16(tran_high_t value) {
  if (value > INT16_MAX) return INT16_MAX;
  if (value < INT16_MIN) return INT16_MIN;
  return value;
}
// Applies ROUND_POWER_OF_TWO(input, DCT_CONST_BITS) and then saturates the
// result to the int16_t range via saturate_int16(), so an out-of-range
// intermediate is clamped instead of being passed on unchanged.
static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
  const tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  // TODO(debargha, peter.derivaz): Find new bounds for this assert
  // and make the bounds consts.
  // assert(INT16_MIN <= rv && rv <= INT16_MAX);
  // The saturating return must be the only return: an earlier plain
  // `return rv;` would make the clamp unreachable.
  return saturate_int16(rv);
}
void aom_fdct32(const tran_high_t *input, tran_high_t *output, int round);
......
......@@ -1190,8 +1190,6 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[6] = load_input_data(input + 2 * 8);
in[7] = load_input_data(input + 3 * 8);
scale_sqrt2_8x4(in + 4);
// Row transform
switch (tx_type) {
case DCT_DCT:
......@@ -1230,6 +1228,8 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
default: assert(0); break;
}
scale_sqrt2_8x4(in + 4);
// Repack data
in[0] = _mm_unpacklo_epi64(in[4], in[6]);
in[1] = _mm_unpackhi_epi64(in[4], in[6]);
......
......@@ -787,10 +787,10 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
x0 = fdct_round_shift(s0 + s4);
x1 = fdct_round_shift(s1 + s5);
x2 = fdct_round_shift(s2 + s6);
x3 = fdct_round_shift(s3 + s7);
x0 = s0 + s4;
x1 = s1 + s5;
x2 = s2 + s6;
x3 = s3 + s7;
x4 = fdct_round_shift(s0 - s4);
x5 = fdct_round_shift(s1 - s5);
x6 = fdct_round_shift(s2 - s6);
......@@ -806,10 +806,10 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
x0 = s0 + s2;
x1 = s1 + s3;
x2 = s0 - s2;
x3 = s1 - s3;
x0 = fdct_round_shift(s0 + s2);
x1 = fdct_round_shift(s1 + s3);
x2 = fdct_round_shift(s0 - s2);
x3 = fdct_round_shift(s1 - s3);
x4 = fdct_round_shift(s4 + s6);
x5 = fdct_round_shift(s5 + s7);
x6 = fdct_round_shift(s4 - s6);
......@@ -875,14 +875,15 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
x0 = fdct_round_shift(s0 + s8);
x1 = fdct_round_shift(s1 + s9);
x2 = fdct_round_shift(s2 + s10);
x3 = fdct_round_shift(s3 + s11);
x4 = fdct_round_shift(s4 + s12);
x5 = fdct_round_shift(s5 + s13);
x6 = fdct_round_shift(s6 + s14);
x7 = fdct_round_shift(s7 + s15);
x0 = s0 + s8;
x1 = s1 + s9;
x2 = s2 + s10;
x3 = s3 + s11;
x4 = s4 + s12;
x5 = s5 + s13;
x6 = s6 + s14;
x7 = s7 + s15;
x8 = fdct_round_shift(s0 - s8);
x9 = fdct_round_shift(s1 - s9);
x10 = fdct_round_shift(s2 - s10);
......@@ -914,14 +915,15 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
x1 = s1 + s5;
x2 = s2 + s6;
x3 = s3 + s7;
x4 = s0 - s4;
x5 = s1 - s5;
x6 = s2 - s6;
x7 = s3 - s7;
x8 = fdct_round_shift(s8 + s12);
x9 = fdct_round_shift(s9 + s13);
x10 = fdct_round_shift(s10 + s14);
x11 = fdct_round_shift(s11 + s15);
x4 = fdct_round_shift(s0 - s4);
x5 = fdct_round_shift(s1 - s5);
x6 = fdct_round_shift(s2 - s6);
x7 = fdct_round_shift(s3 - s7);
x8 = s8 + s12;
x9 = s9 + s13;
x10 = s10 + s14;
x11 = s11 + s15;
x12 = fdct_round_shift(s8 - s12);
x13 = fdct_round_shift(s9 - s13);
x14 = fdct_round_shift(s10 - s14);
......@@ -945,18 +947,21 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
x0 = s0 + s2;
x1 = s1 + s3;
x2 = s0 - s2;
x3 = s1 - s3;
x0 = fdct_round_shift(s0 + s2);
x1 = fdct_round_shift(s1 + s3);
x2 = fdct_round_shift(s0 - s2);
x3 = fdct_round_shift(s1 - s3);
x4 = fdct_round_shift(s4 + s6);
x5 = fdct_round_shift(s5 + s7);
x6 = fdct_round_shift(s4 - s6);
x7 = fdct_round_shift(s5 - s7);
x8 = s8 + s10;
x9 = s9 + s11;
x10 = s8 - s10;
x11 = s9 - s11;
x8 = fdct_round_shift(s8 + s10);
x9 = fdct_round_shift(s9 + s11);
x10 = fdct_round_shift(s8 - s10);
x11 = fdct_round_shift(s9 - s11);
x12 = fdct_round_shift(s12 + s14);
x13 = fdct_round_shift(s13 + s15);
x14 = fdct_round_shift(s12 - s14);
......@@ -1230,7 +1235,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) output[j + i * n] = (temp_out[j] + 1) >> 2;
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
......@@ -1281,7 +1286,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) output[j + i * n2] = (temp_out[j] + 1) >> 2;
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
......@@ -1332,8 +1337,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
......@@ -1384,8 +1388,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
......@@ -1435,9 +1438,7 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
}
// Note: overall scale factor of transform is 4 times unitary
}
......@@ -1487,9 +1488,7 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
}
// Note: overall scale factor of transform is 4 times unitary
}
......
This diff is collapsed.
......@@ -69,10 +69,35 @@ class AV1Trans16x32HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(48); }
TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); }
using std::tr1::make_tuple;
// Parameter table for the C implementations: every entry pairs the forward
// transform av1_fht16x32_c with the inverse av1_iht16x32_512_add_c at
// AOM_BITS_8. The third tuple field runs 0..3 unconditionally and 4..15 only
// when CONFIG_EXT_TX is enabled.
// NOTE(review): the third field is presumably the tx_type index and the last
// field (512 == 16 * 32) presumably the coefficient count -- confirm against
// the Ht16x32Param typedef, which is not visible here.
const Ht16x32Param kArrayHt16x32Param_c[] = {
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 0, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 1, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 2, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 3, AOM_BITS_8, 512),
#if CONFIG_EXT_TX
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 4, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 5, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 6, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 7, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 8, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 9, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 10, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 11, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 12, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 13, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 14, AOM_BITS_8, 512),
make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 15, AOM_BITS_8, 512)
#endif // CONFIG_EXT_TX
};
// Registers the table above with the AV1Trans16x32HT fixture under the
// instantiation prefix "C".
INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT,
::testing::ValuesIn(kArrayHt16x32Param_c));
#if HAVE_SSE2
const Ht16x32Param kArrayHt16x32Param_sse2[] = {
......
......@@ -69,8 +69,11 @@ class AV1Trans16x8HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
......
......@@ -69,10 +69,35 @@ class AV1Trans32x16HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(43); }
TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); }
using std::tr1::make_tuple;
// Parameter table for the C implementations: every entry pairs the forward
// transform av1_fht32x16_c with the inverse av1_iht32x16_512_add_c at
// AOM_BITS_8. The third tuple field runs 0..3 unconditionally and 4..15 only
// when CONFIG_EXT_TX is enabled.
// NOTE(review): the third field is presumably the tx_type index and the last
// field (512 == 32 * 16) presumably the coefficient count -- confirm against
// the Ht32x16Param typedef, which is not visible here.
const Ht32x16Param kArrayHt32x16Param_c[] = {
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 0, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 1, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 2, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 3, AOM_BITS_8, 512),
#if CONFIG_EXT_TX
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 4, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 5, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 6, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 7, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 8, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 9, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 10, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 11, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 12, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 13, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 14, AOM_BITS_8, 512),
make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 15, AOM_BITS_8, 512)
#endif // CONFIG_EXT_TX
};
// Registers the table above with the AV1Trans32x16HT fixture under the
// instantiation prefix "C".
INSTANTIATE_TEST_CASE_P(C, AV1Trans32x16HT,
::testing::ValuesIn(kArrayHt32x16Param_c));
#if HAVE_SSE2
const Ht32x16Param kArrayHt32x16Param_sse2[] = {
......
......@@ -69,8 +69,11 @@ class AV1Trans4x8HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans4x8HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans4x8HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans4x8HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans4x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans4x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
......
......@@ -69,8 +69,11 @@ class AV1Trans8x16HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
......
......@@ -69,8 +69,11 @@ class AV1Trans8x4HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans8x4HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans8x4HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans8x4HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans8x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans8x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
......
......@@ -103,10 +103,10 @@ class TransformTestBase {
}
EXPECT_GE(static_cast<uint32_t>(limit), max_error)
<< "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
<< "Error: FHT/IHT has an individual round trip error > " << limit;
EXPECT_GE(count_test_block * limit, total_error)
<< "Error: 4x4 FHT/IHT has average round trip error > " << limit
<< "Error: FHT/IHT has average round trip error > " << limit
<< " per block";
aom_free(test_input_block);
......@@ -249,7 +249,9 @@ class TransformTestBase {
int row_length = FindRowLength();
// The minimum quant value is 4.
for (int j = 0; j < num_coeffs_; ++j) {
EXPECT_EQ(output_block[j], output_ref_block[j]);
EXPECT_EQ(output_block[j], output_ref_block[j])
<< "Not bit-exact at test index: " << i << ", "
<< "j = " << j << std::endl;
EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
abs(output_block[j]))
<< "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE";
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment