Commit 838a53d6 authored by David Barker's avatar David Barker Committed by Debargha Mukherjee

Improve rectangular transform accuracy

By adjusting the internal scaling and rounding in the transforms,
we can adjust the maximum round-trip errors to:
* 8x16 and 16x8: 0 pixel values (ie, transforms are exact)
* 16x32: 1 pixel value
* 32x16: 2 pixel values

Change-Id: I0ba691a8d27042dcf1dd5ae81568d07a92d68781
parent 29c61068
......@@ -1337,7 +1337,9 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
for (j = 0; j < n; ++j)
output[j + i * n] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
......@@ -1388,7 +1390,9 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
for (j = 0; j < n2; ++j)
output[j + i * n2] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
......@@ -1429,16 +1433,20 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j) out[j * n + i] = temp_out[j];
for (j = 0; j < n2; ++j)
out[j * n + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
for (j = 0; j < n; ++j)
output[j + i * n] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 4 times unitary
}
......@@ -1479,16 +1487,20 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
for (j = 0; j < n; ++j)
out[j * n2 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
for (j = 0; j < n2; ++j)
output[j + i * n2] =
saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 4 times unitary
}
......
This diff is collapsed.
......@@ -69,11 +69,11 @@ class AV1Trans16x32HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(48); }
TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); }
TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
const Ht16x32Param kArrayHt16x32Param_c[] = {
......
......@@ -69,11 +69,11 @@ class AV1Trans16x8HT : public libaom_test::TransformTestBase,
IhtFunc inv_txfm_;
};
TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
......
......@@ -70,10 +70,10 @@ class AV1Trans32x16HT : public libaom_test::TransformTestBase,
};
TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(43); }
TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(2); }
TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); }
TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
const Ht32x16Param kArrayHt32x16Param_c[] = {
......
......@@ -70,10 +70,10 @@ class AV1Trans8x16HT : public libaom_test::TransformTestBase,
};
TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment