Commit 81760810 authored by Urvang Joshi

SMOOTH_PRED: Use 12-bit multiplications instead of 18-bit.

Compression performance is roughly neutral:

AWCY:
-----
                 High Latency     Low Latency
  All Keyframes  0.00             0.00
  Video overall  0.01            -0.01

Google sets:
------------

- All Keyframes:

  lowres  -0.001
  midres   0.000
  hdres    0.001

- Video overall:
  lowres   0.019
  midres   0.000
  hdres   -0.013

Change-Id: I89be2739203bf3e2848e4ba7ae2988c625f54513
parent 92109818
......@@ -260,13 +260,7 @@ static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
}
}
// Weights are quadratic from 'bs' to '1', scaled by 2^12.
// TODO(urvang): All weights can be at the same scale: going from '1' to '1/bs'
// instead (still scaled by 2^12 or more).
// Rationale: Given that max block dimension is 64 (=2^6), and max pixel value
// is below 2^12 (for both normal and highbitdepth), power of (31 - 6 - 12 - 1)
// = 12 is chosen so that all weighted sums in smooth_predictor() remain within
// 2^31 (unsigned integer) range.
// Weights are quadratic from '1' to '1 / block_size', scaled by 2^12.
static const int sm_weight_log2_scale = 12;
#if CONFIG_TX64X64
......@@ -275,29 +269,26 @@ static const uint32_t sm_weight_arrays[6][64] = {
static const uint32_t sm_weight_arrays[5][32] = {
#endif // CONFIG_TX64X64
// bs = 2
{ 8192, 4096 },
{ 4096, 2048 },
// bs = 4
{ 16384, 9557, 5461, 4096 },
{ 4096, 2389, 1365, 1024 },
// bs = 8
{ 32768, 25161, 18725, 13458, 9362, 6437, 4681, 4096 },
{ 4096, 3145, 2341, 1682, 1170, 805, 585, 512 },
// bs = 16
{ 65536, 57617, 50244, 43418, 37137, 31403, 26214, 21572, 17476, 13926, 10923,
8465, 6554, 5188, 4369, 4096 },
{ 4096, 3601, 3140, 2714, 2321, 1963, 1638, 1348, 1092, 870, 683, 529, 410,
324, 273, 256 },
// bs = 32
{ 131072, 123012, 115217, 107685, 100418, 93415, 86677, 80202,
73992, 68046, 62365, 56948, 51795, 46906, 42281, 37921,
33825, 29993, 26426, 23123, 20084, 17309, 14798, 12552,
10570, 8853, 7399, 6210, 5285, 4625, 4228, 4096 },
{ 4096, 3844, 3601, 3365, 3138, 2919, 2709, 2506, 2312, 2126, 1949,
1780, 1619, 1466, 1321, 1185, 1057, 937, 826, 723, 628, 541,
462, 392, 330, 277, 231, 194, 165, 145, 132, 128 },
#if CONFIG_TX64X64
// bs = 64
{ 262144, 254017, 246020, 238153, 230416, 222809, 215333, 207986,
200769, 193682, 186726, 179899, 173202, 166636, 160199, 153893,
147716, 141670, 135753, 129967, 124310, 118784, 113388, 108121,
102985, 97979, 93103, 88357, 83740, 79254, 74898, 70672,
66576, 62610, 58774, 55068, 51493, 48047, 44731, 41545,
38489, 35564, 32768, 30102, 27567, 25161, 22886, 20740,
18725, 16839, 15084, 13458, 11963, 10598, 9362, 8257,
7282, 6437, 5721, 5136, 4681, 4356, 4161, 4096 },
{ 4096, 3969, 3844, 3721, 3600, 3481, 3365, 3250, 3137, 3026, 2918,
2811, 2706, 2604, 2503, 2405, 2308, 2214, 2121, 2031, 1942, 1856,
1772, 1689, 1609, 1531, 1455, 1381, 1308, 1238, 1170, 1104, 1040,
978, 918, 860, 805, 751, 699, 649, 601, 556, 512, 470,
431, 393, 358, 324, 293, 263, 236, 210, 187, 166, 146,
129, 114, 101, 89, 80, 73, 68, 65, 64 },
#endif // CONFIG_TX64X64
};
......@@ -307,24 +298,23 @@ static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
const uint8_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint8_t right_pred = above[bs - 1]; // estimated by top-right pixel
const int log2_bs = (int)lround(log2(bs));
const int arr_index = log2_bs - 1;
const int arr_index = (int)lround(log2(bs)) - 1;
const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
// scale = 2 * bs * 2^sm_weight_log2_scale
const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
// scale = 2 * 2^sm_weight_log2_scale
const int log2_scale = 1 + sm_weight_log2_scale;
assert(log2_scale + 8 < 8 * 31); // sanity check: no overflow.
const uint32_t scaled_bs = sm_weights[0];
assert((int)scaled_bs == (bs << sm_weight_log2_scale));
const uint32_t scale = sm_weights[0];
assert((int)scale == (1 << sm_weight_log2_scale));
int r;
for (r = 0; r < bs; ++r) {
int c;
for (c = 0; c < bs; ++c) {
const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
sm_weights[c], scaled_bs - sm_weights[c] };
const uint32_t weights[] = { sm_weights[r], scale - sm_weights[r],
sm_weights[c], scale - sm_weights[c] };
uint32_t this_pred = 0;
int i;
assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
assert(scale >= sm_weights[r] && scale >= sm_weights[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
......@@ -1042,24 +1032,23 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
const uint16_t *left, int bd) {
const uint16_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bs - 1]; // estimated by top-right pixel
const int log2_bs = (int)lround(log2(bs));
const int arr_index = log2_bs - 1;
const int arr_index = (int)lround(log2(bs)) - 1;
const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
// scale = 2 * bs * 2^sm_weight_log2_scale
const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
// scale = 2 * 2^sm_weight_log2_scale
const int log2_scale = 1 + sm_weight_log2_scale;
assert(log2_scale + 8 < 8 * 31); // sanity check: no overflow.
const uint32_t scaled_bs = sm_weights[0];
assert((int)scaled_bs == (bs << sm_weight_log2_scale));
const uint32_t scale = sm_weights[0];
assert((int)scale == (1 << sm_weight_log2_scale));
int r;
for (r = 0; r < bs; ++r) {
int c;
for (c = 0; c < bs; ++c) {
const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
sm_weights[c], scaled_bs - sm_weights[c] };
const uint32_t weights[] = { sm_weights[r], scale - sm_weights[r],
sm_weights[c], scale - sm_weights[c] };
uint32_t this_pred = 0;
int i;
assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
assert(scale >= sm_weights[r] && scale >= sm_weights[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
......
......@@ -179,7 +179,7 @@ void TestIntraPred32(AvxPredFunc const *pred_funcs) {
"f162b51ed618d28b936974cff4391da5",
#if CONFIG_ALT_INTRA
"297e8fbb5d33c29b12b228fa9d7c40a4",
"7177dd1ae3b49441f997d439a5bd451a"
"a08d5b7e104c5fc2b203789ee5f725a7"
#else
"9e1370c6d42e08d357d9612c93a71cfc",
#endif // CONFIG_ALT_INTRA
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment