Commit d3b99738 authored by David Barker, committed by Debargha Mukherjee

[wedge/compound-segment, normative] Reduce multiple rounding

As described in the linked bug report, the masked blend operation
contains multiple stages of rounding. This commit replaces one
intermediate rounding step with a plain right shift, which should be
slightly faster and more accurate. A small standalone sketch of the
rounding identity this relies on follows the commit metadata below.

BUG=aomedia:1292

Change-Id: Ib24ce687e628b05d645fbde5306ee552f7ad876b
parent f7a12420
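
For context (not part of the patch): a minimal standalone sketch of the double-rounding issue. It assumes libaom's usual macro ROUND_POWER_OF_TWO(value, n) == ((value) + ((1 << (n)) >> 1)) >> (n) for non-negative inputs and AOM_BLEND_A64_ROUND_BITS == 6; the later rounding stage z is a hypothetical stand-in for the subsequent conversion of the d32 output back to pixel precision. It checks the identity ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y + z) that the new comment cites, and counts how often rounding twice would deviate from a single rounding.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed to match aom_dsp/aom_dsp_common.h (valid for non-negative values). */
#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))

int main(void) {
  const int y = 6; /* AOM_BLEND_A64_ROUND_BITS: bits dropped inside the blend */
  const int z = 4; /* hypothetical later rounding down to pixel precision */
  long mismatches = 0;
  for (int32_t x = 0; x < (1 << 20); ++x) {
    const int32_t single = ROUND_POWER_OF_TWO(x, y + z);
    /* New behaviour: truncating shift inside the blend, one rounding later. */
    const int32_t shift_then_round = ROUND_POWER_OF_TWO(x >> y, z);
    /* Old behaviour: round inside the blend, then round again later. */
    const int32_t round_then_round =
        ROUND_POWER_OF_TWO(ROUND_POWER_OF_TWO(x, y), z);
    assert(shift_then_round == single); /* the identity always holds */
    if (round_then_round != single) ++mismatches;
  }
  printf("inputs where double rounding deviates from a single rounding: %ld\n",
         mismatches);
  return 0;
}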
@@ -22,6 +22,17 @@
 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
 // be the same as dst, or dst can be different from both sources.
+// NOTE(david.barker): The input and output of aom_blend_a64_d32_mask_c() are
+// in a higher intermediate precision, and will later be rounded down to pixel
+// precision.
+// Thus, in order to avoid double-rounding, we want to use normal right shifts
+// within this function, not ROUND_POWER_OF_TWO.
+// This works because of the identity:
+// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
+//
+// In contrast, the output of the non-d32 functions will not be further rounded,
+// so we *should* use ROUND_POWER_OF_TWO there.
 void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
                               const int32_t *src0, uint32_t src0_stride,
                               const int32_t *src1, uint32_t src1_stride,
@@ -41,8 +52,10 @@ void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
     for (i = 0; i < h; ++i) {
       for (j = 0; j < w; ++j) {
         const int m = mask[i * mask_stride + j];
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
+        dst[i * dst_stride + j] =
+            ((m * src0[i * src0_stride + j] +
+              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A64_ROUND_BITS);
       }
     }
   } else if (subw == 1 && subh == 1) {
@@ -54,8 +67,10 @@ void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
                 mask[(2 * i) * mask_stride + (2 * j + 1)] +
                 mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
             2);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
+        dst[i * dst_stride + j] =
+            ((m * src0[i * src0_stride + j] +
+              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A64_ROUND_BITS);
       }
     }
   } else if (subw == 1 && subh == 0) {
@@ -63,8 +78,10 @@ void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
       for (j = 0; j < w; ++j) {
         const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
                                     mask[i * mask_stride + (2 * j + 1)]);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
+        dst[i * dst_stride + j] =
+            ((m * src0[i * src0_stride + j] +
+              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A64_ROUND_BITS);
       }
     }
   } else {
@@ -72,8 +89,10 @@ void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
       for (j = 0; j < w; ++j) {
         const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
                                     mask[(2 * i + 1) * mask_stride + j]);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
+        dst[i * dst_stride + j] =
+            ((m * src0[i * src0_stride + j] +
+              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A64_ROUND_BITS);
       }
     }
   }
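
For reference, the per-pixel change repeated in each hunk above, written out as two standalone helpers. This is an illustrative sketch, not code from the patch: the macro values mirror aom_dsp/blend.h (AOM_BLEND_A64_MAX_ALPHA == 64, AOM_BLEND_A64_ROUND_BITS == 6), and AOM_BLEND_A64 is assumed to expand to the rounded form shown in blend_pixel_old. The only difference is that the intermediate ROUND_POWER_OF_TWO becomes a truncating shift, deferring all rounding to the later conversion of the d32 output back to pixel precision.

#include <stdint.h>
#include <stdio.h>

#define AOM_BLEND_A64_ROUND_BITS 6
#define AOM_BLEND_A64_MAX_ALPHA (1 << AOM_BLEND_A64_ROUND_BITS) /* 64 */
#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))

/* Before: the weighted sum is rounded inside the blend (first rounding stage). */
static int32_t blend_pixel_old(int m, int32_t v0, int32_t v1) {
  return ROUND_POWER_OF_TWO(m * v0 + (AOM_BLEND_A64_MAX_ALPHA - m) * v1,
                            AOM_BLEND_A64_ROUND_BITS);
}

/* After: plain right shift; rounding happens only once, later. */
static int32_t blend_pixel_new(int m, int32_t v0, int32_t v1) {
  return (m * v0 + (AOM_BLEND_A64_MAX_ALPHA - m) * v1) >>
         AOM_BLEND_A64_ROUND_BITS;
}

int main(void) {
  /* Example: mask value 33 blending two intermediate-precision samples. */
  printf("old=%d new=%d\n", blend_pixel_old(33, 1000, 200),
         blend_pixel_new(33, 1000, 200));
  return 0;
}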