Commit 27acc478 authored by Yushin Cho

Skip adding zero signal to prediction with DC-only idct

If the DC-only idct yields zero, we can skip the steps that
add the zero signal to the predicted signal.
DC-only idct cases occur more frequently at lower bit rates.

Similar changes can be made to the C versions of the high-bit-depth idct functions.

Change-Id: I53af22904568f7043091710da70ca8299bf361c5
parent b5bf51ec
......@@ -145,6 +145,8 @@ void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 4);
if (a1 == 0) return;
for (i = 0; i < 4; i++) {
dest[0] = clip_pixel_add(dest[0], a1);
dest[1] = clip_pixel_add(dest[1], a1);
......@@ -238,6 +240,7 @@ void aom_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 5);
if (a1 == 0) return;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) dest[i] = clip_pixel_add(dest[i], a1);
dest += stride;
......@@ -776,6 +779,7 @@ void aom_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
if (a1 == 0) return;
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1);
dest += stride;
......@@ -1245,6 +1249,7 @@ void aom_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
if (a1 == 0) return;
for (j = 0; j < 32; ++j) {
for (i = 0; i < 32; ++i) dest[i] = clip_pixel_add(dest[i], a1);
......
......@@ -163,6 +163,8 @@ void aom_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 4);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
RECON_AND_STORE4X4(dest + 0 * stride, dc_value);
......@@ -521,6 +523,8 @@ void aom_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 5);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
RECON_AND_STORE(dest + 0 * stride, dc_value);
......@@ -1291,6 +1295,8 @@ void aom_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
for (i = 0; i < 16; ++i) {
......@@ -3437,6 +3443,8 @@ void aom_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest,
a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
if (a == 0) return;
dc_value = _mm_set1_epi16(a);
for (j = 0; j < 32; ++j) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment