Commit 67dbc8fe authored by Yunqing Wang, committed by Gerrit Code Review
Browse files

Merge "Add eob<=10 case in idct32x32" into experimental

parents 852ca19e c550bb3b
...@@ -1292,3 +1292,30 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { ...@@ -1292,3 +1292,30 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
out = dct_const_round_shift(out * cospi_16_64); out = dct_const_round_shift(out * cospi_16_64);
output[0] = ROUND_POWER_OF_TWO(out, 6); output[0] = ROUND_POWER_OF_TWO(out, 6);
} }
/* Reduced 32x32 inverse DCT for sparse blocks.
 *
 * The caller must guarantee that every non-zero coefficient lies in the
 * upper-left 4x4 corner of the block; only the first four input rows are
 * read.  All 32x32 entries of `output` are written, stored with a fixed
 * row stride of 32 elements.
 *
 * input  - coefficient block; consecutive rows are (pitch >> 1) int16_t
 *          elements apart (presumably pitch is in bytes -- confirm with
 *          the callers).
 * output - destination for the 32x32 result.
 * pitch  - input row pitch, halved internally before use.
 */
void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) {
  int16_t rows[32 * 32];
  int16_t col_in[32], col_out[32];
  const int row_stride = pitch >> 1;
  int r, c;

  /* Row pass: rows 4..31 of the intermediate stay zero, so clear the
   * whole buffer once and transform only the four rows that can carry
   * non-zero coefficients.
   */
  vpx_memset(rows, 0, sizeof(rows));
  for (r = 0; r < 4; ++r)
    idct32_1d(input + r * row_stride, rows + r * 32);

  /* Column pass: gather each column of the intermediate into a
   * contiguous vector, transform it, then scatter the rounded result
   * into the matching output column.
   */
  for (c = 0; c < 32; ++c) {
    const int16_t *src = rows + c;
    int16_t *dst = output + c;

    for (r = 0; r < 32; ++r)
      col_in[r] = src[r * 32];

    idct32_1d(col_in, col_out);

    for (r = 0; r < 32; ++r)
      dst[r * 32] = ROUND_POWER_OF_TWO(col_out[r], 6);
  }
}
...@@ -281,6 +281,9 @@ specialize vp9_short_idct32x32 ...@@ -281,6 +281,9 @@ specialize vp9_short_idct32x32
prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output" prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
specialize vp9_short_idct1_32x32 specialize vp9_short_idct1_32x32
prototype void vp9_short_idct10_32x32 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_idct10_32x32
prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int pitch, int tx_type" prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int pitch, int tx_type"
specialize vp9_short_iht8x8 specialize vp9_short_iht8x8
......
...@@ -314,14 +314,34 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, ...@@ -314,14 +314,34 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
if (eob) { if (eob) {
input[0] = input[0] * dq[0] / 2; input[0] = input[0] * dq[0] / 2;
if (eob == 1) { if (eob == 1) {
vp9_short_idct1_32x32_c(input, output); vp9_short_idct1_32x32(input, output);
add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32); add_constant_residual(output[0], pred, pitch, dest, stride, 32, 32);
input[0] = 0; input[0] = 0;
} else if (eob <= 10) {
input[1] = input[1] * dq[1] / 2;
input[2] = input[2] * dq[1] / 2;
input[3] = input[3] * dq[1] / 2;
input[32] = input[32] * dq[1] / 2;
input[33] = input[33] * dq[1] / 2;
input[34] = input[34] * dq[1] / 2;
input[64] = input[64] * dq[1] / 2;
input[65] = input[65] * dq[1] / 2;
input[96] = input[96] * dq[1] / 2;
// the idct halves ( >> 1) the pitch
vp9_short_idct10_32x32(input, output, 64);
input[0] = input[1] = input[2] = input[3] = 0;
input[32] = input[33] = input[34] = 0;
input[64] = input[65] = 0;
input[96] = 0;
add_residual(output, pred, pitch, dest, stride, 32, 32);
} else { } else {
int i; int i;
for (i = 1; i < 1024; i++) for (i = 1; i < 1024; i++)
input[i] = input[i] * dq[1] / 2; input[i] = input[i] * dq[1] / 2;
vp9_short_idct32x32_c(input, output, 64); vp9_short_idct32x32(input, output, 64);
vpx_memset(input, 0, 2048); vpx_memset(input, 0, 2048);
add_residual(output, pred, pitch, dest, stride, 32, 32); add_residual(output, pred, pitch, dest, stride, 32, 32);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment