Commit 38fa4871 authored by Jingning Han's avatar Jingning Han
Browse files

Shortcut 8x8/16x16 inverse 2D-DCT

This commit brought back the shortcut implementation of 8x8/16x16
inverse 2D-DCT. When the eob <= 10, it skips the inverse transform
operations on row 4:7/4:15 in the first round. For bus_cif at 1000
kbps, this provides about 2% speed-up at speed 0.

Change-Id: I453e2d72956467d75be4ad8c04b4482ab889d572
parent 325e0aa6
......@@ -95,6 +95,9 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
// DC only DCT coefficient
vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
vp9_short_idct10_8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
......@@ -128,6 +131,9 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
input[0] = 0;
vp9_add_constant_residual_16x16(out, dest, stride);
} else if (eob <= 10) {
vp9_short_idct10_16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
......
......@@ -52,10 +52,21 @@ static void inverse_transform_b_8x8_add(MACROBLOCKD *xd, int eob,
int stride) {
if (eob <= 1)
vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
else
vp9_short_idct8x8_add(dqcoeff, dest, stride);
}
static void inverse_transform_b_16x16_add(MACROBLOCKD *xd, int eob,
int16_t *dqcoeff, uint8_t *dest,
int stride) {
if (eob <= 10)
vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
else
vp9_short_idct16x16_add(dqcoeff, dest, stride);
}
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const MACROBLOCKD *const xd = &x->e_mbd;
......@@ -538,7 +549,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
break;
case TX_16X16:
vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
inverse_transform_b_16x16_add(xd, pd->eobs[block], dqcoeff,
dst, pd->dst.stride);
break;
case TX_8X8:
inverse_transform_b_8x8_add(xd, pd->eobs[block], dqcoeff,
......@@ -691,7 +703,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
inverse_transform_b_16x16_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
else
vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment