/*
 * Integer 8x8 forward DCT (experimental build option CONFIG_INT_8X8FDCT).
 *
 * Patch context recovered from the diff this definition was taken from:
 *   - configure: "int_8x8fdct" is appended to EXPERIMENT_LIST.
 *   - vp8/encoder/dct.c: this definition is the `#if CONFIG_INT_8X8FDCT`
 *     branch; the pre-existing
 *     vp8_short_fdct8x8_c(short *block, short *coefs, int pitch)
 *     becomes the `#else` branch, and the matching `#endif` is placed just
 *     before vp8_short_fhaar2x2_c.
 *   - vp8/encoder/encodemb.c: the vp8_clear_system_state() call at the top
 *     of vp8_transform_mbuv_8x8, vp8_transform_intra_mby_8x8,
 *     vp8_transform_mb_8x8 and vp8_transform_mby_8x8 is wrapped in
 *     `#if !CONFIG_INT_8X8FDCT`.
 *     NOTE(review): presumably because this version uses no floating
 *     point -- confirm against vp8_clear_system_state().
 */

/* Values equal round(2^14 * cos(k * pi / 16)) for k = 1..7. */
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
static const int xC4S4 = 11585;
static const int xC5S3 = 9102;
static const int xC6S2 = 6270;
static const int xC7S1 = 3196;

/* Number of fractional bits carried by the xC?S? constants. */
#define SHIFT_BITS 14
/* Adds one half (in SHIFT_BITS fixed point) to X so the following shift
 * rounds instead of truncating.  Expands to a single expression, so every
 * use needs its own terminating ';'. */
#define DOROUND(X) ((X) += (1 << (SHIFT_BITS - 1)))

/* The result of the column pass is rounded and shifted down by this much. */
#define FINAL_SHIFT 3
#define FINAL_ROUNDING (1 << (FINAL_SHIFT - 1))
/* Extra precision given to the input samples before the row pass. */
#define IN_SHIFT (FINAL_SHIFT + 1)

/* Returns coeff * value / 2^SHIFT_BITS, rounded by adding one half before
 * the shift.  Negative products rely on >> being an arithmetic shift. */
static int fdct_mul_round(int coeff, int value)
{
    int product = coeff * value;

    DOROUND(product);
    return product >> SHIFT_BITS;
}

/* One 8-point butterfly pass: reads in[0..7], writes out[0..7]. */
static void fdct8_1d_int(const int in[8], int out[8])
{
    /* Sums and differences of the sample pairs used below. */
    const int s07 = in[0] + in[7];
    const int s12 = in[1] + in[2];
    const int s34 = in[3] + in[4];
    const int s56 = in[5] + in[6];
    const int d07 = in[0] - in[7];
    const int d12 = in[1] - in[2];
    const int d34 = in[3] - in[4];
    const int d56 = in[5] - in[6];

    const int s0734 = s07 + s34;
    const int s1256 = s12 + s56;

    /* Products shared by the rotations for outputs 1/7 and 3/5. */
    const int common1 = fdct_mul_round(xC4S4, s12 - s56);
    const int common2 = fdct_mul_round(xC4S4, d12 + d56);

    int rot_x;
    int rot_y;

    out[0] = fdct_mul_round(xC4S4, s0734 + s1256);
    out[4] = fdct_mul_round(xC4S4, s0734 - s1256);

    /* Rotation for outputs 2 and 6. */
    rot_x = d12 - d56;
    rot_y = s07 - s34;
    out[2] = fdct_mul_round(xC6S2, rot_x) + fdct_mul_round(xC2S6, rot_y);
    out[6] = fdct_mul_round(xC6S2, rot_y) - fdct_mul_round(xC2S6, rot_x);

    /* Rotation for outputs 1 and 7. */
    rot_x = common1 + d07;
    rot_y = -(d34 + common2);
    out[1] = fdct_mul_round(xC1S7, rot_x) - fdct_mul_round(xC7S1, rot_y);
    out[7] = fdct_mul_round(xC7S1, rot_x) + fdct_mul_round(xC1S7, rot_y);

    /* Rotation for outputs 3 and 5. */
    rot_x = d07 - common1;
    rot_y = d34 - common2;
    out[3] = fdct_mul_round(xC3S5, rot_x) - fdct_mul_round(xC5S3, rot_y);
    out[5] = fdct_mul_round(xC5S3, rot_x) + fdct_mul_round(xC3S5, rot_y);
}

/*
 * Forward 8x8 DCT using only integer arithmetic.
 *
 * InputData   first sample of an 8x8 block; consecutive rows are
 *             pitch / 2 shorts apart (pitch is halved before use, so it
 *             is a byte stride for 2-byte shorts).
 * OutputData  receives 64 coefficients, 8 per row, rows contiguous.
 * pitch       input row stride as described above.
 *
 * Rows are transformed first with samples scaled up by 2^IN_SHIFT, then the
 * columns; each column result is rounded and shifted right by FINAL_SHIFT.
 * Worked example: an all-ones block yields OutputData[0] == 64 and zeros
 * elsewhere.
 *
 * NOTE(review): all intermediates are plain int.  With 32-bit int an
 * all-255 block reaches 11585 * 184632 (about 2.139e9) in the column DC
 * term, just under INT_MAX; inputs of larger magnitude would overflow --
 * confirm the callers' sample range.
 */
void vp8_short_fdct8x8_c ( short * InputData, short * OutputData, int pitch)
{
    const int short_pitch = pitch >> 1;
    const short *src = InputData;
    int InterData[64];
    int line[8];
    int coef[8];
    int row;
    int col;
    int i;

    /* Row pass. */
    for (row = 0; row < 8; row++)
    {
        for (i = 0; i < 8; i++)
        {
            /* Multiply rather than '<<': left-shifting a negative sample
             * is undefined behaviour in C. */
            line[i] = src[i] * (1 << IN_SHIFT);
        }

        fdct8_1d_int(line, &InterData[row * 8]);
        src += short_pitch;
    }

    /* Column pass over the row results, with the final down-scaling. */
    for (col = 0; col < 8; col++)
    {
        for (i = 0; i < 8; i++)
        {
            line[i] = InterData[i * 8 + col];
        }

        fdct8_1d_int(line, coef);

        for (i = 0; i < 8; i++)
        {
            OutputData[i * 8 + col] =
                (short)((coef[i] + FINAL_ROUNDING) >> FINAL_SHIFT);
        }
    }
}