Commit 66f440f1 authored by Jingning Han

Refactoring hybrid transform coding

The forward and inverse hybrid transforms are now each performed by a
single function, with the transform dimension passed as an argument.

Added an inline function, clip8b, that clips the reconstructed pixels
to the range 0-255.

Change-Id: Id7d870b3e1aefc092721c80c0af6f641eb5f3747
parent 106815f8
......@@ -111,9 +111,10 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
#endif
#if CONFIG_HYBRIDTRANSFORM
#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
#include "vp8/common/blockd.h"
void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type);
/*
 * Inverse hybrid transform for a tx_dim x tx_dim block (floating-point
 * reference implementation).
 *
 *   input   - tx_dim * tx_dim coefficients, read as contiguous rows.
 *   output  - destination; consecutive rows are (pitch >> 1) shorts apart.
 *   pitch   - output row pitch; it is halved before stepping the short
 *             output pointer.
 *   tx_type - ADST_ADST / ADST_DCT pick the ADST table for the vertical
 *             pass, ADST_ADST / DCT_ADST pick it for the horizontal pass;
 *             every other case uses the DCT table.
 *   tx_dim  - 4 selects the 4-point tables, any other value selects the
 *             8-point tables. The scratch buffers hold 64 floats, so
 *             values above 8 are not supported.
 */
void vp8_ihtllm_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim);
#endif
......
......@@ -93,120 +93,17 @@ float iadst_8[64] = {
};
#endif
#if CONFIG_HYBRIDTRANSFORM
/*
 * Inverse 4x4 hybrid transform, floating-point reference version.
 *
 * Reads 16 contiguous coefficients from input, applies a vertical pass
 * followed by a horizontal pass, and stores the result divided by 8
 * (rounded with a 0.49 offset, symmetric about zero) into output.
 *
 *   input   - 16 coefficients laid out as four rows of four.
 *   output  - destination; consecutive rows are (pitch >> 1) shorts apart.
 *   pitch   - output row pitch; halved to obtain the step in shorts.
 *   tx_type - ADST_ADST / ADST_DCT use iadst_4 for the vertical pass,
 *             ADST_ADST / DCT_ADST use iadst_4 for the horizontal pass;
 *             all other values use idct_4.
 */
void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
  /* Float scratch space: the transform is evaluated in floating point. */
  float work[16], vert[16];
  const int out_step = pitch >> 1;
  const float *v_basis;
  const float *h_basis;
  int row, col, n;

  /* Load the coefficients into floating point. */
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      work[row * 4 + col] = (float)input[row * 4 + col];
    }
  }

  /* Vertical pass: row `row` of the basis against column `col` of work. */
  if (tx_type == ADST_ADST || tx_type == ADST_DCT) {
    v_basis = &iadst_4[0];
  } else {
    v_basis = &idct_4[0];
  }
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      vert[row * 4 + col] = 0;
      for (n = 0; n < 4; n++) {
        vert[row * 4 + col] += v_basis[row * 4 + n] * work[n * 4 + col];
      }
    }
  }

  /* Horizontal pass: row `row` of vert against row `col` of the basis. */
  if (tx_type == ADST_ADST || tx_type == DCT_ADST) {
    h_basis = &iadst_4[0];
  } else {
    h_basis = &idct_4[0];
  }
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      work[row * 4 + col] = 0;
      for (n = 0; n < 4; n++) {
        work[row * 4 + col] += vert[row * 4 + n] * h_basis[col * 4 + n];
      }
    }
  }

  /* Scale down by 8, round, and store as shorts at the output pitch. */
  for (row = 0; row < 4; row++) {
    short *dst = output + row * out_step;
    for (col = 0; col < 4; col++) {
      const float v = work[row * 4 + col];
      if (v > 0) {
        dst[col] = (short)(v / 8 + 0.49);
      } else {
        dst[col] = -(short)(-v / 8 + 0.49);
      }
    }
  }
}
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
void vp8_ihtllm_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim) {
int i, j, k;
float bufa[64], bufb[64]; // buffers are for floating-point test purpose
// the implementation could be simplified in
// conjunction with integer transform
// further notice, since we are thinking to use one
// function for both 4x4 and 8x8 transforms, the
// temporary buffers are simply initialized with 64.
short *ip = input;
short *op = output;
int shortpitch = pitch >> 1;
......@@ -218,12 +115,12 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
float *ptv, *pth;
// load and convert residual array into floating-point
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
pfa[i] = (float)ip[i];
}
pfa += 8;
ip += 8;
pfa += tx_dim;
ip += tx_dim;
}
// vertical transformation
......@@ -233,25 +130,25 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
switch(tx_type) {
case ADST_ADST :
case ADST_DCT :
ptv = &iadst_8[0];
ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
break;
default :
ptv = &idct_8[0];
ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
break;
}
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
pfb[i] = 0 ;
for(k = 0; k < 8; k++) {
pfb[i] += ptv[k] * pfa[(k<<3)];
for(k = 0; k < tx_dim; k++) {
pfb[i] += ptv[k] * pfa[(k * tx_dim)];
}
pfa += 1;
}
pfb += 8;
ptv += 8;
pfb += tx_dim;
ptv += tx_dim;
pfa = &bufa[0];
}
......@@ -262,34 +159,34 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
pth = &iadst_8[0];
pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
break;
default :
pth = &idct_8[0];
pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
break;
}
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
pfa[i] = 0;
for(k = 0; k < 8; k++) {
for(k = 0; k < tx_dim; k++) {
pfa[i] += pfb[k] * pth[k];
}
pth += 8;
pth += tx_dim;
}
pfa += 8;
pfb += 8;
pfa += tx_dim;
pfb += tx_dim;
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
pth = &iadst_8[0];
pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
break;
default :
pth = &idct_8[0];
pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
break;
}
}
......@@ -298,13 +195,14 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
op = output;
pfa = &bufa[0];
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :
-(short)( - pfa[i] / 8 + 0.49);
}
op += shortpitch;
pfa += 8;
pfa += tx_dim;
}
}
#endif
......
......@@ -33,7 +33,7 @@ static void recon_dcblock_8x8(MACROBLOCKD *x) {
#if CONFIG_HYBRIDTRANSFORM
/*
 * Runs the 4x4 inverse hybrid transform on block b: reads b->dqcoeff and
 * writes b->diff, using the transform type stored in
 * b->bmi.as_mode.tx_type. The rtcd argument is not referenced in the body.
 *
 * NOTE(review): this listing shows both the removed call
 * (vp8_iht4x4llm_c) and its replacement (vp8_ihtllm_c with tx_dim = 4)
 * back to back. Per the commit description only the unified call should
 * remain in the resulting source - confirm against the real file.
 */
void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type);
vp8_ihtllm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type, 4);
}
#endif
......
......@@ -392,7 +392,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
txfm_map(b, pred_mode_conv(i8x8mode));
vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type,
q, dq, pre, dst, 16, stride);
// vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
q += 64;
#else
for (j = 0; j < 4; j++) {
......
......@@ -55,7 +55,7 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
input[i] = dq[i] * input[i];
}
vp8_iht4x4llm_c( input, output, 4 << 1, tx_type );
vp8_ihtllm_c(input, output, 4 << 1, tx_type, 4);
vpx_memset(input, 0, 32);
......@@ -95,7 +95,7 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
input[i] = dq[1] * input[i];
}
vp8_iht8x8llm_c(input, output, 16, tx_type);
vp8_ihtllm_c(input, output, 16, tx_type, 8);
vpx_memset(input, 0, 128);
......@@ -117,9 +117,10 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
diff_ptr += 8;
pred += pitch;
}
diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4;
dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4;
pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4;
// shift buffer pointers to next 4x4 block in the submacroblock
diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4;
dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4;
pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4;
}
}
#endif
......
......@@ -329,114 +329,9 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8
}
#if CONFIG_HYBRIDTRANSFORM
/*
 * Forward 4x4 hybrid transform, floating-point reference version.
 *
 * Reads a 4x4 residual block from input (rows pitch / 2 shorts apart),
 * applies a vertical pass followed by a horizontal pass, and writes the
 * 16 results multiplied by 8 (rounded with a 0.49 offset, symmetric about
 * zero) to output as four contiguous rows of four.
 *
 *   input   - residual samples; four shorts are read from each of 4 rows.
 *   output  - 16 contiguous transform coefficients.
 *   pitch   - input row pitch; halved to obtain the step in shorts.
 *   tx_type - ADST_ADST / ADST_DCT use adst_4 for the vertical pass,
 *             ADST_ADST / DCT_ADST use adst_4 for the horizontal pass;
 *             all other values use dct_4.
 */
void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
  /* Float scratch space: the transform is evaluated in floating point. */
  float work[16], vert[16];
  const int in_step = pitch / 2;
  const float *v_basis;
  const float *h_basis;
  int row, col, n;

  /* Load the residual block into floating point. */
  for (row = 0; row < 4; row++) {
    const short *src = input + row * in_step;
    for (col = 0; col < 4; col++) {
      work[row * 4 + col] = (float)src[col];
    }
  }

  /* Vertical pass: row `row` of the basis against column `col` of work. */
  if (tx_type == ADST_ADST || tx_type == ADST_DCT) {
    v_basis = &adst_4[0];
  } else {
    v_basis = &dct_4[0];
  }
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      vert[row * 4 + col] = 0;
      for (n = 0; n < 4; n++) {
        vert[row * 4 + col] += v_basis[row * 4 + n] * work[n * 4 + col];
      }
    }
  }

  /* Horizontal pass: row `row` of vert against row `col` of the basis. */
  if (tx_type == ADST_ADST || tx_type == DCT_ADST) {
    h_basis = &adst_4[0];
  } else {
    h_basis = &dct_4[0];
  }
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      work[row * 4 + col] = 0;
      for (n = 0; n < 4; n++) {
        work[row * 4 + col] += vert[row * 4 + n] * h_basis[col * 4 + n];
      }
    }
  }

  /* Scale up by 8, round, and store the coefficients contiguously. */
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      const float v = work[row * 4 + col];
      if (v > 0) {
        output[row * 4 + col] = (short)(8 * v + 0.49);
      } else {
        output[row * 4 + col] = -(short)(-8 * v + 0.49);
      }
    }
  }
}
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
void vp8_fht_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim) {
int i, j, k;
float bufa[64], bufb[64]; // buffers are for floating-point test purpose
// the implementation could be simplified in
......@@ -451,11 +346,11 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
float *ptv, *pth;
// load and convert residual array into floating-point
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
pfa[i] = (float)ip[i];
}
pfa += 8;
pfa += tx_dim;
ip += pitch / 2;
}
......@@ -466,24 +361,24 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
switch(tx_type) {
case ADST_ADST :
case ADST_DCT :
ptv = &adst_8[0];
ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
break;
default :
ptv = &dct_8[0];
ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
break;
}
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
pfb[i] = 0;
for(k = 0; k < 8; k++) {
pfb[i] += ptv[k] * pfa[(k<<3)];
for(k = 0; k < tx_dim; k++) {
pfb[i] += ptv[k] * pfa[(k * tx_dim)];
}
pfa += 1;
}
pfb += 8;
ptv += 8;
pfb += tx_dim;
ptv += tx_dim;
pfa = &bufa[0];
}
......@@ -494,34 +389,34 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
pth = &adst_8[0];
pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
break;
default :
pth = &dct_8[0];
pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
break;
}
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
pfa[i] = 0;
for(k = 0; k < 8; k++) {
for(k = 0; k < tx_dim; k++) {
pfa[i] += pfb[k] * pth[k];
}
pth += 8;
pth += tx_dim;
}
pfa += 8;
pfb += 8;
pfa += tx_dim;
pfb += tx_dim;
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
pth = &adst_8[0];
pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
break;
default :
pth = &dct_8[0];
pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
break;
}
}
......@@ -530,13 +425,13 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
op = output ;
pfa = &bufa[0] ;
for(j = 0; j < 8; j++) {
for(i = 0; i < 8; i++) {
for(j = 0; j < tx_dim; j++) {
for(i = 0; i < tx_dim; i++) {
op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :
-(short)(- 8 * pfa[i] + 0.49);
}
op += 8;
pfa += 8;
op += tx_dim;
pfa += tx_dim;
}
}
#endif
......@@ -582,14 +477,6 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
}
}
#if CONFIG_HYBRIDTRANSFORM
/*
 * Forward hybrid transform of two 4x4 blocks with the same tx_type.
 *
 * The first block starts at input and writes output[0..15]; the second
 * starts 4 shorts further into input and writes output[16..31]. Both
 * share the same pitch.
 */
void vp8_fht8x4_c(short *input, short *output, int pitch,
                  TX_TYPE tx_type) {
  short *const second_in = &input[4];
  short *const second_out = &output[16];

  vp8_fht4x4_c(input, output, pitch, tx_type);
  vp8_fht4x4_c(second_in, second_out, pitch, tx_type);
}
#endif
void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
{
vp8_short_fdct4x4_c(input, output, pitch);
......
......@@ -23,9 +23,9 @@
#endif
#if CONFIG_HYBRIDTRANSFORM
void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
/*
 * Forward hybrid transform for a tx_dim x tx_dim block (floating-point
 * reference implementation).
 *
 *   input   - residual samples; rows are (pitch / 2) shorts apart.
 *   output  - tx_dim * tx_dim coefficients written as contiguous rows.
 *   pitch   - input row pitch; halved to obtain the step in shorts.
 *   tx_type - ADST_ADST / ADST_DCT pick the ADST table for the vertical
 *             pass, ADST_ADST / DCT_ADST pick it for the horizontal pass;
 *             every other case uses the DCT table.
 *   tx_dim  - 4 selects the 4-point tables, any other value selects the
 *             8-point tables. The scratch buffers hold 64 floats, so
 *             values above 8 are not supported.
 */
void vp8_fht_c(short *input, short *output, int pitch,
TX_TYPE tx_type, int tx_dim);
#endif
#if CONFIG_TX16X16
......
......@@ -91,8 +91,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
if(active_ht) {
b->bmi.as_mode.test = b->bmi.as_mode.first;
txfm_map(b, b->bmi.as_mode.first);
vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);
vp8_ht_quantize_b(be, b);
vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;
} else {
......@@ -317,16 +316,11 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
vp8_subtract_4b_c(be, b, 16);
txfm_map(b, pred_mode_conv(b->bmi.as_mode.first));
vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32,
b->bmi.as_mode.tx_type);
vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
b->bmi.as_mode.tx_type, 8);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
b->bmi.as_mode.tx_type);
// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
// x->quantize_b_8x8(x->block + idx, xd->block + idx);
// vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
b->bmi.as_mode.tx_type, 8);
// reconstruct submacroblock
for (i = 0; i < 4; i++) {
......
......@@ -612,20 +612,20 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type,
if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) {
switch (b->bmi.as_mode.tx_type) {
case ADST_DCT:
pt_scan = vp8_row_scan;
scan = vp8_row_scan;
break;
case DCT_ADST:
pt_scan = vp8_col_scan;
scan = vp8_col_scan;