Commit fcbff9ee authored by Jingning Han's avatar Jingning Han

Replacing the 8x8 DCT with 8x8 ADST/DCT for I8x8

Fixed the code review comments.

Under the htrans8x8 experiment the 8X8 DCT in the
I8X8 mode is replaced with a combination of 8X8 ADST and
DCT.

Overall coding gains with the htrans8x8 experiment are:
derf:   0.486
std-hd: 1.040
hd:     1.063
yt:     0.506

Note that part of the gain comes from bigger transforms
(8x8 instead of 4x4) and part comes from replacing the DCT
with the ADST.

Change-Id: I92ca6bbfce11b4165d612b81d9adfad4d010c775
parent e6de9c2e
......@@ -223,8 +223,8 @@ EXPERIMENT_LIST="
pred_filter
lossless
hybridtransform
hybridtransform8x8
switchable_interp
htrans8x8
tx16x16
"
CONFIG_LIST="
......
......@@ -54,7 +54,6 @@ typedef struct {
#define PLANE_TYPE_UV 2
#define PLANE_TYPE_Y_WITH_DC 3
typedef char ENTROPY_CONTEXT;
typedef struct {
ENTROPY_CONTEXT y1[4];
......@@ -179,6 +178,50 @@ typedef enum {
B_MODE_COUNT
} B_PREDICTION_MODE;
#if CONFIG_HYBRIDTRANSFORM8X8
// Convert an MB_PREDICTION_MODE to the matching B_PREDICTION_MODE so that
// helpers keyed on B_PREDICTION_MODE (e.g. txfm_map) can be driven from a
// macroblock-level intra mode.
//
// mode:    one of the ten intra modes listed in the switch below.
// returns: the corresponding B_*_PRED value. For any other mode the assert
//          fires in debug builds; when asserts are compiled out the function
//          returns B_DC_PRED instead of an indeterminate value.
static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
  // Start from a defined fallback so the default path never returns an
  // uninitialized value. B_DC_PRED falls into txfm_map's default case
  // (DCT_DCT), i.e. the plain transform.
  B_PREDICTION_MODE b_mode = B_DC_PRED;
  switch (mode) {
    case DC_PRED:
      b_mode = B_DC_PRED;
      break;
    case V_PRED:
      b_mode = B_VE_PRED;
      break;
    case H_PRED:
      b_mode = B_HE_PRED;
      break;
    case TM_PRED:
      b_mode = B_TM_PRED;
      break;
    case D45_PRED:
      b_mode = B_LD_PRED;
      break;
    case D135_PRED:
      b_mode = B_RD_PRED;
      break;
    case D117_PRED:
      b_mode = B_VR_PRED;
      break;
    case D153_PRED:
      b_mode = B_HD_PRED;
      break;
    case D27_PRED:
      b_mode = B_VL_PRED;
      break;
    case D63_PRED:
      b_mode = B_HU_PRED;
      break;
    default :
      // for debug purpose, to be removed after full testing
      assert(0);
      break;
  }
  return b_mode;
}
#endif
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
......@@ -389,6 +432,32 @@ typedef struct MacroBlockD {
} MACROBLOCKD;
#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
// Pick the hybrid transform type for block `b` from its intra prediction
// mode and store it in b->bmi.as_mode.tx_type:
//   B_TM_PRED, B_RD_PRED            -> ADST_ADST
//   B_VE_PRED, B_VR_PRED            -> ADST_DCT
//   B_HE_PRED, B_HD_PRED, B_HU_PRED -> DCT_ADST
//   every other mode                -> DCT_DCT
static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) {
  if (bmode == B_TM_PRED || bmode == B_RD_PRED) {
    b->bmi.as_mode.tx_type = ADST_ADST;
    return;
  }
  if (bmode == B_VE_PRED || bmode == B_VR_PRED) {
    b->bmi.as_mode.tx_type = ADST_DCT;
    return;
  }
  if (bmode == B_HE_PRED || bmode == B_HD_PRED || bmode == B_HU_PRED) {
    b->bmi.as_mode.tx_type = DCT_ADST;
    return;
  }
  // No directional match: fall back to the plain 2-D DCT.
  b->bmi.as_mode.tx_type = DCT_DCT;
}
#endif
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
......
......@@ -434,7 +434,7 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8]
{ 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
}
}
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
,
{ /* block Type 3 */
{ /* Coeff Band 0 */
......
......@@ -60,9 +60,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
/* Coefficients are predicted via a 3-dimensional probability table. */
/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
#define BLOCK_TYPES 4
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
#define BLOCK_TYPES_8X8 4
#else
#define BLOCK_TYPES_8X8 3
......
......@@ -35,6 +35,8 @@ static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int rounding = 0;
// TODO: these transforms can be further converted into integer forms
// for complexity optimization
#if CONFIG_HYBRIDTRANSFORM
float idct_4[16] = {
0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099,
......@@ -51,11 +53,52 @@ float iadst_4[16] = {
};
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
// 8x8 inverse DCT matrix, stored row-major as 64 floats (8 rows of 8; each
// row is split across two source lines). vp8_iht8x8llm_c reads row n as the
// eight weights applied to the input coefficients to produce output sample n.
// Entry for entry this is the transpose of the forward table dct_8 used by
// vp8_fht8x8_c.
float idct_8[64] = {
0.353553390593274, 0.490392640201615, 0.461939766255643, 0.415734806151273,
0.353553390593274, 0.277785116509801, 0.191341716182545, 0.097545161008064,
0.353553390593274, 0.415734806151273, 0.191341716182545, -0.097545161008064,
-0.353553390593274, -0.490392640201615, -0.461939766255643, -0.277785116509801,
0.353553390593274, 0.277785116509801, -0.191341716182545, -0.490392640201615,
-0.353553390593274, 0.097545161008064, 0.461939766255643, 0.415734806151273,
0.353553390593274, 0.097545161008064, -0.461939766255643, -0.277785116509801,
0.353553390593274, 0.415734806151273, -0.191341716182545, -0.490392640201615,
0.353553390593274, -0.097545161008064, -0.461939766255643, 0.277785116509801,
0.353553390593274, -0.415734806151273, -0.191341716182545, 0.490392640201615,
0.353553390593274, -0.277785116509801, -0.191341716182545, 0.490392640201615,
-0.353553390593274, -0.097545161008064, 0.461939766255643, -0.415734806151273,
0.353553390593274, -0.415734806151273, 0.191341716182545, 0.097545161008064,
-0.353553390593274, 0.490392640201615, -0.461939766255643, 0.277785116509801,
0.353553390593274, -0.490392640201615, 0.461939766255643, -0.415734806151273,
0.353553390593274, -0.277785116509801, 0.191341716182545, -0.097545161008064
};
// 8x8 inverse ADST matrix, stored row-major as 64 floats (8 rows of 8; each
// row is split across two source lines). vp8_iht8x8llm_c selects it instead
// of idct_8 for the vertical pass when tx_type is ADST_ADST or ADST_DCT, and
// for the horizontal pass when tx_type is ADST_ADST or DCT_ADST.
// Entry for entry this is the transpose of the forward table adst_8 (the
// last printed digit differs on a few entries).
float iadst_8[64] = {
0.089131608307533, 0.255357107325376, 0.387095214016349, 0.466553967085785,
0.483002021635509, 0.434217976756762, 0.326790388032145, 0.175227946595735,
0.175227946595735, 0.434217976756762, 0.466553967085785, 0.255357107325376,
-0.089131608307533, -0.387095214016348, -0.483002021635509, -0.326790388032145,
0.255357107325376, 0.483002021635509, 0.175227946595735, -0.326790388032145,
-0.466553967085785, -0.089131608307533, 0.387095214016349, 0.434217976756762,
0.326790388032145, 0.387095214016349, -0.255357107325376, -0.434217976756762,
0.175227946595735, 0.466553967085786, -0.089131608307534, -0.483002021635509,
0.387095214016349, 0.175227946595735, -0.483002021635509, 0.089131608307533,
0.434217976756762, -0.326790388032145, -0.255357107325377, 0.466553967085785,
0.434217976756762, -0.089131608307533, -0.326790388032145, 0.483002021635509,
-0.255357107325376, -0.175227946595735, 0.466553967085785, -0.387095214016348,
0.466553967085785, -0.326790388032145, 0.089131608307533, 0.175227946595735,
-0.387095214016348, 0.483002021635509, -0.434217976756762, 0.255357107325376,
0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348,
0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532
};
#endif
#if CONFIG_HYBRIDTRANSFORM
void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
int i, j, k;
float bufa[16], bufb[16]; // buffers are for floating-point test purpose
// the implementation could be simplified in conjunction with integer transform
// the implementation could be simplified in
// conjunction with integer transform
short *ip = input;
short *op = output;
int shortpitch = pitch >> 1;
......@@ -158,6 +201,113 @@ void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
}
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
// Inverse 8x8 hybrid (ADST/DCT) transform, floating-point reference version.
//
// input:   64 coefficients, read as a contiguous 8x8 block (row stride 8).
// output:  8x8 result; consecutive rows are (pitch >> 1) shorts apart.
// pitch:   output row pitch, halved to get the stride in shorts.
// tx_type: ADST_ADST / ADST_DCT use iadst_8 for the vertical pass,
//          ADST_ADST / DCT_ADST use iadst_8 for the horizontal pass;
//          every other combination uses idct_8.
void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
  // Float scratch buffers; the transform is still a floating-point test
  // implementation and could be simplified once an integer form exists.
  float bufa[64], bufb[64];
  float *vert, *horz;  // matrices for the vertical / horizontal passes
  const int out_stride = pitch >> 1;
  int row, col, k;

  if (tx_type == ADST_ADST || tx_type == ADST_DCT)
    vert = &iadst_8[0];
  else
    vert = &idct_8[0];

  if (tx_type == ADST_ADST || tx_type == DCT_ADST)
    horz = &iadst_8[0];
  else
    horz = &idct_8[0];

  // Load the coefficients into the float buffer.
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      bufa[row * 8 + col] = (float)input[row * 8 + col];
    }
  }

  // Vertical pass: bufb[row][col] = sum_k vert[row][k] * bufa[k][col].
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      bufb[row * 8 + col] = 0;
      for (k = 0; k < 8; k++) {
        bufb[row * 8 + col] += vert[row * 8 + k] * bufa[k * 8 + col];
      }
    }
  }

  // Horizontal pass: bufa[row][col] = sum_k bufb[row][k] * horz[col][k].
  // bufa is reused as the destination; only bufb is read here.
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      bufa[row * 8 + col] = 0;
      for (k = 0; k < 8; k++) {
        bufa[row * 8 + col] += bufb[row * 8 + k] * horz[col * 8 + k];
      }
    }
  }

  // Scale down by 8 and convert to short. Rounding works on the magnitude
  // (add 0.49, truncate) and the sign is restored afterwards.
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      const float v = bufa[row * 8 + col];
      if (v > 0) {
        output[row * out_stride + col] = (short)(v / 8 + 0.49);
      } else {
        output[row * out_stride + col] = -(short)(-v / 8 + 0.49);
      }
    }
  }
}
#endif
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) {
int i;
......
......@@ -46,7 +46,6 @@ int dec_debug = 0;
#define COEFCOUNT_TESTING
static int merge_index(int v, int n, int modulus) {
int max1 = (n - 1 - modulus / 2) / modulus + 1;
if (v < max1) v = v * modulus + modulus / 2;
......@@ -260,7 +259,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
}
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
}
......@@ -336,29 +335,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
for (i = 0; i < 16; i++) {
BLOCKD *b = &xd->block[i];
int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
if(active_ht) {
switch(b_mode) {
case B_TM_PRED :
case B_RD_PRED :
b->bmi.as_mode.tx_type = ADST_ADST;
break;
case B_VE_PRED :
case B_VR_PRED :
b->bmi.as_mode.tx_type = ADST_DCT;
break ;
case B_HE_PRED :
case B_HD_PRED :
case B_HU_PRED :
b->bmi.as_mode.tx_type = DCT_ADST;
break;
default :
b->bmi.as_mode.tx_type = DCT_DCT;
break;
}
}
if(active_ht)
txfm_map(b, b_mode);
} // loop over 4x4 blocks
}
#endif
......@@ -392,7 +370,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
int i8x8mode;
BLOCKD *b;
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
int idx = (ib & 0x02) ? (ib + 2) : ib;
short *q = xd->block[idx].qcoeff;
......@@ -410,8 +388,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
RECON_INVOKE(RTCD_VTABLE(recon), intra8x8_predict)
(b, i8x8mode, b->predictor);
#if CONFIG_HTRANS8X8
vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
#if CONFIG_HYBRIDTRANSFORM8X8
txfm_map(b, pred_mode_conv(i8x8mode));
vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type,
q, dq, pre, dst, 16, stride);
// vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
q += 64;
#else
for (j = 0; j < 4; j++) {
......
......@@ -79,6 +79,51 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
}
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
// Dequantize an 8x8 coefficient block, run the inverse hybrid transform on
// it, and add the residual to the prediction, writing clamped pixels to dest.
//
// tx_type: transform type forwarded to vp8_iht8x8llm_c.
// input:   64 quantized coefficients; dequantized in place, then zeroed.
// dq:      dq[0] scales coefficient 0, dq[1] scales coefficients 1..63.
// pred:    prediction pixels, rows `pitch` bytes apart.
// dest:    output pixels, rows `stride` bytes apart.
void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
                                   unsigned char *pred, unsigned char *dest,
                                   int pitch, int stride) {
  short residual[64];
  int idx, quad, y, x;

  // In-place dequantization.
  input[0] = dq[0] * input[0];
  for (idx = 1; idx < 64; idx++) {
    input[idx] = dq[1] * input[idx];
  }

  // A pitch of 16 makes the transform write rows 8 shorts apart.
  vp8_iht8x8llm_c(input, residual, 16, tx_type);

  // Clear the 64 coefficients (128 bytes).
  vpx_memset(input, 0, 128);

  // Reconstruct the four 4x4 quadrants in the order top-left, top-right,
  // bottom-left, bottom-right.
  for (quad = 0; quad < 4; quad++) {
    const int row0 = quad / 2 * 4;
    const int col0 = quad % 2 * 4;
    short *diff_row = residual + row0 * 8 + col0;
    unsigned char *pred_row = pred + row0 * pitch + col0;
    unsigned char *dest_row = dest + row0 * stride + col0;

    for (y = 0; y < 4; y++) {
      for (x = 0; x < 4; x++) {
        int px = diff_row[x] + pred_row[x];
        // Clamp to the 8-bit pixel range.
        px = (px < 0) ? 0 : px;
        px = (px > 255) ? 255 : px;
        dest_row[x] = (unsigned char) px;
      }
      diff_row += 8;
      pred_row += pitch;
      dest_row += stride;
    }
  }
}
#endif
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride) {
short output[16];
......
......@@ -473,7 +473,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB);
INT16 *qcoeff_ptr = &xd->qcoeff[0];
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24;
if (xd->mode_info_context->mbmi.mode != B_PRED &&
xd->mode_info_context->mbmi.mode != SPLITMV &&
......@@ -506,7 +506,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
else
seg_eob = 64;
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
for (i = 0; i < bufthred ; i += 4) {
#else
for (i = 0; i < 24; i += 4) {
......@@ -528,7 +528,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
qcoeff_ptr += 64;
}
#if CONFIG_HTRANS8X8
#if CONFIG_HYBRIDTRANSFORM8X8
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
type = PLANE_TYPE_UV;
seg_eob = 16;
......
......@@ -17,6 +17,8 @@
#include "vp8/common/blockd.h"
// TODO: these transforms can be converted into integer forms to reduce
// the complexity
float dct_4[16] = {
0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
......@@ -32,6 +34,45 @@ float adst_4[16] = {
};
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
// 8x8 forward DCT matrix, stored row-major as 64 floats (8 rows of 8; each
// row is split across two source lines). vp8_fht8x8_c reads row k as the
// eight weights applied to the input samples to produce coefficient k.
// It is used for a pass whenever tx_type does not select adst_8 for it.
float dct_8[64] = {
0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064,
-0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615,
0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643,
-0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643,
0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801,
0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273,
0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273,
-0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801,
0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545,
-0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545,
0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615,
0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064
};
// 8x8 forward ADST matrix, stored row-major as 64 floats (8 rows of 8; each
// row is split across two source lines). vp8_fht8x8_c selects it instead of
// dct_8 for the vertical pass when tx_type is ADST_ADST or ADST_DCT, and for
// the horizontal pass when tx_type is ADST_ADST or DCT_ADST.
float adst_8[64] = {
0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145,
0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509,
0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349,
0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785,
0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376,
-0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762,
0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762,
0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348,
0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735,
0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145,
0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786,
-0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375,
0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534,
-0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736,
0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
};
#endif
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
......@@ -394,6 +435,112 @@ void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
}
#endif
#if CONFIG_HYBRIDTRANSFORM8X8
// Forward 8x8 hybrid (ADST/DCT) transform, floating-point reference version.
//
// input:   8x8 residual block; consecutive rows are (pitch / 2) shorts apart.
// output:  64 coefficients, written as a contiguous 8x8 block (row stride 8).
// pitch:   input row pitch, halved to get the stride in shorts.
// tx_type: ADST_ADST / ADST_DCT use adst_8 for the vertical pass,
//          ADST_ADST / DCT_ADST use adst_8 for the horizontal pass;
//          every other combination uses dct_8.
void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
  // Float scratch buffers; the transform is still a floating-point test
  // implementation and could be simplified once an integer form exists.
  float bufa[64], bufb[64];
  float *vert, *horz;  // matrices for the vertical / horizontal passes
  const int in_stride = pitch / 2;
  int row, col, k;

  if (tx_type == ADST_ADST || tx_type == ADST_DCT)
    vert = &adst_8[0];
  else
    vert = &dct_8[0];

  if (tx_type == ADST_ADST || tx_type == DCT_ADST)
    horz = &adst_8[0];
  else
    horz = &dct_8[0];

  // Load the residual into the float buffer.
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      bufa[row * 8 + col] = (float)input[row * in_stride + col];
    }
  }

  // Vertical pass: bufb[row][col] = sum_k vert[row][k] * bufa[k][col].
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      bufb[row * 8 + col] = 0;
      for (k = 0; k < 8; k++) {
        bufb[row * 8 + col] += vert[row * 8 + k] * bufa[k * 8 + col];
      }
    }
  }

  // Horizontal pass: bufa[row][col] = sum_k bufb[row][k] * horz[col][k].
  // bufa is reused as the destination; only bufb is read here.
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      bufa[row * 8 + col] = 0;
      for (k = 0; k < 8; k++) {
        bufa[row * 8 + col] += bufb[row * 8 + k] * horz[col * 8 + k];
      }
    }
  }

  // Scale up by 8 and convert to short. Rounding works on the magnitude
  // (add 0.49, truncate) and the sign is restored afterwards.
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      const float v = bufa[row * 8 + col];
      if (v > 0) {
        output[row * 8 + col] = (short)(8 * v + 0.49);
      } else {
        output[row * 8 + col] = -(short)(-8 * v + 0.49);
      }
    }
  }
}
#endif
void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
int i;
int a1, b1, c1, d1;
......
......@@ -90,28 +90,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
#if CONFIG_HYBRIDTRANSFORM
if(active_ht) {
b->bmi.as_mode.test = b->bmi.as_mode.first;
switch(b->bmi.as_mode.first) {
// case B_DC_PRED :
case B_TM_PRED :
case B_RD_PRED :
b->bmi.as_mode.tx_type = ADST_ADST;
break;
case B_VE_PRED :
case B_VR_PRED :
b->bmi.as_mode.tx_type = ADST_DCT;
<