Commit 9824230f authored by Jingning Han's avatar Jingning Han

Adds hybrid transform

Adds ADST/DCT hybrid transform coding for Intra4x4 mode.
The ADST is applied to directions in which the boundary
pixels are used for prediction, while the DCT is applied to
directions without corresponding boundary prediction.

Adds enum TX_TYPE in b_mode_info to indicate the transform
type used.

Make coding style consistent with google style.
Fixed the commented issues.

Experimental results in terms of bit-rate reduction:
derf:   0.731%
yt:     0.982%
std-hd: 0.459%
hd:     0.725%

Will be looking at 8x8 transforms next.

Change-Id: I46dbd7b80dbb3e8856e9c34fbc58cb3764a12fcf
parent 6fd0929f
......@@ -228,6 +228,7 @@ EXPERIMENT_LIST="
adaptive_entropy
pred_filter
lossless
hybridtransform
"
CONFIG_LIST="
external_build
......
......@@ -124,12 +124,25 @@ typedef enum {
} TX_SIZE;
#if CONFIG_HYBRIDTRANSFORM
// 2-D transform type of a 4x4 block. The first half of each name is the 1-D
// transform vp8_iht4x4llm_c uses in its vertical pass, the second half the
// one it uses in its horizontal pass.
typedef enum {
DCT_DCT = 0, // DCT in both horizontal and vertical
ADST_DCT = 1, // ADST in vertical, DCT in horizontal
DCT_ADST = 2, // DCT in vertical, ADST in horizontal
ADST_ADST = 3 // ADST in both directions
} TX_TYPE;
#endif
#define VP8_YMODES (B_PRED + 1)
#define VP8_UV_MODES (TM_PRED + 1)
#define VP8_I8X8_MODES (TM_PRED + 1)
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
#if CONFIG_HYBRIDTRANSFORM
// The hybrid transform is only considered active when the macroblock's
// q_index is strictly below this value (callers test q_index < ACTIVE_HT).
#define ACTIVE_HT 110 // quantizer index (q_index) threshold
#endif
typedef enum {
B_DC_PRED, /* average of above and left pixels */
B_TM_PRED,
......@@ -163,6 +176,11 @@ typedef enum {
union b_mode_info {
struct {
B_PREDICTION_MODE first;
#if CONFIG_HYBRIDTRANSFORM
B_PREDICTION_MODE test;
TX_TYPE tx_type;
#endif
#if CONFIG_COMP_INTRA_PRED
B_PREDICTION_MODE second;
#endif
......@@ -183,6 +201,10 @@ typedef enum {
typedef struct {
MB_PREDICTION_MODE mode, uv_mode;
#if CONFIG_HYBRIDTRANSFORM
MB_PREDICTION_MODE mode_rdopt;
#endif
#if CONFIG_COMP_INTRA_PRED
MB_PREDICTION_MODE second_mode, second_uv_mode;
#endif
......@@ -345,6 +367,10 @@ typedef struct MacroBlockD {
int mb_index; // Index of the MB in the SB (0..3)
#if CONFIG_HYBRIDTRANSFORM
int q_index;
#endif
} MACROBLOCKD;
......
......@@ -65,6 +65,24 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = {
9, 12, 13, 10,
7, 11, 14, 15,
};
#if CONFIG_HYBRIDTRANSFORM
// Column-by-column scan order for a 4x4 block: entry c is the index into the
// 16-entry coefficient array read at scan position c (0,4,8,12 then 1,5,9,13,
// ...). The token decoder selects this scan for DCT_ADST blocks.
DECLARE_ALIGNED(16, const int, vp8_col_scan[16]) = {
0, 4, 8, 12,
1, 5, 9, 13,
2, 6, 10, 14,
3, 7, 11, 15
};
// Row-by-row scan order for a 4x4 block: entry c is the index into the
// 16-entry coefficient array read at scan position c, i.e. plain storage
// order 0..15. The token decoder selects this scan for ADST_DCT blocks.
DECLARE_ALIGNED(16, const int, vp8_row_scan[16]) = {
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15
};
#endif
DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
5, 3, 6, 3, 5, 4, 6, 6,
6, 5, 5, 6, 6, 6, 6, 6,
......
......@@ -107,6 +107,12 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY
struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
#if CONFIG_HYBRIDTRANSFORM
extern DECLARE_ALIGNED(16, const int, vp8_col_scan[16]);
extern DECLARE_ALIGNED(16, const int, vp8_row_scan[16]);
#endif
extern short vp8_default_zig_zag_mask[16];
extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
extern short vp8_default_zig_zag_mask_8x8[64];// int64_t
......
......@@ -97,6 +97,12 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
#endif
#if CONFIG_HYBRIDTRANSFORM
#include "vp8/common/blockd.h"
void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type);
#endif
typedef prototype_idct((*vp8_idct_fn_t));
typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
typedef prototype_second_order((*vp8_second_order_fn_t));
......
......@@ -25,6 +25,9 @@
#include "vpx_ports/config.h"
#include "vp8/common/idct.h"
#if CONFIG_HYBRIDTRANSFORM
#include "vp8/common/blockd.h"
#endif
#include <math.h>
......@@ -32,6 +35,130 @@ static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int rounding = 0;
#if CONFIG_HYBRIDTRANSFORM
// 4-point inverse DCT basis in floating point, stored row-major as a 4x4
// matrix. In vp8_iht4x4llm_c, row n holds the weights applied to the four
// input coefficients to produce output sample n. This is the transpose of the
// forward table dct_4.
float idct_4[16] = {
0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099,
0.500000000000000, 0.270598050073099, -0.500000000000000, -0.653281482438188,
0.500000000000000, -0.270598050073099, -0.500000000000000, 0.653281482438188,
0.500000000000000, -0.653281482438188, 0.500000000000000, -0.270598050073099
};
// 4-point inverse ADST basis in floating point, stored row-major as a 4x4
// matrix with the same layout and usage as idct_4: row n holds the weights
// applied to the four input coefficients to produce output sample n. This is
// the transpose of the forward table adst_4.
float iadst_4[16] = {
0.228013428883779, 0.577350269189626, 0.656538502008139, 0.428525073124360,
0.428525073124360, 0.577350269189626, -0.228013428883779, -0.656538502008139,
0.577350269189626, 0, -0.577350269189626, 0.577350269189626,
0.656538502008139, -0.577350269189626, 0.428525073124359, -0.228013428883779
};
#endif
#if CONFIG_HYBRIDTRANSFORM
/*
 * Floating-point 4x4 inverse hybrid transform.
 *
 * input   : 16 coefficients, read as four consecutive rows of four.
 * output  : destination for the 4x4 result; consecutive rows are
 *           (pitch >> 1) shorts apart.
 * pitch   : output row pitch; halved to obtain the stride in shorts.
 * tx_type : picks the 1-D basis per pass. ADST_ADST and ADST_DCT use
 *           iadst_4 vertically, ADST_ADST and DCT_ADST use iadst_4
 *           horizontally; every other case uses idct_4.
 *
 * The float buffers exist for test purposes; the routine could be simplified
 * together with an integer transform.
 */
void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
  float work[16], stage[16];
  const float *vert_basis;
  const float *horz_basis;
  const int out_stride = pitch >> 1;
  int row, col, k;

  /* Choose the 1-D basis for each pass once, up front. */
  vert_basis = (tx_type == ADST_ADST || tx_type == ADST_DCT) ? iadst_4 : idct_4;
  horz_basis = (tx_type == ADST_ADST || tx_type == DCT_ADST) ? iadst_4 : idct_4;

  /* Widen the coefficient block to floating point. */
  for (k = 0; k < 16; ++k) {
    work[k] = (float)input[k];
  }

  /* Vertical pass: stage = basis * work (each column transformed). */
  for (row = 0; row < 4; ++row) {
    for (col = 0; col < 4; ++col) {
      stage[row * 4 + col] = 0;
      for (k = 0; k < 4; ++k) {
        stage[row * 4 + col] += vert_basis[row * 4 + k] * work[k * 4 + col];
      }
    }
  }

  /* Horizontal pass: work = stage * basis^T (each row transformed). */
  for (row = 0; row < 4; ++row) {
    for (col = 0; col < 4; ++col) {
      work[row * 4 + col] = 0;
      for (k = 0; k < 4; ++k) {
        work[row * 4 + col] += stage[row * 4 + k] * horz_basis[col * 4 + k];
      }
    }
  }

  /*
   * Scale by 1/8 and convert back to short. The 0.49 offset is applied to
   * the magnitude, so negative values mirror positive ones.
   */
  for (row = 0; row < 4; ++row) {
    short *dst = output + row * out_stride;
    for (col = 0; col < 4; ++col) {
      const float v = work[row * 4 + col];
      dst[col] = (v > 0) ? (short)(v / 8 + 0.49)
                         : -(short)(-v / 8 + 0.49);
    }
  }
}
#endif
void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) {
int i;
int a1, b1, c1, d1;
......
......@@ -31,6 +31,11 @@ static void recon_dcblock_8x8(MACROBLOCKD *x) {
}
#if CONFIG_HYBRIDTRANSFORM
// Inverse hybrid transform of one block: transforms b->dqcoeff into b->diff
// with the transform type stored in b->bmi.as_mode.tx_type.
// rtcd is accepted for signature parity with vp8_inverse_transform_b but is
// not used; the C implementation is called directly.
void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type);
}
#endif
void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
if (b->eob <= 1)
......
......@@ -15,6 +15,11 @@
#include "vpx_ports/config.h"
#include "idct.h"
#include "blockd.h"
#if CONFIG_HYBRIDTRANSFORM
extern void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
#endif
extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
......
......@@ -128,6 +128,11 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) {
xd->block[i].dequant = pc->Y1dequant[QIndex];
}
#if CONFIG_HYBRIDTRANSFORM
xd->q_index = QIndex;
#endif
#if CONFIG_LOSSLESS
if (!QIndex) {
pbi->common.rtcd.idct.idct1 = vp8_short_inv_walsh4x4_1_x8_c;
......@@ -208,6 +213,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
int i;
int tx_type;
#if CONFIG_HYBRIDTRANSFORM
int QIndex = xd->q_index;
int active_ht = (QIndex < ACTIVE_HT);
#endif
if (pbi->common.frame_type == KEY_FRAME) {
if (pbi->common.txfm_mode == ALLOW_8X8 &&
xd->mode_info_context->mbmi.mode != I8X8_PRED &&
......@@ -281,6 +291,39 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
if (xd->segmentation_enabled)
mb_init_dequantizer(pbi, xd);
#if CONFIG_HYBRIDTRANSFORM
// parse transform types for intra 4x4 mode
if (mode == B_PRED) {
for (i = 0; i < 16; i++) {
BLOCKD *b = &xd->block[i];
int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
if(active_ht) {
switch(b_mode) {
case B_TM_PRED :
case B_RD_PRED :
b->bmi.as_mode.tx_type = ADST_ADST;
break;
case B_VE_PRED :
case B_VR_PRED :
b->bmi.as_mode.tx_type = ADST_DCT;
break ;
case B_HE_PRED :
case B_HD_PRED :
case B_HU_PRED :
b->bmi.as_mode.tx_type = DCT_ADST;
break;
default :
b->bmi.as_mode.tx_type = DCT_DCT;
break;
}
}
} // loop over 4x4 blocks
}
#endif
/* do prediction */
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
if (mode != I8X8_PRED) {
......@@ -360,16 +403,29 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
#endif
if (xd->eobs[i] > 1) {
DEQUANT_INVOKE(&pbi->dequant, idct_add)
(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
} else {
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
(b->qcoeff[0] * b->dequant[0], b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
#if CONFIG_HYBRIDTRANSFORM
if(active_ht)
vp8_ht_dequant_idct_add_c( (TX_TYPE)b->bmi.as_mode.tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
else
vp8_dequant_idct_add_c(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
#else
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->dequant, idct_add)
(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
}
else
{
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
(b->qcoeff[0] * b->dequant[0], b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
}
#endif
}
} else if (mode == SPLITMV) {
DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
......@@ -378,8 +434,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.y_stride, xd->eobs);
} else {
BLOCKD *b = &xd->block[24];
if (tx_type == TX_8X8) {
DEQUANT_INVOKE(&pbi->dequant, block_2x2)(b);
#ifdef DEC_DEBUG
......
......@@ -41,6 +41,44 @@ void vp8_dequantize_b_c(BLOCKD *d) {
}
}
#if CONFIG_HYBRIDTRANSFORM
/*
 * Dequantize a 4x4 block, apply the inverse hybrid transform and add the
 * result to the prediction.
 *
 * tx_type : transform type forwarded to vp8_iht4x4llm_c.
 * input   : 16 quantized coefficients; dequantized in place, then zeroed.
 * dq      : 16 dequantization factors, one per coefficient.
 * pred    : prediction samples, rows `pitch` bytes apart.
 * dest    : reconstruction output, rows `stride` bytes apart.
 *
 * Each reconstructed sample is clamped to [0, 255].
 */
void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
                               unsigned char *pred, unsigned char *dest,
                               int pitch, int stride) {
  short residual[16];
  int idx, row, col;

  /* Dequantize in place. */
  for (idx = 0; idx < 16; ++idx) {
    input[idx] *= dq[idx];
  }

  /* A pitch of 8 makes the transform write rows four shorts apart. */
  vp8_iht4x4llm_c(input, residual, 4 << 1, tx_type);

  /* Clear the coefficient block once it has been consumed. */
  vpx_memset(input, 0, 32);

  /* Reconstruct: prediction + residual, saturated to 8 bits. */
  for (row = 0; row < 4; ++row) {
    const short *res_row = residual + 4 * row;
    for (col = 0; col < 4; ++col) {
      const int sum = res_row[col] + pred[col];
      dest[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
    }
    pred += pitch;
    dest += stride;
  }
}
#endif
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride) {
short output[16];
......
......@@ -76,6 +76,17 @@ extern prototype_dequant_block(vp8_dequant_block);
#endif
extern prototype_dequant_idct_add(vp8_dequant_idct_add);
#if CONFIG_HYBRIDTRANSFORM
// declare dequantization and inverse transform module of hybrid transform decoder
#ifndef vp8_ht_dequant_idct_add
#define vp8_ht_dequant_idct_add vp8_ht_dequant_idct_add_c
#endif
extern void vp8_ht_dequant_idct_add(TX_TYPE tx_type, short *input, short *dq,
unsigned char *pred, unsigned char *dest,
int pitch, int stride);
#endif
#ifndef vp8_dequant_dc_idct_add
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
#endif
......
......@@ -119,6 +119,53 @@ int get_token(int v) {
else return DCT_VAL_CATEGORY6;
}
#if CONFIG_HYBRIDTRANSFORM
/*
 * Accumulate coefficient token counts for one 4x4 block into fc->coef_counts,
 * reading the coefficients in the scan order implied by the block's
 * transform type.
 *
 * The row/column scans are used only when xd->q_index < ACTIVE_HT and the
 * macroblock mode is B_PRED: ADST_DCT selects vp8_row_scan, DCT_ADST selects
 * vp8_col_scan. In every other case the default zig-zag is used.
 *
 * Counting starts at scan position !type. If eob < seg_eob, one
 * DCT_EOB_TOKEN is counted at the position where the loop stopped.
 */
static void count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr,
                                       int block, int type, ENTROPY_CONTEXT *a,
                                       ENTROPY_CONTEXT *l, int eob, int seg_eob,
                                       FRAME_CONTEXT *fc) {
  const int *scan_order = vp8_default_zig_zag1d;
  int pos, ctx;

  if (xd->q_index < ACTIVE_HT &&
      xd->mode_info_context->mbmi.mode == B_PRED) {
    const int tx = xd->block[block].bmi.as_mode.tx_type;
    if (tx == ADST_DCT) {
      scan_order = vp8_row_scan;
    } else if (tx == DCT_ADST) {
      scan_order = vp8_col_scan;
    }
  }

  /* Initial context is derived from the above/left entropy contexts. */
  VP8_COMBINEENTROPYCONTEXTS(ctx, *a, *l);

  pos = !type;
  while (pos < eob) {
    const int tok = get_token(qcoeff_ptr[scan_order[pos]]);
    fc->coef_counts[type][vp8_coef_bands[pos]][ctx][tok]++;
    ctx = vp8_prev_token_class[tok];
    ++pos;
  }

  if (eob < seg_eob) {
    fc->coef_counts[type][vp8_coef_bands[pos]][ctx][DCT_EOB_TOKEN]++;
  }
}
#endif
void static count_tokens(INT16 *qcoeff_ptr, int block, int type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int eob, int seg_eob, FRAME_CONTEXT *const fc) {
......@@ -289,8 +336,14 @@ static int vp8_decode_coefs(VP8D_COMP *dx, const MACROBLOCKD *xd,
WRITE_COEF_CONTINUE(val);
}
#if CONFIG_ADAPTIVE_ENTROPY
if (block_type == TX_4X4)
#if CONFIG_HYBRIDTRANSFORM
count_tokens_adaptive_scan(xd, qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
#else
count_tokens(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
#endif
else
count_tokens_8x8(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
#endif
......@@ -351,12 +404,21 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
return eobtotal;
}
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd) {
ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
char *const eobs = xd->eobs;
#if CONFIG_HYBRIDTRANSFORM
const int *scan = vp8_default_zig_zag1d;
int QIndex = xd->q_index;
int active_ht = (QIndex < ACTIVE_HT) &&
(xd->mode_info_context->mbmi.mode == B_PRED);
#else
const int *const scan = vp8_default_zig_zag1d;
#endif
int c, i, type, eobtotal = 0, seg_eob = 16;
INT16 *qcoeff_ptr = &xd->qcoeff[0];
......@@ -388,6 +450,41 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd) {
if (i == 16)
type = PLANE_TYPE_UV;
#if CONFIG_HYBRIDTRANSFORM
if (type == PLANE_TYPE_Y_WITH_DC &&
xd->mode_info_context->mbmi.mode == B_PRED &&
active_ht) {
BLOCKD *b = &xd->block[i];
switch(b->bmi.as_mode.first) {
case B_TM_PRED :
case B_RD_PRED :
b->bmi.as_mode.tx_type = ADST_ADST;
scan = vp8_default_zig_zag1d;
break;
case B_VE_PRED :
case B_VR_PRED :
b->bmi.as_mode.tx_type = ADST_DCT;
scan = vp8_row_scan;
break ;
case B_HE_PRED :
case B_HD_PRED :
case B_HU_PRED :
b->bmi.as_mode.tx_type = DCT_ADST;
scan = vp8_col_scan;
break;
default :
b->bmi.as_mode.tx_type = DCT_DCT;
scan = vp8_default_zig_zag1d;
break;
}
}
if (type == PLANE_TYPE_UV) {
scan = vp8_default_zig_zag1d;
}
#endif
c = vp8_decode_coefs(dx, xd, a, l, type, seg_eob, qcoeff_ptr,
i, scan, TX_4X4, coef_bands_x);
a[0] = l[0] = ((eobs[i] = c) != !type);
......
......@@ -13,6 +13,28 @@
#include "vpx_ports/config.h"
#include "vp8/common/idct.h"
#if CONFIG_HYBRIDTRANSFORM
#include "vp8/common/blockd.h"
// 4-point forward DCT basis in floating point, stored row-major as a 4x4
// matrix. It is the transpose of the decoder-side table idct_4.
// NOTE(review): presumably consumed by vp8_fht4x4_c - confirm against its body.
float dct_4[16] = {
0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
0.500000000000000, -0.500000000000000, -0.500000000000000, 0.500000000000000,
0.270598050073099, -0.653281482438188, 0.653281482438188, -0.270598050073099
};
// 4-point forward ADST basis in floating point, stored row-major as a 4x4
// matrix. It is the transpose of the decoder-side table iadst_4.
// NOTE(review): presumably consumed by vp8_fht4x4_c - confirm against its body.
float adst_4[16] = {
0.228013428883779, 0.428525073124360, 0.577350269189626, 0.656538502008139,
0.577350269189626, 0.577350269189626, 0.000000000000000, -0.577350269189626,
0.656538502008139, -0.228013428883779, -0.577350269189626, 0.428525073124359,
0.428525073124360, -0.656538502008139, 0.577350269189626, -0.228013428883779
};
#endif
#if CONFIG_INT_8X8FDCT
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
......@@ -268,6 +290,112 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8
}
#if CONFIG_HYBRIDTRANSFORM
void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
int i, j, k;
float bufa[16], bufb[16]; // buffers are for floating-point test purpose
// the implementation could be simplified in
// conjunction with integer transform
short *ip = input;
short *op = output;
float *pfa = &bufa[0];
float *pfb = &bufb[0];