Commit d3724abe authored by Ronald S. Bultje

Re-add support for ADST in superblocks.

This also changes the RD search to take account of the correct block
index when searching (this is required for ADST positioning to work
correctly in combination with tx_select).

Change-Id: Ie50d05b3a024a64ecd0b376887aa38ac5f7b6af6
parent 31623715
......@@ -460,20 +460,20 @@ extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384];
#define USE_ADST_FOR_I8X8_4X4 1
#define USE_ADST_PERIPHERY_ONLY 1
static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
// TODO(debargha): explore different patterns for ADST usage when blocksize
// is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT;
int ib = (int)(b - xd->block);
// TODO(rbultje, debargha): Explore ADST usage for superblocks
if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
if (ib >= 16)
return tx_type;
if (xd->lossless)
return DCT_DCT;
// TODO(rbultje, debargha): Explore ADST usage for superblocks
if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
if (xd->mode_info_context->mbmi.mode == B_PRED &&
xd->q_index < ACTIVE_HT) {
const BLOCKD *b = &xd->block[ib];
tx_type = txfm_map(
#if CONFIG_NEWBINTRAMODES
b->bmi.as_mode.first == B_CONTEXT_PRED ? b->bmi.as_mode.context :
......@@ -481,6 +481,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
b->bmi.as_mode.first);
} else if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT) {
const BLOCKD *b = &xd->block[ib];
#if USE_ADST_FOR_I8X8_4X4
#if USE_ADST_PERIPHERY_ONLY
// Use ADST for periphery blocks only
......@@ -517,18 +518,18 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
}
static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
// TODO(debargha): explore different patterns for ADST usage when blocksize
// is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT;
int ib = (int)(b - xd->block);
if (ib >= 16)
return tx_type;
// TODO(rbultje, debargha): Explore ADST usage for superblocks
if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
if (ib >= 16)
return tx_type;
if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT8) {
const BLOCKD *b = &xd->block[ib];
// TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged
// or the relationship otherwise modified to address this type conversion.
tx_type = txfm_map(pred_mode_conv(
......@@ -552,14 +553,10 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
}
static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type = DCT_DCT;
int ib = (int)(b - xd->block);
if (ib >= 16)
return tx_type;
// TODO(rbultje, debargha): Explore ADST usage for superblocks
if (xd->mode_info_context->mbmi.sb_type)
return tx_type;
if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
xd->q_index < ACTIVE_HT16) {
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
......@@ -567,24 +564,6 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
}
// Returns the transform type for block `b`, dispatching on the macroblock's
// transform size (mbmi.txfm_size) to the matching size-specific helper.
//
// xd: macroblock descriptor; supplies the block array and the mode info.
// b:  pointer into xd->block; its offset from xd->block is the block index.
//
// Returns DCT_DCT when the block index is 16 or above, or when txfm_size is
// none of TX_16X16 / TX_8X8 / TX_4X4; otherwise whatever the helper returns.
static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
// Recover the block index from the pointer's position within xd->block.
int ib = (int)(b - xd->block);
// Indices 16 and above never get a non-default transform here.
if (ib >= 16)
return tx_type;
// The three size checks below are mutually exclusive, so at most one runs.
if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
tx_type = get_tx_type_16x16(xd, b);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
// Collapse the index onto one of {0, 2, 8, 10}:
// ib 0-3 -> 0, 4-7 -> 2, 8-11 -> 8, 12-15 -> 10.
// NOTE(review): presumably these are the top-left 4x4 positions of the four
// 8x8 blocks in a 4-wide raster -- confirm against the block layout.
ib = (ib & 8) + ((ib & 4) >> 1);
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
tx_type = get_tx_type_4x4(xd, b);
}
return tx_type;
}
void vp9_build_block_doffsets(MACROBLOCKD *xd);
void vp9_setup_block_dptrs(MACROBLOCKD *xd);
......
......@@ -24,7 +24,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
int i;
for (i = 0; i < 16; i++) {
TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
TX_TYPE tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
......@@ -58,7 +58,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
BLOCKD *blockd = xd->block;
for (i = 0; i < 9; i += 8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
TX_TYPE tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
......@@ -67,7 +67,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
}
}
for (i = 2; i < 11; i += 8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
TX_TYPE tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
16, tx_type);
......@@ -100,7 +100,7 @@ void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff,
void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
BLOCKD *bd = &xd->block[0];
TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
if (tx_type != DCT_DCT) {
vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);
} else {
......@@ -123,9 +123,16 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 32 * 16, 64);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 32 * 16,
64);
} else {
vp9_short_iht16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
}
}
}
......@@ -134,9 +141,15 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
} else {
vp9_short_iht8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
}
}
}
......@@ -145,9 +158,15 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
} else {
vp9_short_iht4x4(xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
}
}
}
......@@ -206,9 +225,16 @@ void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 64 * 16, 128);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 64 * 16,
128);
} else {
vp9_short_iht16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type);
}
}
}
......@@ -217,9 +243,15 @@ void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
} else {
vp9_short_iht8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type);
}
}
}
......@@ -228,9 +260,15 @@ void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
} else {
vp9_short_iht4x4(xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type);
}
}
}
......
......@@ -201,8 +201,7 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
BLOCKD *bd = &xd->block[0];
TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
#ifdef DEC_DEBUG
if (dec_debug) {
int i;
......@@ -240,7 +239,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
// First do Y
// if the first one is DCT_DCT assume all the rest are as well
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
#ifdef DEC_DEBUG
if (dec_debug) {
int i;
......@@ -267,7 +266,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
int i8x8mode = b->bmi.as_mode.first;
vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
}
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
xd->eobs[idx]);
......@@ -341,7 +340,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
for (j = 0; j < 4; j++) {
b = &xd->block[ib + iblock[j]];
tx_type = get_tx_type_4x4(xd, b);
tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
......@@ -375,7 +374,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i);
vp9_intra4x4_predict(xd, b, b_mode, b->predictor);
tx_type = get_tx_type_4x4(xd, b);
tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
......@@ -397,7 +396,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.v_buffer,
xd->dst.uv_stride,
xd);
} else if (mode == SPLITMV || get_tx_type_4x4(xd, &xd->block[0]) == DCT_DCT) {
} else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
xd->itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
......@@ -431,7 +430,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
#endif
for (i = 0; i < 16; i++) {
BLOCKD *b = &xd->block[i];
tx_type = get_tx_type_4x4(xd, b);
tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
......@@ -517,13 +516,24 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
break;
case TX_16X16: // FIXME(rbultje): adst
case TX_16X16:
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
const TX_TYPE tx_type = get_tx_type_16x16(xd,
(y_idx * 16 + x_idx) * 4);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
} else {
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
}
}
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
......@@ -539,13 +549,23 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]);
}
break;
case TX_8X8: // FIXME(rbultje): adst
case TX_8X8:
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
} else {
vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
}
}
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
......@@ -561,13 +581,22 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]);
}
break;
case TX_4X4: // FIXME(rbultje): adst
case TX_4X4:
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
if (tx_type == DCT_DCT) {
xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
} else {
vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
}
}
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
......@@ -649,14 +678,24 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.v_buffer,
xd->dst.uv_stride, xd);
break;
case TX_16X16: // FIXME(rbultje): adst
case TX_16X16:
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
vp9_dequant_idct_add_16x16(
xd->qcoeff + n * 256, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
const TX_TYPE tx_type = get_tx_type_16x16(xd,
(y_idx * 8 + x_idx) * 4);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_16x16(
xd->qcoeff + n * 256, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
} else {
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
}
}
vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
xd->block[16].dequant,
......@@ -664,13 +703,23 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.v_buffer,
xd->dst.uv_stride, xd);
break;
case TX_8X8: // FIXME(rbultje): adst
case TX_8X8:
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
} else {
vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
}
}
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
......@@ -686,13 +735,22 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]);
}
break;
case TX_4X4: // FIXME(rbultje): adst
case TX_4X4:
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
if (tx_type == DCT_DCT) {
xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
} else {
vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
}
}
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
......
......@@ -96,9 +96,8 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) {
static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
BOOL_DECODER* const br, int block_idx,
PLANE_TYPE type, TX_TYPE tx_type,
int seg_eob, int16_t *qcoeff_ptr,
const int *const scan, TX_SIZE txfm_size) {
PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
TX_SIZE txfm_size) {
ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context;
ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context;
int aidx, lidx;
......@@ -114,6 +113,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
uint16_t nzc = 0;
uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx];
#endif
const int *scan;
if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
aidx = vp9_block2above_sb64[txfm_size][block_idx];
......@@ -128,19 +128,34 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
switch (txfm_size) {
default:
case TX_4X4:
case TX_4X4: {
const TX_TYPE tx_type = get_tx_type_4x4(xd, block_idx);
switch (tx_type) {
default:
scan = vp9_default_zig_zag1d_4x4;
break;
case ADST_DCT:
scan = vp9_row_scan_4x4;
break;
case DCT_ADST:
scan = vp9_col_scan_4x4;
break;
}
above_ec = A0[aidx] != 0;
left_ec = L0[lidx] != 0;
coef_probs = fc->coef_probs_4x4;
coef_counts = fc->coef_counts_4x4;
break;
}
case TX_8X8:
scan = vp9_default_zig_zag1d_8x8;
coef_probs = fc->coef_probs_8x8;
coef_counts = fc->coef_counts_8x8;
above_ec = (A0[aidx] + A0[aidx + 1]) != 0;
left_ec = (L0[lidx] + L0[lidx + 1]) != 0;
break;
case TX_16X16:
scan = vp9_default_zig_zag1d_16x16;
coef_probs = fc->coef_probs_16x16;
coef_counts = fc->coef_counts_16x16;
if (type == PLANE_TYPE_UV) {
......@@ -154,6 +169,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
}
break;
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
coef_probs = fc->coef_probs_32x32;
coef_counts = fc->coef_counts_32x32;
if (type == PLANE_TYPE_UV) {
......@@ -318,17 +334,15 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
case TX_32X32:
// Luma block
c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, get_eob(xd, segment_id, 1024),
xd->qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32);
get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32);
xd->eobs[0] = c;
eobtotal += c;
// 16x16 chroma blocks
seg_eob = get_eob(xd, segment_id, 256);
for (i = 64; i < 96; i += 16) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
xd->qcoeff + i * 16,
vp9_default_zig_zag1d_16x16, TX_16X16);
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
......@@ -338,17 +352,15 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
seg_eob = get_eob(xd, segment_id, 256);
for (i = 0; i < 64; i += 16) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, seg_eob, xd->qcoeff + i * 16,
vp9_default_zig_zag1d_16x16, TX_16X16);
seg_eob, xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
// 16x16 chroma blocks
for (i = 64; i < 96; i += 16) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
xd->qcoeff + i * 16,
vp9_default_zig_zag1d_16x16, TX_16X16);
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
xd->qcoeff + i * 16, TX_16X16);
xd->eobs[i] = c;
eobtotal += c;
}
......@@ -358,17 +370,15 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
seg_eob = get_eob(xd, segment_id, 64);
for (i = 0; i < 64; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, seg_eob, xd->qcoeff + i * 16,
vp9_default_zig_zag1d_8x8, TX_8X8);
seg_eob, xd->qcoeff + i * 16, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
// 8x8 chroma blocks
for (i = 64; i < 96; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
xd->qcoeff + i * 16,
vp9_default_zig_zag1d_8x8, TX_8X8);
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
xd->qcoeff + i * 16, TX_8X8);
xd->eobs[i] = c;
eobtotal += c;
}
......@@ -378,17 +388,15 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
seg_eob = get_eob(xd, segment_id, 16);
for (i = 0; i < 64; i++) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, seg_eob, xd->qcoeff + i * 16,
vp9_default_zig_zag1d_4x4, TX_4X4);
seg_eob, xd->qcoeff + i * 16, TX_4X4);