Commit 111ca421 authored by Ronald S. Bultje's avatar Ronald S. Bultje

Make superblocks independent of macroblock code and data.

Split macroblock and superblock tokenization and detokenization
functions and coefficient-related data structs so that the bitstream
layout and related code of superblock coefficients looks less like it's
a hack to fit macroblocks in superblocks.

In addition, unify chroma transform size selection from luma transform
size (i.e. always use the same size, as long as it fits the predictor);
in practice, this means 32x32 and 64x64 superblocks using the 16x16 luma
transform will now use the 16x16 (instead of the 8x8) chroma transform,
and 64x64 superblocks using the 32x32 luma transform will now use the
32x32 (instead of the 16x16) chroma transform.

Lastly, add a trellis optimize function for 32x32 transform blocks.

HD gains about 0.3%, STDHD about 0.15% and derf about 0.1%. There's
a few negative points here and there that I might want to analyze
a little closer.

Change-Id: Ibad7c3ddfe1acfc52771dfc27c03e9783e054430
parent 2d3e879f
This diff is collapsed.
......@@ -277,13 +277,6 @@ typedef struct blockd {
union b_mode_info bmi;
} BLOCKD;
typedef struct superblockd {
/* 32x32 Y and 16x16 U/V */
DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]);
DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]);
} SUPERBLOCKD;
struct scale_factors {
int x_num;
int x_den;
......@@ -297,13 +290,11 @@ struct scale_factors {
};
typedef struct macroblockd {
DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */
DECLARE_ALIGNED(16, uint8_t, predictor[384]);
DECLARE_ALIGNED(16, int16_t, qcoeff[384]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[384]);
DECLARE_ALIGNED(16, uint16_t, eobs[24]);
SUPERBLOCKD sb_coeff_data;
DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */
DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks
DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]);
DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]);
/* 16 Y blocks, 4 U, 4 V, each with 16 entries. */
BLOCKD block[24];
......@@ -451,8 +442,12 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
}
}
extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24];
extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24];
extern const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24];
extern const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24];
extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96];
extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96];
extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384];
extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384];
#define USE_ADST_FOR_I16X16_8X8 0
#define USE_ADST_FOR_I16X16_4X4 0
......
This diff is collapsed.
......@@ -336,6 +336,6 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) {
BLOCK_TYPES, cm->fc.coef_counts_16x16,
count_sat, update_factor);
update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32,
BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32,
BLOCK_TYPES, cm->fc.coef_counts_32x32,
count_sat, update_factor);
}
......@@ -61,7 +61,6 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */
/* Outside dimension. 0 = Y with DC, 1 = UV */
#define BLOCK_TYPES 2
#define BLOCK_TYPES_32X32 1
#define REF_TYPES 2 // intra=0, inter=1
/* Middle dimension reflects the coefficient position within the transform. */
......@@ -110,12 +109,24 @@ extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]);
void vp9_coef_tree_initialize(void);
void vp9_adapt_coef_probs(struct VP9Common *);
static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
}
static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
}
extern const int vp9_coef_bands[32];
extern const int vp9_coef_bands4x4[16];
......
......@@ -645,7 +645,7 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
// First transform rows
for (i = 0; i < 16; ++i) {
idct16_1d(input, outptr);
input += half_pitch;
input += 16;
outptr += 16;
}
......@@ -655,7 +655,7 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
temp_in[j] = out[j * 16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
......@@ -838,7 +838,7 @@ static const transform_2d IHT_16[] = {
};
void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
int input_pitch, TX_TYPE tx_type) {
int pitch, TX_TYPE tx_type) {
int i, j;
int16_t out[16 * 16];
int16_t *outptr = out;
......@@ -848,7 +848,7 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
// Rows
for (i = 0; i < 16; ++i) {
ht.rows(input, outptr);
input += input_pitch;
input += 16;
outptr += 16;
}
......@@ -858,7 +858,7 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
temp_in[j] = out[j * 16 + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
......@@ -875,7 +875,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
vpx_memset(out, 0, sizeof(out));
for (i = 0; i < 4; ++i) {
idct16_1d(input, outptr);
input += half_pitch;
input += 16;
outptr += 16;
}
......@@ -885,7 +885,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
temp_in[j] = out[j*16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
output[j*16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
......@@ -1273,7 +1273,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
// Rows
for (i = 0; i < 32; ++i) {
idct32_1d(input, outptr);
input += half_pitch;
input += 32;
outptr += 32;
}
......@@ -1283,7 +1283,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
......@@ -1306,7 +1306,7 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) {
vpx_memset(out, 0, sizeof(out));
for (i = 0; i < 4; ++i) {
idct32_1d(input, outptr);
input += half_pitch;
input += 32;
outptr += 32;
}
......@@ -1316,6 +1316,6 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) {
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
......@@ -11,12 +11,13 @@
#include "vp9/common/vp9_invtrans.h"
#include "./vp9_rtcd.h"
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
BLOCKD *b = &xd->block[block];
if (xd->eobs[block] <= 1)
xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob,
int16_t *dqcoeff, int16_t *diff,
int pitch) {
if (eob <= 1)
xd->inv_txm4x4_1(dqcoeff, diff, pitch);
else
xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
xd->inv_txm4x4(dqcoeff, diff, pitch);
}
void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
......@@ -27,7 +28,8 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
if (tx_type != DCT_DCT) {
vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
vp9_inverse_transform_b_4x4(xd, i, 32);
vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
xd->block[i].diff, 32);
}
}
}
......@@ -36,7 +38,8 @@ void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) {
int i;
for (i = 16; i < 24; i++) {
vp9_inverse_transform_b_4x4(xd, i, 16);
vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
xd->block[i].diff, 16);
}
}
......@@ -111,13 +114,170 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
vp9_inverse_transform_mbuv_8x8(xd);
}
void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) {
vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64);
void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) {
vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64);
}
void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 32 * 16, 64);
}
}
void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
}
}
void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
}
}
void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024,
xd->diff + 1024, 32);
vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280,
xd->diff + 1280, 32);
}
void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64,
xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
32);
vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64,
xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
32);
}
}
void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n],
xd->dqcoeff + 1024 + n * 16,
xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
32);
vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n],
xd->dqcoeff + 1280 + n * 16,
xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
32);
}
}
void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) {
vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024,
xd_sb->diff + 1024, 32);
vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280,
xd_sb->diff + 1280, 32);
void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
vp9_short_idct32x32(xd->dqcoeff + n * 1024,
xd->diff + x_idx * 32 + y_idx * 32 * 64, 128);
}
}
void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
xd->diff + x_idx * 16 + y_idx * 64 * 16, 128);
}
}
void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
}
}
void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 256; n++) {
const int x_idx = n & 15, y_idx = n >> 4;
vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
}
}
void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) {
vp9_short_idct32x32(xd->dqcoeff + 4096,
xd->diff + 4096, 64);
vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024,
xd->diff + 4096 + 1024, 64);
}
void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16;
vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256,
xd->diff + 4096 + off, 64);
vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256,
xd->diff + 4096 + 1024 + off, 64);
}
}
void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8;
vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64,
xd->diff + 4096 + off, 64);
vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64,
xd->diff + 4096 + 1024 + off, 64);
}
}
void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4;
vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n],
xd->dqcoeff + 4096 + n * 16,
xd->diff + 4096 + off, 64);
vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n],
xd->dqcoeff + 4096 + 1024 + n * 16,
xd->diff + 4096 + 1024 + off, 64);
}
}
......@@ -15,7 +15,9 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch);
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob,
int16_t *dqcoeff, int16_t *diff,
int pitch);
void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd);
......@@ -39,7 +41,21 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb);
void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb);
void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd);
void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd);
void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd);
void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd);
void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd);
void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd);
#endif // VP9_COMMON_VP9_INVTRANS_H_
......@@ -61,7 +61,7 @@ typedef struct frame_contexts {
vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
nmv_context nmvc;
nmv_context pre_nmvc;
......@@ -83,12 +83,12 @@ typedef struct frame_contexts {
vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32];
vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES];
vp9_coeff_count coef_counts_4x4[BLOCK_TYPES];
vp9_coeff_count coef_counts_8x8[BLOCK_TYPES];
vp9_coeff_count coef_counts_16x16[BLOCK_TYPES];
vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
vp9_coeff_count coef_counts_32x32[BLOCK_TYPES];
nmv_context_counts NMVcount;
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
......
......@@ -117,7 +117,7 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) {
int x, y, stride = xd->block[0].dst_stride;
int16_t *diff = xd->sb_coeff_data.diff;
int16_t *diff = xd->diff;
for (y = 0; y < 32; y++) {
for (x = 0; x < 32; x++) {
......@@ -130,8 +130,8 @@ void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) {
void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
int x, y, stride = xd->block[16].dst_stride;
int16_t *udiff = xd->sb_coeff_data.diff + 1024;
int16_t *vdiff = xd->sb_coeff_data.diff + 1280;
int16_t *udiff = xd->diff + 1024;
int16_t *vdiff = xd->diff + 1280;
for (y = 0; y < 16; y++) {
for (x = 0; x < 16; x++) {
......@@ -145,6 +145,36 @@ void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
}
}
void vp9_recon_sb64y_s_c(MACROBLOCKD *xd, uint8_t *dst) {
int x, y, stride = xd->block[0].dst_stride;
int16_t *diff = xd->diff;
for (y = 0; y < 64; y++) {
for (x = 0; x < 64; x++) {
dst[x] = clip_pixel(dst[x] + diff[x]);
}
dst += stride;
diff += 64;
}
}
void vp9_recon_sb64uv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
int x, y, stride = xd->block[16].dst_stride;
int16_t *udiff = xd->diff + 4096;
int16_t *vdiff = xd->diff + 4096 + 1024;
for (y = 0; y < 32; y++) {
for (x = 0; x < 32; x++) {
udst[x] = clip_pixel(udst[x] + udiff[x]);
vdst[x] = clip_pixel(vdst[x] + vdiff[x]);
}
udst += stride;
vdst += stride;
udiff += 32;
vdiff += 32;
}
}
void vp9_recon_mby_c(MACROBLOCKD *xd) {
int i;
......
......@@ -97,6 +97,12 @@ specialize vp9_recon_sby_s
prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
specialize void vp9_recon_sbuv_s
prototype void vp9_recon_sb64y_s "struct macroblockd *x, uint8_t *dst"
specialize vp9_recon_sb64y_s
prototype void vp9_recon_sb64uv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
specialize void vp9_recon_sb64uv_s
prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
specialize vp9_build_intra_predictors_mby_s
......
This diff is collapsed.
......@@ -354,7 +354,7 @@ void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq,
int stride,
MACROBLOCKD *xd) {
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
xd->eobs[16]);
xd->eobs[64]);
vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
xd->eobs[20]);
xd->eobs[80]);
}
......@@ -90,9 +90,8 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
const int *const scan, TX_SIZE txfm_size) {
ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context;
ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context;
const int aidx = vp9_block2above[txfm_size][block_idx];
const int lidx = vp9_block2left[txfm_size][block_idx];
ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0;
int aidx, lidx;
ENTROPY_CONTEXT above_ec, left_ec;
FRAME_CONTEXT *const fc = &dx->common.fc;
int recent_energy = 0;
int pt, c = 0;
......@@ -101,9 +100,22 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
vp9_coeff_count *coef_counts;
const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
aidx = vp9_block2above_sb64[txfm_size][block_idx];
lidx = vp9_block2left_sb64[txfm_size][block_idx];
} else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
aidx = vp9_block2above_sb[txfm_size][block_idx];
lidx = vp9_block2left_sb[txfm_size][block_idx];
} else {
aidx = vp9_block2above[txfm_size][block_idx];
lidx = vp9_block2left[txfm_size][block_idx];
}
switch (txfm_size) {
default:
case TX_4X4:
above_ec = A0[aidx] != 0;
left_ec = L0[lidx] != 0;
coef_probs = fc->coef_probs_4x4;
coef_counts = fc->coef_counts_4x4;
break;
......@@ -240,7 +252,7 @@ SKIP_START:
if (type == PLANE_TYPE_UV) {
ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
A1[aidx] = A1[aidx + 1] = L1[aidx] = L1[lidx + 1] = A0[aidx];
A1[aidx] = A1[aidx + 1] = L1[lidx] = L1[lidx + 1] = A0[aidx];
if (txfm_size >= TX_32X32) {
ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2);
ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2);
......@@ -272,24 +284,181 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
const int segment_id = xd->mode_info_context->mbmi.segment_id;
int i, eobtotal = 0, seg_eob;
int i, eobtotal = 0, seg_eob, c;
// Luma block
int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
switch (xd->mode_info_context->mbmi.txfm_size) {
case TX_32X32:
// Luma block
c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, get_eob(xd, segment_id, 1024),
xd->sb_coeff_data.qcoeff,
vp9_default_zig_zag1d_32x32, TX_32X32);
xd->eobs[0] = c;
eobtotal += c;
xd->qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32);
xd->eobs[0] = c;
eobtotal += c;
// 16x16 chroma blocks
seg_eob = get_eob(xd, segment_id, 256);
for (i = 16; i < 24; i += 4) {
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
vp9_default_zig_zag1d_16x16, TX_16X16);
xd->eobs[i] = c;