diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index c58e852fe18e403d6650e1855363e6f4df3db1b5..ccb9c4c5527193170b81f12492e14153a69a3039 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -127,12 +127,6 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { extern const uint8_t vp9_coefband_trans_8x8plus[MAXBAND_INDEX + 1]; extern const uint8_t vp9_coefband_trans_4x4[MAXBAND_INDEX + 1]; - -static int get_coef_band(const uint8_t * band_translate, int coef_index) { - return (coef_index > MAXBAND_INDEX) - ? (COEF_BANDS-1) : band_translate[coef_index]; -} - // 128 lists of probabilities are stored for the following ONE node probs: // 1, 3, 5, 7, ..., 253, 255 // In between probabilities are interpolated linearly @@ -181,11 +175,6 @@ static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, return combine_entropy_contexts(above_ec, left_ec); } -static const uint8_t *get_band_translate(TX_SIZE tx_size) { - return tx_size == TX_4X4 ? vp9_coefband_trans_4x4 - : vp9_coefband_trans_8x8plus; -} - static void get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx, const int16_t **scan, const int16_t **scan_nb) { diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index aa3903e5a00934cc8464dcd75be672c370a683a8..bf70e1392a9a0a59caec9941e05345c84234b078 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -45,6 +45,7 @@ typedef struct TileWorkerData { DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); + const uint8_t *band_translate[2]; } TileWorkerData; static int read_be32(const uint8_t *p) { @@ -294,7 +295,8 @@ struct intra_args { VP9_COMMON *cm; MACROBLOCKD *xd; vp9_reader *r; - unsigned char* token_cache; + uint8_t *token_cache; + const uint8_t *band_translate[2]; }; static void predict_and_reconstruct_intra_block(int plane, int block, @@ -303,6 +305,9 @@ static void predict_and_reconstruct_intra_block(int plane, int block, struct intra_args *const args = arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; + const uint8_t *band_translate[2] = { + args->band_translate[0], args->band_translate[1] + }; struct macroblockd_plane *const pd = &xd->plane[plane]; MODE_INFO *const mi = xd->mi_8x8[0]; @@ -324,7 +329,7 @@ static void predict_and_reconstruct_intra_block(int plane, int block, if (!mi->mbmi.skip_coeff) { vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size, - args->r, args->token_cache); + args->r, args->token_cache, band_translate); inverse_transform_block(xd, plane, block, plane_bsize, tx_size); } } @@ -334,7 +339,8 @@ struct inter_args { MACROBLOCKD *xd; vp9_reader *r; int *eobtotal; - unsigned char* token_cache; + uint8_t *token_cache; + const uint8_t *band_translate[2]; }; static void reconstruct_inter_block(int plane, int block, @@ -343,10 +349,14 @@ static void reconstruct_inter_block(int plane, int block, struct inter_args *args = arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; + const uint8_t *band_translate[2] = { + args->band_translate[0], args->band_translate[1] + }; *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size, - args->r, args->token_cache); + args->r, args->token_cache, + band_translate); inverse_transform_block(xd, plane, block, plane_bsize, tx_size); } @@ -398,7 +408,8 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize, - unsigned char *token_cache) { + uint8_t *token_cache, + const uint8_t *band_translate[2]) { const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; @@ -420,7 +431,9 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, } if (!is_inter_block(mbmi)) { - struct intra_args arg = { cm, xd, r, token_cache }; + struct intra_args arg = { + cm, xd, r, token_cache, {band_translate[0], band_translate[1]} + }; foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, &arg); } else { @@ -438,7 +451,10 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, // Reconstruction if (!mbmi->skip_coeff) { int eobtotal = 0; - struct inter_args arg = { cm, xd, r, &eobtotal, token_cache }; + struct inter_args arg = { + cm, xd, r, &eobtotal, token_cache, + {band_translate[0], band_translate[1]} + }; foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); if (!less8x8 && eobtotal == 0) mbmi->skip_coeff = 1; // skip loopfilter @@ -478,7 +494,8 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader* r, BLOCK_SIZE bsize, - unsigned char *token_cache) { + uint8_t *token_cache, + const uint8_t *band_translate[2]) { const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; BLOCK_SIZE subsize; @@ -489,33 +506,37 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache, + band_translate); } else { switch (partition) { case PARTITION_NONE: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache, + band_translate); break; case PARTITION_HORZ: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache, + band_translate); if (mi_row + hbs < cm->mi_rows) decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, - token_cache); + token_cache, band_translate); break; case PARTITION_VERT: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache, + band_translate); if (mi_col + hbs < cm->mi_cols) decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, - token_cache); + token_cache, band_translate); break; case PARTITION_SPLIT: decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize, - token_cache); + token_cache, band_translate); decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, - token_cache); + token_cache, band_translate); decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, - token_cache); + token_cache, band_translate); decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize, - token_cache); + token_cache, band_translate); break; default: assert(!"Invalid partition type"); @@ -798,9 +819,13 @@ static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, vp9_zero(xd->left_context); vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) + mi_col += MI_BLOCK_SIZE) { + const uint8_t *band_translate[2] = { + vp9_coefband_trans_4x4, pbi->coefband_trans_8x8plus + }; decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64, - pbi->token_cache); + pbi->token_cache, band_translate); + } if (pbi->do_loopfilter_inline) { const int lf_start = mi_row - MI_BLOCK_SIZE; @@ -948,7 +973,7 @@ static void setup_tile_macroblockd(TileWorkerData *const tile_data) { } static int tile_worker_hook(void *arg1, void *arg2) { - TileWorkerData *tile_data = (TileWorkerData*)arg1; + TileWorkerData *const tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; int mi_row, mi_col; @@ -960,7 +985,8 @@ static int tile_worker_hook(void *arg1, void *arg2) { mi_col += MI_BLOCK_SIZE) { decode_modes_sb(tile_data->cm, &tile_data->xd, tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, - tile_data->token_cache); + tile_data->token_cache, + tile_data->band_translate); } } return !tile_data->xd.corrupted; @@ -1019,6 +1045,8 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { tile_data->cm = cm; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; + tile_data->band_translate[0] = vp9_coefband_trans_4x4; + tile_data->band_translate[1] = pbi->coefband_trans_8x8plus; vp9_tile_init(tile, tile_data->cm, 0, tile_col); setup_token_decoder(data, data_end, size, &cm->error, @@ -1299,6 +1327,13 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { const int tile_cols = 1 << cm->log2_tile_cols; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + vpx_memset(pbi->coefband_trans_8x8plus, + (COEF_BANDS - 1), + sizeof(pbi->coefband_trans_8x8plus)); + vpx_memcpy(pbi->coefband_trans_8x8plus, + vp9_coefband_trans_8x8plus, + sizeof(vp9_coefband_trans_8x8plus)); + if (!first_partition_size) { // showing a frame directly *p_data_end = data + 1; diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index b8d670b96598a7bdce1847726d424a32e5318afe..65786dd8c38303ecc87884368285d499136ee395 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -93,7 +93,8 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r, int block_idx, PLANE_TYPE type, int seg_eob, int16_t *dqcoeff_ptr, TX_SIZE tx_size, const int16_t *dq, int pt, - uint8_t *token_cache) { + uint8_t *token_cache, + const uint8_t *band_translate) { const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); @@ -108,31 +109,30 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] = counts->eob_branch[tx_size][type][ref]; const int16_t *scan, *nb; - const uint8_t *const band_translate = get_band_translate(tx_size); + const uint8_t *cat6; get_scan(xd, tx_size, type, block_idx, &scan, &nb); - while (1) { + while (c < seg_eob) { int val; - const uint8_t *cat6 = cat6_prob; - if (c >= seg_eob) - break; if (c) pt = get_coef_context(nb, token_cache, c); - band = get_coef_band(band_translate, c); + band = *band_translate++; prob = coef_probs[band][pt]; if (!cm->frame_parallel_decoding_mode) ++eob_branch_count[band][pt]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) break; + goto DECODE_ZERO; SKIP_START: if (c >= seg_eob) break; if (c) pt = get_coef_context(nb, token_cache, c); - band = get_coef_band(band_translate, c); + band = *band_translate++; prob = coef_probs[band][pt]; + DECODE_ZERO: if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN]; @@ -200,6 +200,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY5); } val = 0; + cat6 = cat6_prob; while (*cat6) { val = (val << 1) | vp9_read(r, *cat6++); } @@ -218,7 +219,8 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, vp9_reader *r, - uint8_t *token_cache) { + uint8_t *token_cache, + const uint8_t *band_translate[2]) { struct macroblockd_plane *const pd = &xd->plane[plane]; const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id, tx_size); @@ -229,7 +231,8 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, eob = decode_coefs(cm, xd, r, block, pd->plane_type, seg_eob, BLOCK_OFFSET(pd->dqcoeff, block), - tx_size, pd->dequant, pt, token_cache); + tx_size, pd->dequant, pt, token_cache, + band_translate[tx_size != TX_4X4]); set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff); diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 04939ead36b1d29f1181bcdfece7b0cfdbfc44da..9b8c17a4550774c42c97e4e8d00b2cc041d5e4ca 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -18,6 +18,7 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, vp9_reader *r, - uint8_t *token_cache); + uint8_t *token_cache, + const uint8_t *band_translate[2]); #endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 7ad05e6b297dae24277198c8950e08ce16cdce7e..e29b453ff66046496dbcb2a90e7b4c0642dfd6f8 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -54,7 +54,8 @@ typedef struct VP9Decompressor { ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; PARTITION_CONTEXT *above_seg_context; - DECLARE_ALIGNED(16, unsigned char, token_cache[1024]); + DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); + DECLARE_ALIGNED(16, uint8_t, coefband_trans_8x8plus[1024]); } VP9D_COMP; #endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 4d9a92a712895557d0373576c8a91401f1f34ef0..04ce1f4169ba8ace215bb5a429a4dd14b3619d32 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -184,6 +184,9 @@ struct macroblock { BLOCK_SIZE sb64_partitioning; void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride); + + // band cache + DECLARE_ALIGNED(16, uint8_t, coefband_trans_8x8plus[1024]); }; // TODO(jingning): the variables used here are little complicated. need further diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 75ed8eab7577ebbeeccd28e3f4a41a6f77ef6d41..70008103e4717c50265244161342d993ec837c47 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -138,7 +138,9 @@ static void optimize_b(MACROBLOCK *mb, uint8_t token_cache[1024]; const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block); const int16_t *dequant_ptr = pd->dequant; - const uint8_t *const band_translate = get_band_translate(tx_size); + const uint8_t *const band_translate = (tx_size == TX_4X4 ? + vp9_coefband_trans_4x4 : + mb->coefband_trans_8x8plus); assert((!type && !plane) || (type && plane)); dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); @@ -179,7 +181,7 @@ static void optimize_b(MACROBLOCK *mb, t0 = (vp9_dct_value_tokens_ptr + x)->token; /* Consider both possible successor states. */ if (next < default_eob) { - band = get_coef_band(band_translate, i + 1); + band = band_translate[i + 1]; pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); rate0 += mb->token_costs[tx_size][type][ref][band][0][pt] @@ -230,7 +232,7 @@ static void optimize_b(MACROBLOCK *mb, t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; } if (next < default_eob) { - band = get_coef_band(band_translate, i + 1); + band = band_translate[i + 1]; if (t0 != DCT_EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] @@ -264,7 +266,7 @@ static void optimize_b(MACROBLOCK *mb, /* There's no choice to make for a zero coefficient, so we don't * add a new trellis node, but we do need to update the costs. */ - band = get_coef_band(band_translate, i + 1); + band = band_translate[i + 1]; t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ @@ -284,7 +286,7 @@ static void optimize_b(MACROBLOCK *mb, } /* Now pick the best path through the whole trellis. */ - band = get_coef_band(band_translate, i + 1); + band = band_translate[i + 1]; pt = combine_entropy_contexts(*a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index f4106934c6eada9c2ca3494a371b30b1e58d1476..7603ac03db9715f5a1561eb6ac23382073db6e89 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1223,6 +1223,13 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->fixed_divide[0] = 0; for (i = 1; i < 512; i++) cpi->fixed_divide[i] = 0x80000 / i; + + vpx_memset(cpi->mb.coefband_trans_8x8plus, + (COEF_BANDS-1), + sizeof(cpi->mb.coefband_trans_8x8plus)); + vpx_memcpy(cpi->mb.coefband_trans_8x8plus, + vp9_coefband_trans_8x8plus, + sizeof(vp9_coefband_trans_8x8plus)); } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 7d4676e97bdf643afa6756e04c38029132949a5d..11dd0c0af3657fa3bd0e298431052f8784c2a32e 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -115,7 +115,9 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, vp9_coeff_count *const counts = cpi->coef_counts[tx_size]; vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size]; const int ref = is_inter_block(mbmi); - const uint8_t *const band_translate = get_band_translate(tx_size); + const uint8_t *const band_translate = (tx_size == TX_4X4 ? + vp9_coefband_trans_4x4 : + cpi->mb.coefband_trans_8x8plus); const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); int aoff, loff; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); @@ -127,7 +129,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, get_scan(xd, tx_size, type, block, &scan, &nb); c = 0; do { - const int band = get_coef_band(band_translate, c); + const int band = band_translate[c]; int token; int v = 0; rc = scan[c];