diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index f52adfc972dae2f193914053fa5ada6bf51fc5ba..a666d1d1d87f52679c76d650defcc603959ee2b8 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -170,9 +170,9 @@ struct buf_2d { }; struct macroblockd_plane { - DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]); - DECLARE_ALIGNED(16, uint16_t, eobs[256]); + int16_t *qcoeff; /* pointer replaces the embedded 64 * 64 array; must be pointed at storage before use */ + int16_t *dqcoeff; /* pointer replaces the embedded 64 * 64 array */ + uint16_t *eobs; /* pointer replaces the embedded 256-entry array */ PLANE_TYPE plane_type; int subsampling_x; int subsampling_y; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index aad400aa1a408e2c62bf7a6654fb34c2dd420ff8..218fdd81314558bfca9dd37913637676ec9b8d34 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -42,6 +42,9 @@ typedef struct TileWorkerData { vp9_reader bit_reader; DECLARE_ALIGNED(16, MACROBLOCKD, xd); DECLARE_ALIGNED(16, unsigned char, token_cache[1024]); + DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); /* qcoeff, dqcoeff, eobs: per-plane storage that setup_tile_macroblockd() attaches to xd.plane[i] */ + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); } TileWorkerData; static int read_be32(const uint8_t *p) { @@ -931,6 +934,19 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return end; } +static void setup_tile_macroblockd(TileWorkerData *const tile_data) { /* Attaches the qcoeff, dqcoeff and eobs arrays inside tile_data to each plane of tile_data->xd, then zeroes dqcoeff. */ + MACROBLOCKD *xd = &tile_data->xd; + struct macroblockd_plane *const pd = xd->plane; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + pd[i].qcoeff = tile_data->qcoeff[i]; + pd[i].dqcoeff = tile_data->dqcoeff[i]; + pd[i].eobs = tile_data->eobs[i]; + vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); /* xd->plane[i] is pd[i]; byte count is written out because dqcoeff is a pointer, so sizeof on it would not give the buffer size */ + } +} + static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; @@ -1008,6 +1024,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { setup_token_decoder(data, data_end, size, 
&cm->error, &tile_data->bit_reader); setup_tile_context(pbi, &tile_data->xd, 0, tile_col); + setup_tile_macroblockd(tile_data); /* attach and clear the coefficient buffers stored inside this tile_data */ worker->had_error = 0; if (i == num_workers - 1 || tile_col == tile_cols - 1) { @@ -1319,7 +1336,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); for (i = 0; i < MAX_MB_PLANE; ++i) - vp9_zero(xd->plane[i].dqcoeff); + vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); /* explicit size: dqcoeff is a pointer after this patch, not an array. NOTE(review): presumably vp9_zero sizes by sizeof of its argument; confirm */ xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 5f970a3d5636645f35d03e03594ed7f6b3836d91..cb45d3702e89ca969b0ad51eb21693abe373cfc0 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -107,6 +107,18 @@ void vp9_initialize_dec() { } } +static void init_macroblockd(VP9D_COMP *const pbi) { /* Points each plane of pbi->mb at the qcoeff, dqcoeff and eobs arrays held in *pbi. */ + MACROBLOCKD *xd = &pbi->mb; + struct macroblockd_plane *const pd = xd->plane; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + pd[i].qcoeff = pbi->qcoeff[i]; + pd[i].dqcoeff = pbi->dqcoeff[i]; + pd[i].eobs = pbi->eobs[i]; + } +} + VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); VP9_COMMON *const cm = pbi ? 
&pbi->common : NULL; @@ -141,6 +153,8 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { cm->error.setjmp = 0; pbi->decoded_key_frame = 0; + init_macroblockd(pbi); /* wire the pbi->mb plane pointers to the buffers held in *pbi */ + vp9_worker_init(&pbi->lf_worker); return pbi; diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 7c4c9db36f86f3c34246ff6076c1af2c53ba7f43..7ad05e6b297dae24277198c8950e08ce16cdce7e 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -22,6 +22,10 @@ typedef struct VP9Decompressor { DECLARE_ALIGNED(16, VP9_COMMON, common); + DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); /* qcoeff, dqcoeff, eobs: per-plane storage that init_macroblockd() attaches to pbi->mb.plane[i] */ + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); + VP9D_CONFIG oxcf; const uint8_t *source; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index f922f900ad42eec2cf8678b596ef33004ec6ff1b..4f1357a2d7888addb25823653a89ab21cfea4744 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1524,6 +1524,17 @@ static void free_pick_mode_context(MACROBLOCK *x) { } } +static void init_macroblock(VP9_COMP *const cpi) { /* Points each plane of cpi->mb.e_mbd at the qcoeff, dqcoeff and eobs arrays held in *cpi. */ + MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct macroblockd_plane *const pd = xd->plane; + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) { + pd[i].qcoeff = cpi->qcoeff[i]; + pd[i].dqcoeff = cpi->dqcoeff[i]; + pd[i].eobs = cpi->eobs[i]; + } +} + VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; volatile union { @@ -1562,6 +1573,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { init_pick_mode_context(cpi); + init_macroblock(cpi); /* wire the cpi->mb.e_mbd plane pointers to the buffers held in *cpi */ + cm->current_video_frame = 0; cpi->kf_overspend_bits = 0; cpi->kf_bitrate_adjustment = 0; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 9429c7fedd284e34079abacbc1ecd97d1423cff9..839a92b7e335f77a1b23a9ea3fe6dc011c1990ab 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -313,6 +313,10 @@ typedef struct VP9_COMP { VP9_CONFIG oxcf; struct rdcost_block_args rdcost_stack; 
+ DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); /* qcoeff, dqcoeff, eobs: per-plane storage that init_macroblock() attaches to cpi->mb.e_mbd.plane[i] */ + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); + struct lookahead_ctx *lookahead; struct lookahead_entry *source; #if CONFIG_MULTIPLE_ARF